|
@@ -276,9 +276,9 @@ def detect_latest_month(driver, url):
|
|
|
continue
|
|
|
return f"{check_year}年-{check_month}月"
|
|
|
except:
|
|
|
- log.info(f"未找到 {target_title}")
|
|
|
+ log.error(f"未找到 {target_title}")
|
|
|
continue
|
|
|
- log.info("三个月内未找到有效数据")
|
|
|
+ log.error("三个月内未找到有效数据")
|
|
|
return None
|
|
|
|
|
|
def crawl_with_selenium(url, mark):
|
|
@@ -386,15 +386,18 @@ def main():
|
|
|
parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
+ start_time = time.time()
|
|
|
if args.year == 2023:
|
|
|
log.info("正在全量爬取浙江省海关数据")
|
|
|
crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
|
|
|
- send_dingtalk_message('浙江省海关全量数据爬取完成')
|
|
|
+ duration = time.time() - start_time
|
|
|
+ send_dingtalk_message(f'浙江省海关全量数据爬取完成,耗时 {duration:.2f} 秒')
|
|
|
else:
|
|
|
log.info("正在增量爬取浙江省海关数据")
|
|
|
res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
|
|
|
if res == 'finish':
|
|
|
- send_dingtalk_message('浙江省海关增量数据爬取完成')
|
|
|
+ duration = time.time() - start_time
|
|
|
+ send_dingtalk_message(f'浙江省海关增量数据爬取完成,耗时 {duration:.2f} 秒')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
|
main()
|