|
@@ -312,14 +312,14 @@ def crawl_with_selenium(url, mark):
|
|
|
|
|
|
finally:
|
|
finally:
|
|
driver.quit()
|
|
driver.quit()
|
|
- log.info(f"浙江杭州海关全量数据下载任务完成")
|
|
|
|
|
|
+ log.info(f"浙江省海关全量数据下载任务完成")
|
|
# 等待5s后执行
|
|
# 等待5s后执行
|
|
time.sleep(5)
|
|
time.sleep(5)
|
|
hierarchical_traversal(download_dir)
|
|
hierarchical_traversal(download_dir)
|
|
- log.info("浙江杭州海关类章、国家、城市所有文件处理完成!")
|
|
|
|
|
|
+ log.info("浙江省海关类章、国家、城市所有文件处理完成!")
|
|
time.sleep(5)
|
|
time.sleep(5)
|
|
base_mysql.update_shandong_yoy('浙江省')
|
|
base_mysql.update_shandong_yoy('浙江省')
|
|
- log.info("浙江杭州海关城市同比sql处理完成")
|
|
|
|
|
|
+ log.info("浙江省海关城市同比sql处理完成")
|
|
return 'finish'
|
|
return 'finish'
|
|
|
|
|
|
def wait_for_download_complete(timeout=30, existing_files=None):
|
|
def wait_for_download_complete(timeout=30, existing_files=None):
|
|
@@ -387,14 +387,14 @@ def main():
|
|
args = parser.parse_args()
|
|
args = parser.parse_args()
|
|
|
|
|
|
if args.year == 2023:
|
|
if args.year == 2023:
|
|
- log.info("正在全量爬取浙江海关数据")
|
|
|
|
|
|
+ log.info("正在全量爬取浙江省海关数据")
|
|
crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
|
|
crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
|
|
- send_dingtalk_message('浙江海关全量数据爬取完成')
|
|
|
|
|
|
+ send_dingtalk_message('浙江省海关全量数据爬取完成')
|
|
else:
|
|
else:
|
|
- log.info("正在增量爬取浙江海关数据")
|
|
|
|
|
|
+ log.info("正在增量爬取浙江省海关数据")
|
|
res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
|
|
res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
|
|
if res == 'finish':
|
|
if res == 'finish':
|
|
- send_dingtalk_message('浙江海关增量数据爬取完成')
|
|
|
|
|
|
+ send_dingtalk_message('浙江省海关增量数据爬取完成')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|
|
main()
|
|
main()
|