|
@@ -17,6 +17,7 @@ from crossborder.anhui import gov_commodity_anhui_city, download_dir
|
|
|
from crossborder.anhui import gov_commodity_anhui_country
|
|
|
from crossborder.anhui import gov_commodity_anhui_import_export
|
|
|
from crossborder.utils import base_country_code, base_mysql
|
|
|
+from crossborder.utils.dingtalk import send_dingtalk_message
|
|
|
from crossborder.utils.log import log
|
|
|
|
|
|
def configure_stealth_options():
|
|
@@ -199,7 +200,6 @@ def crawl_with_selenium(url, mark):
|
|
|
res = detect_latest_month(driver, url)
|
|
|
if res is None:
|
|
|
log.info("安徽省海关没有最新数据更新")
|
|
|
- # sys.exit(0)
|
|
|
return None
|
|
|
year_month = res
|
|
|
print(f"检测到最新有效数据:{year_month}")
|
|
@@ -257,6 +257,7 @@ def crawl_with_selenium(url, mark):
|
|
|
time.sleep(5)
|
|
|
base_mysql.update_shandong_yoy('安徽省')
|
|
|
print("安徽合肥海关城市同比sql处理完成")
|
|
|
+ return 'finish'
|
|
|
|
|
|
|
|
|
def wait_for_download_complete(timeout=30, existing_files=None):
|
|
@@ -323,11 +324,15 @@ def main():
|
|
|
parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
- print(f'anhui args: {args}')
|
|
|
if args.year == 2023:
|
|
|
+ log.info("正在全量爬取安徽海关数据")
|
|
|
crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all')
|
|
|
+ send_dingtalk_message('安徽海关全量数据爬取完成')
|
|
|
else:
|
|
|
- crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
|
|
|
+ log.info("正在增量爬取安徽海关数据")
|
|
|
+ res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
|
|
|
+ if res == 'finish':
|
|
|
+ send_dingtalk_message('安徽海关增量数据爬取完成')
|
|
|
|
|
|
# Script entry point: main() is called only when this file is executed directly, not when it is imported as a module.
if __name__ == '__main__':
|
|
|
main()
|