|
@@ -7,7 +7,6 @@ import time
|
|
|
import rarfile
|
|
|
import shutil
|
|
|
from pathlib import Path
|
|
|
-import sys
|
|
|
from datetime import datetime, timedelta
|
|
|
|
|
|
from faker import Faker
|
|
@@ -23,7 +22,7 @@ from crossborder.jiangsu import gov_commodity_jiangsu_city
|
|
|
from crossborder.jiangsu import gov_commodity_jiangsu_import_export
|
|
|
|
|
|
from crossborder.utils import base_country_code, base_mysql
|
|
|
-from crossborder.utils.base_country_code import get_last_month
|
|
|
+from crossborder.utils.base_country_code import extract_year_month
|
|
|
from crossborder.utils.dingtalk import send_dingtalk_message
|
|
|
from crossborder.utils.log import get_logger
|
|
|
|
|
@@ -222,8 +221,7 @@ def crawl_with_selenium(url, mark):
|
|
|
res = detect_latest_month(driver, url)
|
|
|
if res is None:
|
|
|
log.info("江苏省海关没有最新数据更新")
|
|
|
- # sys.exit(0)
|
|
|
- return
|
|
|
+ return None
|
|
|
year_month = res
|
|
|
print(f"检测到最新有效数据:{year_month}")
|
|
|
|
|
@@ -276,7 +274,7 @@ def crawl_with_selenium(url, mark):
|
|
|
# 等待5s后执行
|
|
|
time.sleep(5)
|
|
|
all_records = base_mysql.get_hs_all()
|
|
|
- hierarchical_traversal(download_dir, all_records)
|
|
|
+ hierarchical_traversal(download_dir, all_records, year_month)
|
|
|
log.info("江苏省海关类章、国家、城市所有文件处理完成!")
|
|
|
time.sleep(5)
|
|
|
base_mysql.update_shandong_yoy('江苏省')
|
|
@@ -307,7 +305,7 @@ def wait_for_download_complete(timeout=30, existing_files=None):
|
|
|
time.sleep(2)
|
|
|
raise TimeoutError("未找到 .rar 文件或超时")
|
|
|
|
|
|
-def hierarchical_traversal(root_path, all_records):
|
|
|
+def hierarchical_traversal(root_path, all_records, year_month):
|
|
|
"""分层遍历:省份->年份->月目录"""
|
|
|
root = Path(root_path)
|
|
|
# 获取所有年份目录
|
|
@@ -333,9 +331,16 @@ def hierarchical_traversal(root_path, all_records):
|
|
|
if month_dirs:
|
|
|
for md in sorted(month_dirs, key=lambda x: x["month"], reverse=True):
|
|
|
log.info(f" 月份:{md['month']:02d} | 路径:{md['path']}")
|
|
|
- gov_commodity_jiangsu_import_export.process_folder(md['path'], all_records)
|
|
|
- gov_commodity_jiangsu_country.process_folder(md['path'])
|
|
|
- gov_commodity_jiangsu_city.process_folder(md['path'])
|
|
|
+ path = md['path']
|
|
|
+ if year_month is not None:
|
|
|
+ year, month = extract_year_month(year_month)
|
|
|
+ parts = path.parts
|
|
|
+ if year_dir.name != year or parts[-1] != month:
|
|
|
+ log.info(f"江苏省海关已处理 {year_month} 数据,返回")
|
|
|
+ return
|
|
|
+ gov_commodity_jiangsu_import_export.process_folder(path, all_records)
|
|
|
+ gov_commodity_jiangsu_country.process_folder(path)
|
|
|
+ gov_commodity_jiangsu_city.process_folder(path)
|
|
|
|
|
|
def main():
|
|
|
try:
|