| 
					
				 | 
			
			
				@@ -7,7 +7,6 @@ import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import rarfile 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import shutil 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from pathlib import Path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from datetime import datetime, timedelta 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from faker import Faker 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -23,7 +22,7 @@ from crossborder.jiangsu import gov_commodity_jiangsu_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.jiangsu import gov_commodity_jiangsu_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from crossborder.utils.base_country_code import get_last_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.base_country_code import extract_year_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.dingtalk import send_dingtalk_message 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.log import  get_logger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -222,8 +221,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         res = detect_latest_month(driver, url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if res is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             log.info("江苏省海关没有最新数据更新") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            # sys.exit(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         year_month = res 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"检测到最新有效数据:{year_month}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -276,7 +274,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         all_records = base_mysql.get_hs_all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        hierarchical_traversal(download_dir, all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        hierarchical_traversal(download_dir, all_records, year_month) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         log.info("江苏省海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         base_mysql.update_shandong_yoy('江苏省') 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -307,7 +305,7 @@ def wait_for_download_complete(timeout=30, existing_files=None): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         time.sleep(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     raise TimeoutError("未找到 .rar 文件或超时") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def hierarchical_traversal(root_path, all_records): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def hierarchical_traversal(root_path, all_records, year_month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """分层遍历:省份->年份->月目录""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     root = Path(root_path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 获取所有年份目录 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -333,9 +331,16 @@ def hierarchical_traversal(root_path, all_records): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if month_dirs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for md in sorted(month_dirs, key=lambda x: x["month"], reverse=True): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 log.info(f"  月份:{md['month']:02d} | 路径:{md['path']}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                gov_commodity_jiangsu_import_export.process_folder(md['path'], all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                gov_commodity_jiangsu_country.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                gov_commodity_jiangsu_city.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                path = md['path'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if year_month is not None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    year, month = extract_year_month(year_month) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    parts = path.parts 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if year_dir.name != year or parts[-1] != month: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        log.info(f"江苏省海关已处理 {year_month} 数据,返回") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                gov_commodity_jiangsu_import_export.process_folder(path, all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                gov_commodity_jiangsu_country.process_folder(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                gov_commodity_jiangsu_city.process_folder(path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 |