| 
														
															@@ -7,7 +7,6 @@ import time 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 import rarfile 
														 | 
														
														 | 
														
															 import rarfile 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 import shutil 
														 | 
														
														 | 
														
															 import shutil 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from pathlib import Path 
														 | 
														
														 | 
														
															 from pathlib import Path 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-import sys 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
														
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from faker import Faker 
														 | 
														
														 | 
														
															 from faker import Faker 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -23,7 +22,7 @@ from crossborder.jiangsu import gov_commodity_jiangsu_city 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.jiangsu import gov_commodity_jiangsu_import_export 
														 | 
														
														 | 
														
															 from crossborder.jiangsu import gov_commodity_jiangsu_import_export 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils import base_country_code, base_mysql 
														 | 
														
														 | 
														
															 from crossborder.utils import base_country_code, base_mysql 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from crossborder.utils.base_country_code import get_last_month 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from crossborder.utils.base_country_code import extract_year_month 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
														
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.log import  get_logger 
														 | 
														
														 | 
														
															 from crossborder.utils.log import  get_logger 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -222,8 +221,7 @@ def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         res = detect_latest_month(driver, url) 
														 | 
														
														 | 
														
															         res = detect_latest_month(driver, url) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if res is None: 
														 | 
														
														 | 
														
															         if res is None: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             log.info("江苏省海关没有最新数据更新") 
														 | 
														
														 | 
														
															             log.info("江苏省海关没有最新数据更新") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            # sys.exit(0) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            return 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            return None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         year_month = res 
														 | 
														
														 | 
														
															         year_month = res 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print(f"检测到最新有效数据:{year_month}") 
														 | 
														
														 | 
														
															         print(f"检测到最新有效数据:{year_month}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -276,7 +274,7 @@ def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         # 等待5s后执行 
														 | 
														
														 | 
														
															         # 等待5s后执行 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         time.sleep(5) 
														 | 
														
														 | 
														
															         time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         all_records = base_mysql.get_hs_all() 
														 | 
														
														 | 
														
															         all_records = base_mysql.get_hs_all() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        hierarchical_traversal(download_dir, all_records) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        hierarchical_traversal(download_dir, all_records, year_month) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         log.info("江苏省海关类章、国家、城市所有文件处理完成!") 
														 | 
														
														 | 
														
															         log.info("江苏省海关类章、国家、城市所有文件处理完成!") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         time.sleep(5) 
														 | 
														
														 | 
														
															         time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         base_mysql.update_shandong_yoy('江苏省') 
														 | 
														
														 | 
														
															         base_mysql.update_shandong_yoy('江苏省') 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -307,7 +305,7 @@ def wait_for_download_complete(timeout=30, existing_files=None): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         time.sleep(2) 
														 | 
														
														 | 
														
															         time.sleep(2) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     raise TimeoutError("未找到 .rar 文件或超时") 
														 | 
														
														 | 
														
															     raise TimeoutError("未找到 .rar 文件或超时") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-def hierarchical_traversal(root_path, all_records): 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+def hierarchical_traversal(root_path, all_records, year_month): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     """分层遍历:省份->年份->月目录""" 
														 | 
														
														 | 
														
															     """分层遍历:省份->年份->月目录""" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     root = Path(root_path) 
														 | 
														
														 | 
														
															     root = Path(root_path) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     # 获取所有年份目录 
														 | 
														
														 | 
														
															     # 获取所有年份目录 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -333,9 +331,16 @@ def hierarchical_traversal(root_path, all_records): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if month_dirs: 
														 | 
														
														 | 
														
															         if month_dirs: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             for md in sorted(month_dirs, key=lambda x: x["month"], reverse=True): 
														 | 
														
														 | 
														
															             for md in sorted(month_dirs, key=lambda x: x["month"], reverse=True): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 log.info(f"  月份:{md['month']:02d} | 路径:{md['path']}") 
														 | 
														
														 | 
														
															                 log.info(f"  月份:{md['month']:02d} | 路径:{md['path']}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-                gov_commodity_jiangsu_import_export.process_folder(md['path'], all_records) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-                gov_commodity_jiangsu_country.process_folder(md['path']) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-                gov_commodity_jiangsu_city.process_folder(md['path']) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                path = md['path'] 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                if year_month is not None: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                    year, month = extract_year_month(year_month) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                    parts = path.parts 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                    if year_dir.name != year or parts[-1] != month: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                        log.info(f"江苏省海关已处理 {year_month} 数据,返回") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                        return 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                gov_commodity_jiangsu_import_export.process_folder(path, all_records) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                gov_commodity_jiangsu_country.process_folder(path) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                gov_commodity_jiangsu_city.process_folder(path) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def main(): 
														 | 
														
														 | 
														
															 def main(): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     try: 
														 | 
														
														 | 
														
															     try: 
														 |