| 
					
				 | 
			
			
				@@ -15,17 +15,17 @@ from selenium.webdriver import FirefoxOptions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support import expected_conditions as EC 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support.ui import WebDriverWait 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import gov_commodity_jiangsu_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import gov_commodity_jiangsu_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import gov_commodity_jiangsu_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from jiangsu import download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from jiangsu import gov_commodity_jiangsu_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from jiangsu import gov_commodity_jiangsu_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from jiangsu import gov_commodity_jiangsu_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from utils.log import log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 rarfile.UNRAR_EXECUTABLE = "unrar" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-download_dir = base_country_code.download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-Path(download_dir).mkdir(parents=True, exist_ok=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def configure_stealth_options(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """增强型反检测配置[1,4](@ref)""" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -217,7 +217,8 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         res = detect_latest_month(driver, url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if res is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             log.info("江苏省海关没有最新数据更新") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            sys.exit(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # sys.exit(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         year_month = res 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         print(f"检测到最新有效数据:{year_month}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -266,6 +267,16 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     finally: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.quit() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"江苏南京海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        all_records = base_mysql.get_hs_all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        hierarchical_traversal(download_dir, all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        base_mysql.update_january_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        base_mysql.update_shandong_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info("江苏南京海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def wait_for_download_complete(timeout=30, existing_files=None): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -321,17 +332,19 @@ def hierarchical_traversal(root_path, all_records): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_jiangsu_country.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_jiangsu_city.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    log.info(f"江苏南京海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    all_records = base_mysql.get_hs_all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    hierarchical_traversal(base_country_code.download_dir, all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    base_mysql.update_january_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    base_mysql.update_shandong_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    log.info("江苏南京海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # log.info(f"江苏南京海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # all_records = base_mysql.get_hs_all() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # hierarchical_traversal(base_country_code.download_dir, all_records) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # base_mysql.update_january_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # base_mysql.update_shandong_yoy('江苏省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # log.info("江苏南京海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    main() 
			 |