| 
														
															@@ -15,17 +15,17 @@ from selenium.webdriver import FirefoxOptions 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.common.by import By 
														 | 
														
														 | 
														
															 from selenium.webdriver.common.by import By 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.support import expected_conditions as EC 
														 | 
														
														 | 
														
															 from selenium.webdriver.support import expected_conditions as EC 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.support.ui import WebDriverWait 
														 | 
														
														 | 
														
															 from selenium.webdriver.support.ui import WebDriverWait 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-import gov_commodity_jiangsu_country 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-import gov_commodity_jiangsu_city 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-import gov_commodity_jiangsu_import_export 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from jiangsu import download_dir 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from jiangsu import gov_commodity_jiangsu_country 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from jiangsu import gov_commodity_jiangsu_city 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from jiangsu import gov_commodity_jiangsu_import_export 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from utils import base_country_code, base_mysql 
														 | 
														
														 | 
														
															 from utils import base_country_code, base_mysql 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from utils.log import log 
														 | 
														
														 | 
														
															 from utils.log import log 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe" 
														 | 
														
														 | 
														
															 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 rarfile.UNRAR_EXECUTABLE = "unrar" 
														 | 
														
														 | 
														
															 rarfile.UNRAR_EXECUTABLE = "unrar" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-download_dir = base_country_code.download_dir 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-Path(download_dir).mkdir(parents=True, exist_ok=True) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def configure_stealth_options(): 
														 | 
														
														 | 
														
															 def configure_stealth_options(): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     """增强型反检测配置[1,4](@ref)""" 
														 | 
														
														 | 
														
															     """增强型反检测配置[1,4](@ref)""" 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -217,7 +217,8 @@ def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         res = detect_latest_month(driver, url) 
														 | 
														
														 | 
														
															         res = detect_latest_month(driver, url) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         if res is None: 
														 | 
														
														 | 
														
															         if res is None: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             log.info("江苏省海关没有最新数据更新") 
														 | 
														
														 | 
														
															             log.info("江苏省海关没有最新数据更新") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            sys.exit(0) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            # sys.exit(0) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            return 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         year_month = res 
														 | 
														
														 | 
														
															         year_month = res 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print(f"检测到最新有效数据:{year_month}") 
														 | 
														
														 | 
														
															         print(f"检测到最新有效数据:{year_month}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -266,6 +267,16 @@ def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															     finally: 
														 | 
														
														 | 
														
															     finally: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         driver.quit() 
														 | 
														
														 | 
														
															         driver.quit() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.info(f"江苏南京海关全量数据下载任务完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        # 等待5s后执行 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        all_records = base_mysql.get_hs_all() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        hierarchical_traversal(download_dir, all_records) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        base_mysql.update_january_yoy('江苏省') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        base_mysql.update_shandong_yoy('江苏省') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.info("江苏南京海关城市同比sql处理完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def wait_for_download_complete(timeout=30, existing_files=None): 
														 | 
														
														 | 
														
															 def wait_for_download_complete(timeout=30, existing_files=None): 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -321,17 +332,19 @@ def hierarchical_traversal(root_path, all_records): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 gov_commodity_jiangsu_country.process_folder(md['path']) 
														 | 
														
														 | 
														
															                 gov_commodity_jiangsu_country.process_folder(md['path']) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 gov_commodity_jiangsu_city.process_folder(md['path']) 
														 | 
														
														 | 
														
															                 gov_commodity_jiangsu_city.process_folder(md['path']) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-if __name__ == "__main__": 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+def main(): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all') 
														 | 
														
														 | 
														
															     crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     # crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment') 
														 | 
														
														 | 
														
															     # crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    log.info(f"江苏南京海关全量数据下载任务完成") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    # 等待5s后执行 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    time.sleep(5) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    all_records = base_mysql.get_hs_all() 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    hierarchical_traversal(base_country_code.download_dir, all_records) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    time.sleep(5) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    base_mysql.update_january_yoy('江苏省') 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    base_mysql.update_shandong_yoy('江苏省') 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    log.info("江苏南京海关城市同比sql处理完成") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # log.info(f"江苏南京海关全量数据下载任务完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # # 等待5s后执行 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # all_records = base_mysql.get_hs_all() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # hierarchical_traversal(base_country_code.download_dir, all_records) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # log.info("江苏南京海关类章、国家、城市所有文件处理完成!") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # time.sleep(5) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # base_mysql.update_january_yoy('江苏省') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # base_mysql.update_shandong_yoy('江苏省') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    # log.info("江苏南京海关城市同比sql处理完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+if __name__ == '__main__': 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    main() 
														 |