| 
														
															@@ -3,59 +3,27 @@ import os 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 import random 
														 | 
														
														 | 
														
															 import random 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 import re 
														 | 
														
														 | 
														
															 import re 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 import time 
														 | 
														
														 | 
														
															 import time 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-import sys 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
														
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from pathlib import Path 
														 | 
														
														 | 
														
															 from pathlib import Path 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from urllib.parse import urljoin 
														 | 
														
														 | 
														
															 from urllib.parse import urljoin 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from faker import Faker 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium import webdriver 
														 | 
														
														 | 
														
															 from selenium import webdriver 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.common.exceptions import StaleElementReferenceException 
														 | 
														
														 | 
														
															 from selenium.common.exceptions import StaleElementReferenceException 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from selenium.webdriver import FirefoxOptions 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.common.by import By 
														 | 
														
														 | 
														
															 from selenium.webdriver.common.by import By 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.support import expected_conditions as EC 
														 | 
														
														 | 
														
															 from selenium.webdriver.support import expected_conditions as EC 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver.support.ui import WebDriverWait 
														 | 
														
														 | 
														
															 from selenium.webdriver.support.ui import WebDriverWait 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from crossborder.utils.base_country_code import extract_year_month 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from crossborder.utils import base_country_code, base_mysql 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
														
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from crossborder.utils.download_utils import configure_stealth_options 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from crossborder.utils.log import get_logger 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.zhejiang import download_dir 
														 | 
														
														 | 
														
															 from crossborder.zhejiang import download_dir 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_city 
														 | 
														
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_city 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_country 
														 | 
														
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_country 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_import_export 
														 | 
														
														 | 
														
															 from crossborder.zhejiang import gov_commodity_zhejiang_import_export 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from crossborder.utils import base_country_code, base_mysql 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-from crossborder.utils.log import  get_logger 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 log = get_logger(__name__) 
														 | 
														
														 | 
														
															 log = get_logger(__name__) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															-def configure_stealth_options(): 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    """增强型反检测配置[1,4](@ref)""" 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts = FirefoxOptions() 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    print("当前下载路径:", Path(download_dir).resolve()) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    # 文件下载配置 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.download.dir", download_dir) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.download.folderList", 2) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.download.manager.showWhenStarting", False) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.helperApps.neverAsk.saveToDisk", 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-                        "application/octet-stream, application/vnd.ms-excel")  # 覆盖常见文件类型 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.download.manager.useWindow", False)  # 禁用下载管理器窗口 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("browser.download.manager.showAlertOnComplete", False)  # 关闭完成提示 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    # 反检测参数 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("dom.webdriver.enabled", False) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("useAutomationExtension", False) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.add_argument("--disable-blink-features=AutomationControlled") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    # 动态指纹 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    fake = Faker() 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("general.useragent.override", fake.firefox()) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.set_preference("intl.accept_languages", "zh-CN,zh;q=0.9") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    # 视口配置 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.add_argument("--width=1440") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.add_argument("--height=900") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    opts.add_argument("--headless") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    return opts 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def crawl_by_year_tabs(driver, base_url, year_month): 
														 | 
														
														 | 
														
															 def crawl_by_year_tabs(driver, base_url, year_month): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     """按年份Tab导航采集数据""" 
														 | 
														
														 | 
														
															     """按年份Tab导航采集数据""" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     years = ['2023年', '2024年', '2025年'] 
														 | 
														
														 | 
														
															     years = ['2023年', '2024年', '2025年'] 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -387,7 +355,7 @@ def extract_year_month_chinese(text): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     return year, month 
														 | 
														
														 | 
														
															     return year, month 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def crawl_with_selenium(url, mark): 
														 | 
														
														 | 
														
															 def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    driver = webdriver.Firefox(options=configure_stealth_options()) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    driver = webdriver.Firefox(options=configure_stealth_options(download_dir)) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															     year_month = None 
														 | 
														
														 | 
														
															     year_month = None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     if 'auto' == mark: 
														 | 
														
														 | 
														
															     if 'auto' == mark: 
														 |