| 
					
				 | 
			
			
				@@ -3,59 +3,27 @@ import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import re 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-import sys 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from datetime import datetime, timedelta 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from pathlib import Path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from urllib.parse import urljoin 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from faker import Faker 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium import webdriver 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.common.exceptions import StaleElementReferenceException 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from selenium.webdriver import FirefoxOptions 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support import expected_conditions as EC 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support.ui import WebDriverWait 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from crossborder.utils.base_country_code import extract_year_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.dingtalk import send_dingtalk_message 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.download_utils import configure_stealth_options 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.log import get_logger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.zhejiang import download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.zhejiang import gov_commodity_zhejiang_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.zhejiang import gov_commodity_zhejiang_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.zhejiang import gov_commodity_zhejiang_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from crossborder.utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from crossborder.utils.log import  get_logger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 log = get_logger(__name__) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def configure_stealth_options(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """增强型反检测配置[1,4](@ref)""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts = FirefoxOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("当前下载路径:", Path(download_dir).resolve()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 文件下载配置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.download.dir", download_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.download.folderList", 2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.download.manager.showWhenStarting", False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.helperApps.neverAsk.saveToDisk", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        "application/octet-stream, application/vnd.ms-excel")  # 覆盖常见文件类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.download.manager.useWindow", False)  # 禁用下载管理器窗口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("browser.download.manager.showAlertOnComplete", False)  # 关闭完成提示 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 反检测参数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("dom.webdriver.enabled", False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("useAutomationExtension", False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.add_argument("--disable-blink-features=AutomationControlled") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 动态指纹 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    fake = Faker() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("general.useragent.override", fake.firefox()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.set_preference("intl.accept_languages", "zh-CN,zh;q=0.9") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # 视口配置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.add_argument("--width=1440") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.add_argument("--height=900") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    opts.add_argument("--headless") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return opts 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def crawl_by_year_tabs(driver, base_url, year_month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """按年份Tab导航采集数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     years = ['2023年', '2024年', '2025年'] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -387,7 +355,7 @@ def extract_year_month_chinese(text): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return year, month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    driver = webdriver.Firefox(options=configure_stealth_options()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    driver = webdriver.Firefox(options=configure_stealth_options(download_dir)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     year_month = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if 'auto' == mark: 
			 |