| 
														
															@@ -6,6 +6,8 @@ import time 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
														
														 | 
														
															 from datetime import datetime, timedelta 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from pathlib import Path 
														 | 
														
														 | 
														
															 from pathlib import Path 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from selenium.webdriver.firefox.service import Service 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+from webdriver_manager.firefox import GeckoDriverManager 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from faker import Faker 
														 | 
														
														 | 
														
															 from faker import Faker 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium import webdriver 
														 | 
														
														 | 
														
															 from selenium import webdriver 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from selenium.webdriver import FirefoxOptions 
														 | 
														
														 | 
														
															 from selenium.webdriver import FirefoxOptions 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -20,6 +22,8 @@ from crossborder.utils import base_country_code, base_mysql 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.base_country_code import extract_year_month 
														 | 
														
														 | 
														
															 from crossborder.utils.base_country_code import extract_year_month 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
														
														 | 
														
															 from crossborder.utils.dingtalk import send_dingtalk_message 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 from crossborder.utils.log import  get_logger 
														 | 
														
														 | 
														
															 from crossborder.utils.log import  get_logger 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+import urllib.request 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+import urllib.error 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 log = get_logger(__name__) 
														 | 
														
														 | 
														
															 log = get_logger(__name__) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -190,24 +194,43 @@ def detect_latest_month(driver, url): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 continue 
														 | 
														
														 | 
														
															                 continue 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             return f"{check_year}年{check_month}月" 
														 | 
														
														 | 
														
															             return f"{check_year}年{check_month}月" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         except: 
														 | 
														
														 | 
														
															         except: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        # except Exception as e: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             log.error(f"未找到 {target_title}") 
														 | 
														
														 | 
														
															             log.error(f"未找到 {target_title}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            # log.error(f"未找到 {target_title} {e}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             continue 
														 | 
														
														 | 
														
															             continue 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     log.error("三个月内未找到有效数据") 
														 | 
														
														 | 
														
															     log.error("三个月内未找到有效数据") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     return None 
														 | 
														
														 | 
														
															     return None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+def check_internet_connection(url="http://www.baidu.com", timeout=5): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    """检查网络连接""" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    try: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        urllib.request.urlopen(url, timeout=timeout) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        return True 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    except urllib.error.URLError: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        return False 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 def crawl_with_selenium(url, mark): 
														 | 
														
														 | 
														
															 def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    driver = webdriver.Firefox(options=configure_stealth_options()) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    if not check_internet_connection(): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.error("无法连接到互联网,请检查网络设置") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        raise Exception("网络连接失败") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    driver = None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     year_month = None 
														 | 
														
														 | 
														
															     year_month = None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    if 'auto' == mark: 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        res = detect_latest_month(driver, url) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        if res is None: 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            log.info("安徽省海关没有最新数据更新") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-            return None 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        year_month = res 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        print(f"检测到最新有效数据:{year_month}") 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															- 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     try: 
														 | 
														
														 | 
														
															     try: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        # 使用WebDriverManager自动管理geckodriver 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        service = Service(GeckoDriverManager().install()) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        driver = webdriver.Firefox(service=service, options=configure_stealth_options()) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.info("Firefox WebDriver初始化成功") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        year_month = None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        if 'auto' == mark: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            res = detect_latest_month(driver, url) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            if res is None: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                log.info("安徽省海关没有最新数据更新") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+                return None 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            year_month = res 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            print(f"检测到最新有效数据:{year_month}") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+ 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         # 注入反检测脚本 
														 | 
														
														 | 
														
															         # 注入反检测脚本 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         driver.execute_script(""" 
														 | 
														
														 | 
														
															         driver.execute_script(""" 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             Object.defineProperty(navigator, 'webdriver', {  
														 | 
														
														 | 
														
															             Object.defineProperty(navigator, 'webdriver', {  
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -251,13 +274,14 @@ def crawl_with_selenium(url, mark): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															             log.info(f"开始采集 {next_page_url} 页面数据") 
														 | 
														
														 | 
														
															             log.info(f"开始采集 {next_page_url} 页面数据") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															     finally: 
														 | 
														
														 | 
														
															     finally: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        driver.quit() 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        if driver: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+            driver.quit() 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print(f"安徽省合肥海关全量数据下载任务完成") 
														 | 
														
														 | 
														
															         print(f"安徽省合肥海关全量数据下载任务完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         # 等待5s后执行 
														 | 
														
														 | 
														
															         # 等待5s后执行 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        time.sleep(5) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        time.sleep(3) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         hierarchical_traversal(download_dir, year_month) 
														 | 
														
														 | 
														
															         hierarchical_traversal(download_dir, year_month) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print("安徽省海关类章、国家、城市所有文件处理完成!") 
														 | 
														
														 | 
														
															         print("安徽省海关类章、国家、城市所有文件处理完成!") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        time.sleep(5) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        time.sleep(3) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         base_mysql.update_shandong_yoy('安徽省') 
														 | 
														
														 | 
														
															         base_mysql.update_shandong_yoy('安徽省') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         print("安徽省合肥海关城市同比sql处理完成") 
														 | 
														
														 | 
														
															         print("安徽省合肥海关城市同比sql处理完成") 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     return 'finish', year_month 
														 | 
														
														 | 
														
															     return 'finish', year_month 
														 | 
													
												
											
										
											
												
													
														 | 
														
															@@ -353,7 +377,8 @@ def main(): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                     send_dingtalk_message(f'【安徽省海关】 {r2} 增量数据采集完成,{int(minutes)}分{seconds:.1f}秒') 
														 | 
														
														 | 
														
															                     send_dingtalk_message(f'【安徽省海关】 {r2} 增量数据采集完成,{int(minutes)}分{seconds:.1f}秒') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															     except Exception as e: 
														 | 
														
														 | 
														
															     except Exception as e: 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-        send_dingtalk_message(f'【安徽省海关】发生错误:{e}') 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        # send_dingtalk_message(f'【安徽省海关】发生错误:{e}') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+        log.error(f'【安徽省海关】发生错误:{e}') 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															 if __name__ == '__main__': 
														 | 
														
														 | 
														
															 if __name__ == '__main__': 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     main() 
														 | 
														
														 | 
														
															     main() 
														 |