Browse Source

广东湛江海关地级市目录适配

zhangfan 2 months ago
parent
commit
97c6a90b9a

+ 0 - 15
crossborder/anhui/crawl_gov_anhui_full.py

@@ -169,25 +169,10 @@ def detect_latest_month(driver, url):
     log.error("三个月内未找到有效数据")
     return None
 
-def check_internet_connection(url="http://www.baidu.com", timeout=5):
-    """检查网络连接"""
-    try:
-        urllib.request.urlopen(url, timeout=timeout)
-        return True
-    except urllib.error.URLError:
-        return False
-
 def crawl_with_selenium(url, mark):
-    # if not check_internet_connection():
-    #     log.error("无法连接到互联网,请检查网络设置")
-    #     raise Exception("网络连接失败")
-
     driver = None
     year_month = None
     try:
-        # 使用WebDriverManager自动管理geckodriver
-        # service = Service(GeckoDriverManager().install())
-        # driver = webdriver.Firefox(service=service, options=configure_stealth_options())
         driver = webdriver.Firefox(options=configure_stealth_options(download_dir))
         log.info("Firefox WebDriver初始化成功")
 

+ 2 - 0
crossborder/guangdong/selenium_guangdong_city.py

@@ -52,6 +52,8 @@ def generate_target_title(check_year, check_month, customs_name):
     elif customs_name == "湛江海关":
         if check_month == 3:
             target_title = rf"{check_year}年\s*(?:一季度|前3个月|3月).*外贸进出口数据"
+        elif check_month == 6:
+            target_title = rf'{check_year}年\s*(?:上半年|前6个月|6月)?湛江市、茂名市(?:外贸)?进出口数据'
         elif check_month == 9:
             target_title = rf"{check_year}年\s*(?:前三季度|前9个月|9月).*外贸进出口数据"
         elif check_month == 12: