5 maanden geleden · 3cc7b4df15
--- a/crossborder/anhui/crawl_gov_anhui_full.py
+++ b/crossborder/anhui/crawl_gov_anhui_full.py
@@ -1,3 +1,4 @@
 
				+import argparse
			
 
				 import os
			
 
				 import random
			
 
				 import re
			
@@ -12,11 +13,11 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from anhui import gov_commodity_anhui_city, download_dir
			
 
				-from anhui import gov_commodity_anhui_country
			
 
				-from anhui import gov_commodity_anhui_import_export
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.anhui import gov_commodity_anhui_city, download_dir
			
 
				+from crossborder.anhui import gov_commodity_anhui_country
			
 
				+from crossborder.anhui import gov_commodity_anhui_import_export
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 def configure_stealth_options():
			
 
				     """增强型反检测配置[1,4](@ref)"""
			
@@ -194,7 +195,7 @@ def crawl_with_selenium(url, mark):
 
				     driver = webdriver.Firefox(options=configure_stealth_options())
			
 
				 
			
 
				     year_month = None
			
 
				-    if 'increment' == mark:
			
 
				+    if 'auto' == mark:
			
 
				         res = detect_latest_month(driver, url)
			
 
				         if res is None:
			
 
				             log.info("安徽省海关没有最新数据更新")
			
@@ -319,17 +320,14 @@ def hierarchical_traversal(root_path):
 
				                 gov_commodity_anhui_city.process_folder(md['path'])
			
 
				 
			
 
				 def main():
			
 
				-    crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html', 'all')
			
 
				-    # crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html', 'increment')
			
 
				-    # print(f"安徽合肥海关全量数据下载任务完成")
			
 
				-    # # 等待5s后执行
			
 
				-    # time.sleep(5)
			
 
				-    # hierarchical_traversal(base_country_code.download_dir)
			
 
				-    # print("安徽合肥海关类章、国家、城市所有文件处理完成！")
			
 
				-    # time.sleep(5)
			
 
				-    # base_mysql.update_january_yoy('安徽省')
			
 
				-    # base_mysql.update_shandong_yoy('安徽省')
			
 
				-    # print("安徽合肥海关城市同比sql处理完成")
			
 
				+    """Read the crawl mode ('all' = full, 'auto' = incremental) from the CLI and crawl the Hefei customs page."""
			
 
				+    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
			
 
				+    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # argparse `choices` limits mode to 'all' or 'auto', so it is passed straight through as the mark.
			
 
				+    url = 'http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html'
			
 
				+    crawl_with_selenium(url, args.mode)
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     main()
			
--- a/crossborder/anhui/gov_commodity_anhui_city.py
+++ b/crossborder/anhui/gov_commodity_anhui_city.py
@@ -2,10 +2,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from anhui import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.anhui import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 city_code_map = {
			
 
				     "安徽省合肥市": "340100",
			
--- a/crossborder/anhui/gov_commodity_anhui_country.py
+++ b/crossborder/anhui/gov_commodity_anhui_country.py
@@ -2,10 +2,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from anhui import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.anhui import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 # 排除地区名单
			
 
				 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
			
--- a/crossborder/anhui/gov_commodity_anhui_import_export.py
+++ b/crossborder/anhui/gov_commodity_anhui_import_export.py
@@ -3,11 +3,11 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from anhui import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				+from crossborder.anhui import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				 
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 CUSTOM_COMMODITY_REPLACEMENTS = {
			
 
				     '家具': '家具及其零件',
			
--- a/crossborder/auto_incre_main.py
+++ b/crossborder/auto_incre_main.py
@@ -1,7 +1,7 @@
 
				-from anhui import crawl_gov_anhui_full
			
 
				-from hebei import crawl_gov_hebei_full
			
 
				-from jiangsu import crawl_gov_jiangsu_full
			
 
				-from zhejiang import crawl_gov_zhejiang_full
			
 
				+from crossborder.anhui import crawl_gov_anhui_full
			
 
				+from crossborder.hebei import crawl_gov_hebei_full
			
 
				+from crossborder.jiangsu import crawl_gov_jiangsu_full
			
 
				+from crossborder.zhejiang import crawl_gov_zhejiang_full
			
 
				 from fujian import selenium_fujian_download
			
 
				 from henan import selenium_henan_download
			
 
				 from shandong import selenium_shandong_download
			
--- a/crossborder/fujian/fujian_parse_excel.py
+++ b/crossborder/fujian/fujian_parse_excel.py
@@ -3,9 +3,9 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.parse_utils import convert_wan_to_yuan, extract_year_month_from_path, traverse_and_process
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.parse_utils import convert_wan_to_yuan, extract_year_month_from_path, traverse_and_process
			
 
				 
			
 
				 FUJIAN_CITY = {
			
 
				 "福州市": "350100",
			
--- a/crossborder/fujian/selenium_fujian_download.py
+++ b/crossborder/fujian/selenium_fujian_download.py
@@ -10,9 +10,9 @@ from selenium.webdriver.support import expected_conditions as EC
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				 from fujian.fujian_parse_excel import parse_excel
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
			
 
				-from utils.parse_utils import traverse_and_process
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
			
 
				+from crossborder.utils.parse_utils import traverse_and_process
			
 
				 
			
 
				 # 基础配置
			
 
				 
			
--- a/crossborder/guangdong/guangdong_gongbei_parse_excel.py
+++ b/crossborder/guangdong/guangdong_gongbei_parse_excel.py
@@ -3,10 +3,10 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				 from quanguo.detail import parse_value
			
 
				-from utils.constants import GUANGDONG_CITY
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.constants import GUANGDONG_CITY
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 PROV_CODE = "440000"
			
 
				 PROV_NAME = "广东省"
			
--- a/crossborder/guangdong/guangdong_sub_customs_parse_excel.py
+++ b/crossborder/guangdong/guangdong_sub_customs_parse_excel.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				-from utils.constants import DOWNLOAD_DIR, GUANGDONG_CITY
			
 
				-from utils.log import log
			
 
				-from utils.parse_utils import traverse_and_process, extract_year_month_from_path, get_previous_month_dir
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR, GUANGDONG_CITY
			
 
				+from crossborder.utils.log import log
			
 
				+from crossborder.utils.parse_utils import traverse_and_process, extract_year_month_from_path, get_previous_month_dir
			
 
				 
			
 
				 # 配置日志
			
 
				 PROV_CODE = "440000"
			
@@ -643,7 +643,7 @@ def parse_excel(current_dir):
 
				         # log.debug(f"处理后数据示例:\n{final_df.head()}")
			
 
				 
			
 
				         # 这里调用DBHelper入库（实际使用时请取消注释）
			
 
				-        from utils.db_helper import DBHelper
			
 
				+        from crossborder.utils.db_helper import DBHelper
			
 
				         db = DBHelper()
			
 
				         db.bulk_insert(
			
 
				             final_df,
			
--- a/crossborder/guangdong/selenium_guangdong_city.py
+++ b/crossborder/guangdong/selenium_guangdong_city.py
@@ -10,15 +10,15 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				 from guangdong.guangdong_gongbei_parse_excel import parse_region_table, calculate_monthly_data
			
 
				 from guangdong.guangdong_sub_customs_parse_excel import parse_excel
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.constants import GUANGDONG_CUSTOMS_URL
			
 
				-from utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel, download_excel2, \
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.constants import GUANGDONG_CUSTOMS_URL
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel, download_excel2, \
			
 
				     batch_download_excel
			
 
				-from utils.log import log
			
 
				-from utils.parse_utils import traverse_and_process
			
 
				+from crossborder.utils.log import log
			
 
				+from crossborder.utils.parse_utils import traverse_and_process
			
 
				 
			
 
				 download_dir = DOWNLOAD_DIR / "guangdong"
			
 
				 
			
--- a/crossborder/guangdong/selenium_guangdong_download.py
+++ b/crossborder/guangdong/selenium_guangdong_download.py
@@ -11,11 +11,11 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				-from utils.constants import DOWNLOAD_DIR, COUNTRY_CODE_MAPPING
			
 
				-from utils.download_utils import configure_stealth_options, generate_month_sequence
			
 
				-from utils.log import log
			
 
				-from utils.parse_utils import clean_county_name, convert_wan_to_yuan, clean_commodity_name
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR, COUNTRY_CODE_MAPPING
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence
			
 
				+from crossborder.utils.log import log
			
 
				+from crossborder.utils.parse_utils import clean_county_name, convert_wan_to_yuan, clean_commodity_name
			
 
				 
			
 
				 
			
 
				 BASE_URL = "http://gdfs.customs.gov.cn/guangdong_sub/zwgk62/sjgb59/6b4cdb3f-1.html"
			
--- a/crossborder/hebei/crawl_gov_hebei_full.py
+++ b/crossborder/hebei/crawl_gov_hebei_full.py
@@ -1,3 +1,4 @@
 
				+import argparse
			
 
				 import os
			
 
				 import random
			
 
				 import re
			
@@ -12,12 +13,12 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from hebei import download_dir
			
 
				-from hebei import gov_commodity_hebei_city
			
 
				-from hebei import gov_commodity_hebei_country
			
 
				-from hebei import gov_commodity_hebei_import_export
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.hebei import download_dir
			
 
				+from crossborder.hebei import gov_commodity_hebei_city
			
 
				+from crossborder.hebei import gov_commodity_hebei_country
			
 
				+from crossborder.hebei import gov_commodity_hebei_import_export
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 
			
 
				 def get_current_target_titles():
			
@@ -172,7 +173,7 @@ def crawl_with_selenium(url, mark):
 
				     driver = webdriver.Firefox(options=configure_stealth_options())
			
 
				 
			
 
				     year_month = None
			
 
				-    if 'increment' == mark:
			
 
				+    if 'auto' == mark:
			
 
				         res = detect_latest_month(driver, url)
			
 
				         if res is None:
			
 
				             log.info("河北省海关没有最新数据更新")
			
@@ -296,16 +297,14 @@ def hierarchical_traversal(root_path):
 
				 
			
 
				 
			
 
				 def main():
			
 
				-    # crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'all')
			
 
				-    crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'increment')
			
 
				-    # 等待5s后执行
			
 
				-    # time.sleep(5)
			
 
				-    # hierarchical_traversal(base_country_code.download_dir)
			
 
				-    # log.info(f"河北石家庄海关全量数据下载任务完成")
			
 
				-    # time.sleep(5)
			
 
				-    # base_mysql.update_january_yoy('河北省')
			
 
				-    # base_mysql.update_shandong_yoy('河北省')
			
 
				-    # log.info("河北石家庄海关城市同比sql处理完成")
			
 
				+    """Read the crawl mode ('all' = full, 'auto' = incremental) from the CLI and crawl the Shijiazhuang customs page."""
			
 
				+    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
			
 
				+    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # argparse `choices` limits mode to 'all' or 'auto', so it is passed straight through as the mark.
			
 
				+    url = 'http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html'
			
 
				+    crawl_with_selenium(url, args.mode)
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     main()
			
--- a/crossborder/hebei/gov_commodity_hebei_city.py
+++ b/crossborder/hebei/gov_commodity_hebei_city.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 import pandas
			
 
				 import pandas as pd
			
 
				 
			
 
				-from hebei import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.hebei import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 city_code_map = {
			
 
				     "石家庄市": "130100",
			
--- a/crossborder/hebei/gov_commodity_hebei_country.py
+++ b/crossborder/hebei/gov_commodity_hebei_country.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 import pandas
			
 
				 import pandas as pd
			
 
				 
			
 
				-from hebei import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.hebei import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
			
 
				                    "东南亚国家联盟", "欧洲联盟", "亚太经济合作组织",
			
--- a/crossborder/hebei/gov_commodity_hebei_import_export.py
+++ b/crossborder/hebei/gov_commodity_hebei_import_export.py
@@ -2,11 +2,11 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 import re
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				-from hebei import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				+from crossborder.hebei import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				 
			
 
				 CUSTOM_COMMODITY_REPLACEMENTS = {
			
 
				     '稻谷及大米': '稻谷、大米及大米粉',
			
--- a/crossborder/henan/henan_parse_excel.py
+++ b/crossborder/henan/henan_parse_excel.py
@@ -3,9 +3,9 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				-from utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR
			
 
				-from utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, find_unmatched_countries, \
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				+from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR
			
 
				+from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, find_unmatched_countries, \
			
 
				     extract_year_month_from_path, traverse_and_process
			
 
				 
			
 
				 # 常量配置（新增路径正则校验）
			
--- a/crossborder/henan/selenium_henan_download.py
+++ b/crossborder/henan/selenium_henan_download.py
@@ -13,9 +13,9 @@ from selenium.webdriver.support import expected_conditions as EC
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				 from henan.henan_parse_excel import parse_excel
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.download_utils import configure_stealth_options, get_previous_month, download_excel, generate_month_sequence
			
 
				-from utils.parse_utils import traverse_and_process
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, get_previous_month, download_excel, generate_month_sequence
			
 
				+from crossborder.utils.parse_utils import traverse_and_process
			
 
				 
			
 
				 # 基础配置
			
 
				 
			
--- a/crossborder/jiangsu/crawl_gov_jiangsu_full.py
+++ b/crossborder/jiangsu/crawl_gov_jiangsu_full.py
@@ -1,3 +1,4 @@
 
				+import argparse
			
 
				 import os
			
 
				 import random
			
 
				 import re
			
@@ -16,13 +17,13 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from jiangsu import download_dir
			
 
				-from jiangsu import gov_commodity_jiangsu_country
			
 
				-from jiangsu import gov_commodity_jiangsu_city
			
 
				-from jiangsu import gov_commodity_jiangsu_import_export
			
 
				+from crossborder.jiangsu import download_dir
			
 
				+from crossborder.jiangsu import gov_commodity_jiangsu_country
			
 
				+from crossborder.jiangsu import gov_commodity_jiangsu_city
			
 
				+from crossborder.jiangsu import gov_commodity_jiangsu_import_export
			
 
				 
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe"
			
 
				 rarfile.UNRAR_EXECUTABLE = "unrar"
			
@@ -213,7 +214,7 @@ def crawl_with_selenium(url, mark):
 
				     driver = webdriver.Firefox(options=configure_stealth_options())
			
 
				 
			
 
				     year_month = None
			
 
				-    if 'increment' == mark:
			
 
				+    if 'auto' == mark:
			
 
				         res = detect_latest_month(driver, url)
			
 
				         if res is None:
			
 
				             log.info("江苏省海关没有最新数据更新")
			
@@ -333,18 +334,14 @@ def hierarchical_traversal(root_path, all_records):
 
				                 gov_commodity_jiangsu_city.process_folder(md['path'])
			
 
				 
			
 
				 def main():
			
 
				-    crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all')
			
 
				-    # crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment')
			
 
				-    # log.info(f"江苏南京海关全量数据下载任务完成")
			
 
				-    # # 等待5s后执行
			
 
				-    # time.sleep(5)
			
 
				-    # all_records = base_mysql.get_hs_all()
			
 
				-    # hierarchical_traversal(base_country_code.download_dir, all_records)
			
 
				-    # log.info("江苏南京海关类章、国家、城市所有文件处理完成！")
			
 
				-    # time.sleep(5)
			
 
				-    # base_mysql.update_january_yoy('江苏省')
			
 
				-    # base_mysql.update_shandong_yoy('江苏省')
			
 
				-    # log.info("江苏南京海关城市同比sql处理完成")
			
 
				+    """Read the crawl mode ('all' = full, 'auto' = incremental) from the CLI and crawl the Nanjing customs page."""
			
 
				+    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
			
 
				+    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    # argparse `choices` limits mode to 'all' or 'auto', so it is passed straight through as the mark.
			
 
				+    url = 'http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html'
			
 
				+    crawl_with_selenium(url, args.mode)
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     main()
			
--- a/crossborder/jiangsu/gov_commodity_jiangsu_city.py
+++ b/crossborder/jiangsu/gov_commodity_jiangsu_city.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from jiangsu import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.jiangsu import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 city_code_map = {
			
 
				     "南京市": "320100",
			
--- a/crossborder/jiangsu/gov_commodity_jiangsu_country.py
+++ b/crossborder/jiangsu/gov_commodity_jiangsu_country.py
@@ -2,10 +2,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from jiangsu import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.jiangsu import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 # 排除地区名单
			
 
				 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
			
--- a/crossborder/jiangsu/gov_commodity_jiangsu_import_export.py
+++ b/crossborder/jiangsu/gov_commodity_jiangsu_import_export.py
@@ -3,9 +3,9 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from jiangsu import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.jiangsu import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 YEAR_PATTERN = re.compile(r"^\d{4}$")
			
 
				 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")
			
--- a/crossborder/quanguo/CountryTrade.py
+++ b/crossborder/quanguo/CountryTrade.py
@@ -2,7 +2,7 @@ import re
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.parse_utils import clean_county_name
			
 
				+from crossborder.utils.parse_utils import clean_county_name
			
 
				 
			
 
				 # 配置参数
			
 
				 EXCEL_PATH = r"D:/Downloads/2025051809262394128.xls"
			
--- a/crossborder/quanguo/CountryTradeYear.py
+++ b/crossborder/quanguo/CountryTradeYear.py
@@ -5,7 +5,7 @@ import pandas as pd
 
				 import pymysql
			
 
				 from pymysql import Error
			
 
				 
			
 
				-from utils.constants import COUNTRY_CODE_MAPPING
			
 
				+from crossborder.utils.constants import COUNTRY_CODE_MAPPING
			
 
				 
			
 
				 YEAR = 2023
			
 
				 
			
--- a/crossborder/quanguo/detail.py
+++ b/crossborder/quanguo/detail.py
@@ -1,6 +1,6 @@
 
				 # ---------------------------- 核心解析逻辑修改 ----------------------------
			
 
				 from quanguo.CountryTradeYear import COUNTRY_CODE_MAPPING
			
 
				-from utils.parse_utils import clean_county_name
			
 
				+from crossborder.utils.parse_utils import clean_county_name
			
 
				 
			
 
				 
			
 
				 def chinese_class_to_number(class_str):
			
--- a/crossborder/quanguo/selenium_download.py
+++ b/crossborder/quanguo/selenium_download.py
@@ -11,11 +11,11 @@ from selenium.webdriver import FirefoxOptions, ActionChains
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.download_utils import configure_stealth_options, wait_for_download, download_excel
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, wait_for_download, download_excel
			
 
				 from selenium.webdriver.common.by import By
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 YEAR = 2025
			
 
				 TARGET_TABLES = [
			
--- a/crossborder/shandong/selenium_shandong_download.py
+++ b/crossborder/shandong/selenium_shandong_download.py
@@ -9,12 +9,12 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				 from shandong.shandong_parse_excel import parse_excel
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
			
 
				-from utils.log import log
			
 
				-from utils.parse_utils import traverse_and_process
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
			
 
				+from crossborder.utils.log import log
			
 
				+from crossborder.utils.parse_utils import traverse_and_process
			
 
				 
			
 
				 # 基础配置
			
 
				 
			
--- a/crossborder/shandong/shandong_parse_excel.py
+++ b/crossborder/shandong/shandong_parse_excel.py
@@ -4,11 +4,11 @@ from pathlib import Path
 
				 import numpy as np
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.db_helper import DBHelper
			
 
				+from crossborder.utils.db_helper import DBHelper
			
 
				 from quanguo.CountryTrade import COUNTRY_CODE_MAPPING
			
 
				-from utils.constants import DOWNLOAD_DIR
			
 
				-from utils.log import log
			
 
				-from utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \
			
 
				+from crossborder.utils.constants import DOWNLOAD_DIR
			
 
				+from crossborder.utils.log import log
			
 
				+from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \
			
 
				     extract_year_month_from_path, get_previous_month_dir, find_unmatched_countries, traverse_and_process
			
 
				 
			
 
				 # 常量配置
			
--- a/crossborder/utils/base_country_code.py
+++ b/crossborder/utils/base_country_code.py
@@ -4,7 +4,7 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 YEAR_PATTERN = re.compile(r"^\d{4}$")
			
 
				 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")
			
--- a/crossborder/utils/base_mysql.py
+++ b/crossborder/utils/base_mysql.py
@@ -2,7 +2,7 @@ import pymysql
 
				 from sqlalchemy import create_engine, text
			
 
				 from urllib.parse import quote_plus
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 provinces = [
			
 
				     "北京市", "天津市", "上海市", "重庆市",
			
--- a/crossborder/utils/crawl_gov_commodity.py
+++ b/crossborder/utils/crawl_gov_commodity.py
@@ -1,7 +1,7 @@
 
				 import pandas as pd
			
 
				 
			
 
				-from utils import base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.utils import base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 
			
 
				 def generate_sql_from_excel(excel_file):
			
--- a/crossborder/utils/db_helper.py
+++ b/crossborder/utils/db_helper.py
@@ -5,7 +5,7 @@ import logging
 
				 import pymysql
			
 
				 import pandas as pd
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 DB_CONFIG = {
			
 
				     'host': '10.130.75.149',
			
--- a/crossborder/utils/download_utils.py
+++ b/crossborder/utils/download_utils.py
@@ -10,7 +10,7 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 DOWNLOAD_TIMEOUT = 60
			
 
				 
			
--- a/crossborder/utils/parse_utils.py
+++ b/crossborder/utils/parse_utils.py
@@ -2,7 +2,7 @@ import re
 
				 from decimal import Decimal
			
 
				 from pathlib import Path
			
 
				 
			
 
				-from utils.log import log
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 YEAR_PATTERN = re.compile(r"^\d{4}$")
			
 
				 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")
			
--- a/crossborder/zhejiang/crawl_gov_zhejiang_full.py
+++ b/crossborder/zhejiang/crawl_gov_zhejiang_full.py
@@ -1,3 +1,4 @@
 
				+import argparse
			
 
				 import os
			
 
				 import random
			
 
				 import re
			
@@ -15,12 +16,12 @@ from selenium.webdriver.common.by import By
 
				 from selenium.webdriver.support import expected_conditions as EC
			
 
				 from selenium.webdriver.support.ui import WebDriverWait
			
 
				 
			
 
				-from zhejiang import download_dir
			
 
				-from zhejiang import gov_commodity_zhejiang_city
			
 
				-from zhejiang import gov_commodity_zhejiang_country
			
 
				-from zhejiang import gov_commodity_zhejiang_import_export
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.log import log
			
 
				+from crossborder.zhejiang import download_dir
			
 
				+from crossborder.zhejiang import gov_commodity_zhejiang_city
			
 
				+from crossborder.zhejiang import gov_commodity_zhejiang_country
			
 
				+from crossborder.zhejiang import gov_commodity_zhejiang_import_export
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 def configure_stealth_options():
			
 
				     """增强型反检测配置[1,4](@ref)"""
			
@@ -120,7 +121,7 @@ def process_month_tabs(driver, year, base_url, year_month):
 
				                         retry_count += 1
			
 
				                         break
			
 
				                     if tar_month != month_text:
			
 
				-                        log.info(f"{year}年 {month_text} 月份跳过, increment tar: {year_month}")
			
 
				+                        log.info(f"{year}年 {month_text} 月份跳过, auto tar: {year_month}")
			
 
				                         continue
			
 
				                 a_tag.click()
			
 
				 
			
@@ -283,7 +284,7 @@ def crawl_with_selenium(url, mark):
 
				     driver = webdriver.Firefox(options=configure_stealth_options())
			
 
				 
			
 
				     year_month = None
			
 
				-    if 'increment' == mark:
			
 
				+    if 'auto' == mark:
			
 
				         res = detect_latest_month(driver, url)
			
 
				         if res is None:
			
 
				             log.info("浙江省海关没有最新数据更新")
			
@@ -380,17 +381,14 @@ def hierarchical_traversal(root_path):
 
				                 gov_commodity_zhejiang_city.process_folder(md['path'])
			
 
				 
			
 
				 def main():
			
 
				-    # crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
			
 
				-    crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'increment')
			
 
				-    # log.info(f"浙江杭州海关全量数据下载任务完成")
			
 
				-    # # 等待5s后执行
			
 
				-    # time.sleep(5)
			
 
				-    # hierarchical_traversal(download_dir)
			
 
				-    # log.info("浙江杭州海关类章、国家、城市所有文件处理完成！")
			
 
				-    # time.sleep(5)
			
 
				-    # base_mysql.update_january_yoy('浙江省')
			
 
				-    # base_mysql.update_shandong_yoy('浙江省')
			
 
				-    # log.info("浙江杭州海关城市同比sql处理完成")
			
 
				+    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
			
 
				+    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
			
 
				+    args = parser.parse_args()
			
 
				+
			
 
				+    if args.mode == "all":
			
 
				+        crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
			
 
				+    else:
			
 
				+        crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
			
 
				 
			
 
				 if __name__ == '__main__':
			
 
				     main()
			
--- a/crossborder/zhejiang/gov_commodity_zhejiang_city.py
+++ b/crossborder/zhejiang/gov_commodity_zhejiang_city.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from zhejiang import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.zhejiang import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 city_code_map = {
			
 
				     "杭州地区": "330100",
			
--- a/crossborder/zhejiang/gov_commodity_zhejiang_country.py
+++ b/crossborder/zhejiang/gov_commodity_zhejiang_country.py
@@ -2,10 +2,10 @@ from pathlib import Path
 
				 
			
 
				 import pandas as pd
			
 
				 
			
 
				-from zhejiang import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.zhejiang import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 # 排除地区名单
			
 
				 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
			
--- a/crossborder/zhejiang/gov_commodity_zhejiang_import_export.py
+++ b/crossborder/zhejiang/gov_commodity_zhejiang_import_export.py
@@ -3,10 +3,10 @@ from pathlib import Path
 
				 import re
			
 
				 import pandas as pd
			
 
				 
			
 
				-from zhejiang import download_dir
			
 
				-from utils import base_country_code, base_mysql
			
 
				-from utils.base_country_code import format_sql_value
			
 
				-from utils.log import log
			
 
				+from crossborder.zhejiang import download_dir
			
 
				+from crossborder.utils import base_country_code, base_mysql
			
 
				+from crossborder.utils.base_country_code import format_sql_value
			
 
				+from crossborder.utils.log import log
			
 
				 
			
 
				 CUSTOM_COMMODITY_REPLACEMENTS = {
			
 
				     '稻谷及大米': '稻谷、大米及大米粉',