Przeglądaj źródła

1.日志调用修改
2.消息通知修改

01495251 1 miesiąc temu
rodzic
commit
3089bb6643
39 zmienionych plików z 262 dodań i 111 usunięć
  1. 3 1
      crossborder/anhui/crawl_gov_anhui_full.py
  2. 3 1
      crossborder/anhui/gov_commodity_anhui_city.py
  3. 3 1
      crossborder/anhui/gov_commodity_anhui_country.py
  4. 3 1
      crossborder/anhui/gov_commodity_anhui_import_export.py
  5. 3 1
      crossborder/cli.py
  6. 4 1
      crossborder/fujian/fujian_parse_excel.py
  7. 10 2
      crossborder/fujian/selenium_fujian_download.py
  8. 4 1
      crossborder/guangdong/guangdong_gongbei_parse_excel.py
  9. 3 1
      crossborder/guangdong/guangdong_sub_customs_parse_excel.py
  10. 3 1
      crossborder/guangdong/selenium_guangdong_city.py
  11. 9 2
      crossborder/guangdong/selenium_guangdong_download.py
  12. 3 1
      crossborder/hebei/crawl_gov_hebei_full.py
  13. 3 1
      crossborder/hebei/gov_commodity_hebei_city.py
  14. 3 1
      crossborder/hebei/gov_commodity_hebei_country.py
  15. 3 1
      crossborder/hebei/gov_commodity_hebei_import_export.py
  16. 11 2
      crossborder/henan/selenium_henan_download.py
  17. 3 1
      crossborder/jiangsu/crawl_gov_jiangsu_full.py
  18. 3 1
      crossborder/jiangsu/gov_commodity_jiangsu_city.py
  19. 3 1
      crossborder/jiangsu/gov_commodity_jiangsu_country.py
  20. 3 1
      crossborder/jiangsu/gov_commodity_jiangsu_import_export.py
  21. 3 1
      crossborder/quanguo/data_cleaning_to_db.py
  22. 3 1
      crossborder/quanguo/parse_country_table_excel.py
  23. 3 1
      crossborder/quanguo/parse_month_excel.py
  24. 3 1
      crossborder/quanguo/parse_region_table_excel.py
  25. 3 1
      crossborder/quanguo/parse_year_excel.py
  26. 10 1
      crossborder/quanguo/selenium_download.py
  27. 10 1
      crossborder/shandong/selenium_shandong_download.py
  28. 3 1
      crossborder/shandong/shandong_parse_excel.py
  29. 3 1
      crossborder/utils/base_country_code.py
  30. 3 1
      crossborder/utils/base_mysql.py
  31. 3 1
      crossborder/utils/crawl_gov_commodity.py
  32. 3 1
      crossborder/utils/db_helper.py
  33. 3 1
      crossborder/utils/download_utils.py
  34. 111 70
      crossborder/utils/log.py
  35. 3 1
      crossborder/utils/parse_utils.py
  36. 3 1
      crossborder/zhejiang/crawl_gov_zhejiang_full.py
  37. 3 1
      crossborder/zhejiang/gov_commodity_zhejiang_city.py
  38. 3 1
      crossborder/zhejiang/gov_commodity_zhejiang_country.py
  39. 3 1
      crossborder/zhejiang/gov_commodity_zhejiang_import_export.py

+ 3 - 1
crossborder/anhui/crawl_gov_anhui_full.py

@@ -18,7 +18,9 @@ from crossborder.anhui import gov_commodity_anhui_country
 from crossborder.anhui import gov_commodity_anhui_import_export
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.dingtalk import send_dingtalk_message
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 def configure_stealth_options():
     """增强型反检测配置[1,4](@ref)"""

+ 3 - 1
crossborder/anhui/gov_commodity_anhui_city.py

@@ -5,7 +5,9 @@ import pandas as pd
 from crossborder.anhui import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 city_code_map = {
     "安徽省合肥市": "340100",

+ 3 - 1
crossborder/anhui/gov_commodity_anhui_country.py

@@ -5,7 +5,9 @@ import pandas as pd
 from crossborder.anhui import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 # 排除地区名单
 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",

+ 3 - 1
crossborder/anhui/gov_commodity_anhui_import_export.py

@@ -7,7 +7,9 @@ from crossborder.anhui import download_dir
 from crossborder.utils import base_country_code, base_mysql
 
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 CUSTOM_COMMODITY_REPLACEMENTS = {
     '家具': '家具及其零件',

+ 3 - 1
crossborder/cli.py

@@ -2,7 +2,9 @@ import argparse
 from importlib import import_module
 import sys
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 PROVINCE_MODULES = {
     "shandong": "crossborder.shandong.selenium_shandong_download",

+ 4 - 1
crossborder/fujian/fujian_parse_excel.py

@@ -6,6 +6,9 @@ import pandas as pd
 from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.constants import DOWNLOAD_DIR
 from crossborder.utils.parse_utils import convert_wan_to_yuan, extract_year_month_from_path, traverse_and_process
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 FUJIAN_CITY = {
 "福州市": "350100",
@@ -300,7 +303,7 @@ def calculate_january_values(df):
 
 if __name__ == "__main__":
     traverse_and_process(download_dir, parse_excel, province_name="fujian")
-    print("更新同比数据……")
+    log.info("更新同比数据……")
     db_helper = DBHelper()
     db_helper.update_prov_yoy("福建省")
     # parse_excel(download_dir/"2023"/"02")

+ 10 - 2
crossborder/fujian/selenium_fujian_download.py

@@ -12,8 +12,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 from crossborder.fujian.fujian_parse_excel import parse_excel
 from crossborder.utils.constants import DOWNLOAD_DIR
 from crossborder.utils.db_helper import DBHelper
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import traverse_and_process
 
 # 基础配置
@@ -163,10 +166,12 @@ def handle_retry(driver):
 
 def main():
     """主入口(优化参数处理逻辑)"""
+    global target_months
     parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
     parser.add_argument('--year', type=int, default=None,
                         help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
+    start_time = time.time()
     driver = webdriver.Firefox(options=configure_stealth_options(download_dir))
     try:
         # 智能检测最新有效月份
@@ -199,7 +204,10 @@ def main():
         log.info("\n福建省地级市数据同比更新中...")
         db_helper = DBHelper()
         db_helper.update_prov_yoy("福建省")
-
+        duration = time.time() - start_time
+        minutes, seconds = divmod(duration, 60)  # 转换为分钟和秒
+        message = f'【福建海关】{len(target_months)}个月份数据已采集完毕,总耗时:{int(minutes)}分{seconds:.1f}秒'
+        send_dingtalk_message(message)
 
 if __name__ == "__main__":
     main()

+ 4 - 1
crossborder/guangdong/guangdong_gongbei_parse_excel.py

@@ -8,8 +8,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 from crossborder.utils.db_helper import DBHelper
 
 from crossborder.utils.constants import GUANGDONG_CITY
-from crossborder.utils.log import log
 from crossborder.utils.parse_utils import parse_value
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
+
 
 PROV_CODE = "440000"
 PROV_NAME = "广东省"

+ 3 - 1
crossborder/guangdong/guangdong_sub_customs_parse_excel.py

@@ -6,7 +6,9 @@ import pandas as pd
 
 from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.constants import DOWNLOAD_DIR, GUANGDONG_CITY
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import traverse_and_process, extract_year_month_from_path, get_previous_month_dir
 
 # 配置日志

+ 3 - 1
crossborder/guangdong/selenium_guangdong_city.py

@@ -18,7 +18,9 @@ from crossborder.utils.constants import DOWNLOAD_DIR
 from crossborder.utils.constants import GUANGDONG_CUSTOMS_URL
 from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel, download_excel2, \
     batch_download_excel
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import traverse_and_process
 
 download_dir = DOWNLOAD_DIR / "guangdong"

+ 9 - 2
crossborder/guangdong/selenium_guangdong_download.py

@@ -14,8 +14,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 
 from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.constants import DOWNLOAD_DIR, COUNTRY_CODE_MAPPING
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import clean_county_name, convert_wan_to_yuan, clean_commodity_name
 
 
@@ -415,6 +418,7 @@ def main():
     parser.add_argument('--year', type=int, default=None,
                         help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
+    start_time = time.time()
     driver = webdriver.Firefox(options=configure_stealth_options(download_dir))
     try:
         # 智能检测最新有效月份
@@ -437,7 +441,10 @@ def main():
         log.info(f"【广东海关】目标采集月份序列:{target_months}")
         reverse_crawler(driver, target_months)
         log.info(f"【广东海关】{len(target_months)}个月份数据已采集完毕")
-
+        duration = time.time() - start_time
+        minutes, seconds = divmod(duration, 60)  # 转换为分钟和秒
+        message = f'【广东海关】{len(target_months)}个月份数据已采集完毕,总耗时:{int(minutes)}分{seconds:.1f}秒'
+        send_dingtalk_message(message)
     finally:
         driver.quit()
 

+ 3 - 1
crossborder/hebei/crawl_gov_hebei_full.py

@@ -19,7 +19,9 @@ from crossborder.hebei import gov_commodity_hebei_country
 from crossborder.hebei import gov_commodity_hebei_import_export
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.dingtalk import send_dingtalk_message
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 
 def get_current_target_titles():

+ 3 - 1
crossborder/hebei/gov_commodity_hebei_city.py

@@ -6,7 +6,9 @@ import pandas as pd
 from crossborder.hebei import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 city_code_map = {
     "石家庄市": "130100",

+ 3 - 1
crossborder/hebei/gov_commodity_hebei_country.py

@@ -6,7 +6,9 @@ import pandas as pd
 from crossborder.hebei import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
                    "东南亚国家联盟", "欧洲联盟", "亚太经济合作组织",

+ 3 - 1
crossborder/hebei/gov_commodity_hebei_import_export.py

@@ -2,7 +2,9 @@ from pathlib import Path
 
 import pandas as pd
 import re
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 from crossborder.hebei import download_dir
 from crossborder.utils import base_country_code, base_mysql

+ 11 - 2
crossborder/henan/selenium_henan_download.py

@@ -14,8 +14,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 
 from crossborder.henan.henan_parse_excel import parse_excel
 from crossborder.utils.constants import DOWNLOAD_DIR
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.download_utils import configure_stealth_options, get_previous_month, download_excel, generate_month_sequence
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import traverse_and_process
 
 # 基础配置
@@ -197,10 +200,12 @@ def handle_retry(driver):
 
 def main():
     """主入口(优化参数处理逻辑)"""
+    global target_months
     parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
     parser.add_argument('--year', type=int, default=None,
                         help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
+    start_time = time.time()
     driver = webdriver.Firefox(options=configure_stealth_options(download_dir))
     try:
         # 智能检测最新有效月份
@@ -224,11 +229,15 @@ def main():
         reverse_crawler(driver, target_months)
         log.info(f"{len(target_months)}个月份数据已采集完毕")
 
+
     finally:
         driver.quit()
         log.info("\n数据清洗入库中...")
         traverse_and_process(download_dir, parse_excel, province_name="henan", year=args.year)
-
+        duration = time.time() - start_time
+        minutes, seconds = divmod(duration, 60)  # 转换为分钟和秒
+        message = f'【河南海关】{len(target_months)}个月份数据已采集完毕,总耗时:{int(minutes)}分{seconds:.1f}秒'
+        send_dingtalk_message(message)
 
 if __name__ == "__main__":
     main()

+ 3 - 1
crossborder/jiangsu/crawl_gov_jiangsu_full.py

@@ -24,7 +24,9 @@ from crossborder.jiangsu import gov_commodity_jiangsu_import_export
 
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.dingtalk import send_dingtalk_message
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe"
 rarfile.UNRAR_EXECUTABLE = "unrar"

+ 3 - 1
crossborder/jiangsu/gov_commodity_jiangsu_city.py

@@ -6,7 +6,9 @@ import pandas as pd
 from crossborder.jiangsu import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 city_code_map = {
     "南京市": "320100",

+ 3 - 1
crossborder/jiangsu/gov_commodity_jiangsu_country.py

@@ -5,7 +5,9 @@ import pandas as pd
 from crossborder.jiangsu import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 # 排除地区名单
 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",

+ 3 - 1
crossborder/jiangsu/gov_commodity_jiangsu_import_export.py

@@ -5,7 +5,9 @@ import pandas as pd
 
 from crossborder.jiangsu import download_dir
 from crossborder.utils import base_country_code, base_mysql
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 YEAR_PATTERN = re.compile(r"^\d{4}$")
 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")

+ 3 - 1
crossborder/quanguo/data_cleaning_to_db.py

@@ -13,7 +13,9 @@ from crossborder.quanguo.parse_region_table_excel import parse_region_table_exce
 from crossborder.quanguo.parse_year_excel import parse_year_table_excel
 from crossborder.utils.base_mysql import provinces
 from crossborder.utils.constants import DOWNLOAD_DIR
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 
 def perform_data_cleanup_and_import(current_year):

+ 3 - 1
crossborder/quanguo/parse_country_table_excel.py

@@ -9,7 +9,9 @@ from pymysql import Error
 
 from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS
 from crossborder.utils.db_helper import DBHelper
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import parse_value, parse_ratio, extract_year_month_from_path
 
 

+ 3 - 1
crossborder/quanguo/parse_month_excel.py

@@ -4,7 +4,9 @@ from crossborder.utils.db_helper import DBHelper
 import xlrd
 import re
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import convert_unit, parse_ratio
 
 CURRENT_YEAR = str(datetime.now().year)

+ 3 - 1
crossborder/quanguo/parse_region_table_excel.py

@@ -4,7 +4,9 @@ import xlrd
 
 from crossborder.utils.constants import REGION_MAPPING
 from crossborder.utils.db_helper import DBHelper
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import extract_year_month_from_path, parse_value
 
 

+ 3 - 1
crossborder/quanguo/parse_year_excel.py

@@ -1,7 +1,9 @@
 from datetime import datetime
 import xlrd
 from crossborder.utils.db_helper import DBHelper
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import convert_unit, parse_ratio
 
 _parse_executed = False  # 模块级变量,控制执行次数

+ 10 - 1
crossborder/quanguo/selenium_download.py

@@ -13,8 +13,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 
 from crossborder.quanguo.data_cleaning_to_db import perform_data_cleanup_and_import
 from crossborder.utils.constants import DOWNLOAD_DIR
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.download_utils import configure_stealth_options, download_excel
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 base_url = "http://www.customs.gov.cn/customs/302249/zfxxgk/2799825/302274/302277/6348926/index.html"
 download_dir = DOWNLOAD_DIR / "total"
@@ -186,6 +189,8 @@ if __name__ == "__main__":
     parser.add_argument("--year", type=int, help="起始年份,例如:--year 2023")
     args = parser.parse_args()
 
+    start_time = time.time()
+
     current_year = datetime.now().year
     start_year = args.year if args.year else current_year
     years_to_crawl = list(range(start_year, current_year + 1))
@@ -208,3 +213,7 @@ if __name__ == "__main__":
         log.info("\n数据清洗入库中...")
         perform_data_cleanup_and_import(current_year)
         log.info("\n数据清洗入库完毕...")
+        duration = time.time() - start_time
+        minutes, seconds = divmod(duration, 60)  # 转换为分钟和秒
+        message = f'【海关总署】{start_year}年-{current_year}年数据已采集完毕,总耗时:{int(minutes)}分{seconds:.1f}秒'
+        send_dingtalk_message(message)

+ 10 - 1
crossborder/shandong/selenium_shandong_download.py

@@ -12,8 +12,11 @@ from selenium.webdriver.support.ui import WebDriverWait
 from crossborder.utils.db_helper import DBHelper
 from crossborder.shandong.shandong_parse_excel import parse_excel
 from crossborder.utils.constants import DOWNLOAD_DIR
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.download_utils import configure_stealth_options, generate_month_sequence, download_excel
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import traverse_and_process
 
 # 基础配置
@@ -161,10 +164,12 @@ def handle_retry(driver):
 
 def main():
     """主入口(优化参数处理逻辑)"""
+    global target_months
     parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
     parser.add_argument('--year', type=int, default=None,
                         help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
+    start_time = time.time()
     log.info("【山东海关】数据抓取开始".center(66, "*"))
     driver = webdriver.Firefox(options=configure_stealth_options(download_dir))
     try:
@@ -196,6 +201,10 @@ def main():
         log.info("\n山东省地级市数据同比更新中...")
         db_helper = DBHelper()
         db_helper.update_prov_yoy("山东省")
+        duration = time.time() - start_time
+        minutes, seconds = divmod(duration, 60)  # 转换为分钟和秒
+        message = f'【山东海关】{len(target_months)}个月份数据已采集完毕,总耗时:{int(minutes)}分{seconds:.1f}秒'
+        send_dingtalk_message(message)
 
 
 if __name__ == "__main__":

+ 3 - 1
crossborder/shandong/shandong_parse_excel.py

@@ -7,7 +7,9 @@ import pandas as pd
 
 from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.constants import DOWNLOAD_DIR, COUNTRY_CODE_MAPPING
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \
     extract_year_month_from_path, get_previous_month_dir, find_unmatched_countries, traverse_and_process
 

+ 3 - 1
crossborder/utils/base_country_code.py

@@ -4,7 +4,9 @@ from pathlib import Path
 
 import pandas as pd
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 YEAR_PATTERN = re.compile(r"^\d{4}$")
 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")

+ 3 - 1
crossborder/utils/base_mysql.py

@@ -4,7 +4,9 @@ import pymysql
 from sqlalchemy import text, create_engine
 
 from crossborder.utils.crypto_utils import AESCryptor
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 provinces = [
     "北京市", "天津市", "上海市", "重庆市",

+ 3 - 1
crossborder/utils/crawl_gov_commodity.py

@@ -1,7 +1,9 @@
 import pandas as pd
 
 from crossborder.utils import base_mysql
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 
 def generate_sql_from_excel(excel_file):

+ 3 - 1
crossborder/utils/db_helper.py

@@ -2,7 +2,9 @@ import pandas as pd
 from sqlalchemy import create_engine, text
 from crossborder.utils.crypto_utils import AESCryptor
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 DB_CONFIG = {
     'host': '10.130.75.149',

+ 3 - 1
crossborder/utils/download_utils.py

@@ -10,7 +10,9 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 DOWNLOAD_TIMEOUT = 60
 

+ 111 - 70
crossborder/utils/log.py

@@ -4,77 +4,118 @@ import sys
 from pathlib import Path
 import colorlog
 from logging.handlers import TimedRotatingFileHandler
-import datetime
 
-# 创建日志记录器
-log = logging.getLogger(__name__)
-log.setLevel(logging.INFO)
 
-# 获取项目根目录
 project_root = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent
 
-# 日志目录设置
-if sys.platform.startswith('linux'):
-    log_dir = Path('/home/crossborder/logs')
-else:
-    log_dir = project_root / 'logs'
-
-os.makedirs(log_dir, exist_ok=True)  # 自动创建目录(如果不存在)
-
-# 创建带日期归档的文件处理器 - 关键修改点
-log_file = log_dir / 'cross.log'
-file_handler = TimedRotatingFileHandler(
-    filename=str(log_file),
-    when='midnight',  # 每天午夜切换新日志
-    interval=1,       # 每天的间隔
-    backupCount=30,    # 保留30天的日志
-    encoding='utf-8',
-    utc=False         # 使用本地时间
-)
-
-# 设置时间后缀格式 - 在日志文件名后添加日期
-file_handler.suffix = "%Y-%m-%d"
-file_handler.setLevel(logging.INFO)
-
-# 创建控制台处理器(带颜色)
-console_handler = colorlog.StreamHandler()
-console_handler.setLevel(logging.DEBUG if os.getenv('DEBUG') else logging.INFO)
-
-# 文件格式器
-plain_formatter = logging.Formatter(
-    '%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S'
-)
-
-# 控制台颜色格式器
-color_formatter = colorlog.ColoredFormatter(
-    '%(log_color)s%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s',
-    datefmt='%Y-%m-%d %H:%M:%S',
-    log_colors={
-        'DEBUG': 'cyan',
-        'INFO': 'green',
-        'WARNING': 'yellow',
-        'ERROR': 'red',
-        'CRITICAL': 'red,bg_white',
-    }
-)
-
-# 应用格式器
-file_handler.setFormatter(plain_formatter)
-console_handler.setFormatter(color_formatter)
-
-# 添加处理器到记录器
-log.addHandler(file_handler)
-log.addHandler(console_handler)
-
-# 禁用不必要库的日志
-logging.getLogger("urllib3").setLevel(logging.WARNING)
-logging.getLogger("selenium").setLevel(logging.WARNING)
-
-# 测试日志
-if __name__ == "__main__":
-    log.debug("DEBUG级别日志 - 通常不会显示")
-    log.info("INFO级别日志 - 程序运行信息")
-    log.warning("WARNING级别日志 - 需要注意的问题")
-    log.error("ERROR级别日志 - 错误信息")
-    log.critical("CRITICAL级别日志 - 严重错误")
+def configure_logging():
+    """Install the project-wide logging setup on the root logger.
+
+    Idempotent: handlers already on the root logger are removed and closed
+    before new ones are attached, so repeated calls neither duplicate
+    output nor leak the previously opened log file.
+    """
+    root_logger = logging.getLogger()
+    root_logger.setLevel(logging.DEBUG)
+
+    # Detach stale handlers and close them to release their file handles.
+    for handler in root_logger.handlers[:]:
+        root_logger.removeHandler(handler)
+        handler.close()
+
+    # Log directory: fixed path on Linux, <project_root>/logs elsewhere.
+    if sys.platform.startswith('linux'):
+        log_dir = Path('/home/crossborder/logs')
+    else:
+        log_dir = project_root / 'logs'
+
+    os.makedirs(log_dir, exist_ok=True)
+
+    # File handler rotating at midnight, keeping 30 dated backups.
+    log_file = log_dir / 'cross.log'
+    file_handler = TimedRotatingFileHandler(
+        filename=str(log_file),
+        when='midnight',
+        interval=1,
+        backupCount=30,
+        encoding='utf-8',
+        utc=False
+    )
+    file_handler.suffix = "%Y-%m-%d"
+    file_handler.setLevel(logging.INFO)
+
+    # Colored console handler; threshold is DEBUG when $DEBUG is set, else INFO.
+    console_handler = colorlog.StreamHandler()
+    console_handler.setLevel(logging.DEBUG if os.getenv('DEBUG') else logging.INFO)
+
+    # Plain formatter for the log file.
+    plain_formatter = logging.Formatter(
+        '%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+
+    # Colored formatter for the console.
+    color_formatter = colorlog.ColoredFormatter(
+        '%(log_color)s%(asctime)s - %(name)s:%(lineno)d - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S',
+        log_colors={
+            'DEBUG': 'cyan',
+            'INFO': 'green',
+            'WARNING': 'yellow',
+            'ERROR': 'red',
+            'CRITICAL': 'red,bg_white',
+        }
+    )
+
+    file_handler.setFormatter(plain_formatter)
+    console_handler.setFormatter(color_formatter)
+    root_logger.addHandler(file_handler)
+    root_logger.addHandler(console_handler)
+
+    # Quieten chatty third-party libraries.
+    logging.getLogger("urllib3").setLevel(logging.WARNING)
+    logging.getLogger("selenium").setLevel(logging.WARNING)
+
+    # Top-level project logger: one place to set the project's log level.
+    project_logger = logging.getLogger("crossborder")
+    project_logger.setLevel(logging.INFO)
+
+
+def get_logger(name=None):
+    """Return a logger inside the ``crossborder`` namespace.
+
+    Empty/None ``name`` yields the ``crossborder`` logger itself. The name
+    ``"__main__"`` is replaced by a dotted name built from the entry script's
+    path relative to ``project_root`` (its bare stem if outside that tree).
+    The result gains a ``crossborder.`` prefix unless it already has one.
+    """
+    if not name:
+        name = "crossborder"
+
+    if name == "__main__":
+        # Interactive sessions have no __file__; "__main__" is kept then.
+        main_path = getattr(sys.modules['__main__'], '__file__', None)
+        if main_path:
+            main_file = Path(main_path).resolve()
+            try:
+                relative_path = main_file.relative_to(project_root)
+                name = '.'.join(relative_path.with_suffix('').parts)
+            except ValueError:
+                # Entry script lives outside the project tree.
+                name = main_file.stem
+
+    # Configure on demand if the root logger has no handlers.
+    if not logging.getLogger().hasHandlers():
+        configure_logging()
+
+    # Never prefix the namespace root itself ("crossborder.crossborder").
+    if name != "crossborder" and not name.startswith("crossborder."):
+        name = "crossborder." + name
+    return logging.getLogger(name)
+
+
+# Configure logging automatically at module import time (optional).
+configure_logging()
+
+# ====== NOTE: the `log` instance is no longer exported directly ======
+# Business modules should obtain their logger via get_logger(__name__).

+ 3 - 1
crossborder/utils/parse_utils.py

@@ -3,7 +3,9 @@ from datetime import datetime
 from decimal import Decimal, InvalidOperation
 from pathlib import Path
 
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 YEAR_PATTERN = re.compile(r"^\d{4}$")
 MONTH_PATTERN = re.compile(r"^(0[1-9]|1[0-2])$")

+ 3 - 1
crossborder/zhejiang/crawl_gov_zhejiang_full.py

@@ -22,7 +22,9 @@ from crossborder.zhejiang import gov_commodity_zhejiang_city
 from crossborder.zhejiang import gov_commodity_zhejiang_country
 from crossborder.zhejiang import gov_commodity_zhejiang_import_export
 from crossborder.utils import base_country_code, base_mysql
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 def configure_stealth_options():
     """增强型反检测配置[1,4](@ref)"""

+ 3 - 1
crossborder/zhejiang/gov_commodity_zhejiang_city.py

@@ -6,7 +6,9 @@ import pandas as pd
 from crossborder.zhejiang import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 city_code_map = {
     "杭州地区": "330100",

+ 3 - 1
crossborder/zhejiang/gov_commodity_zhejiang_country.py

@@ -5,7 +5,9 @@ import pandas as pd
 from crossborder.zhejiang import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 # 排除地区名单
 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",

+ 3 - 1
crossborder/zhejiang/gov_commodity_zhejiang_import_export.py

@@ -6,7 +6,9 @@ import pandas as pd
 from crossborder.zhejiang import download_dir
 from crossborder.utils import base_country_code, base_mysql
 from crossborder.utils.base_country_code import format_sql_value
-from crossborder.utils.log import log
+from crossborder.utils.log import  get_logger
+
+log = get_logger(__name__)
 
 CUSTOM_COMMODITY_REPLACEMENTS = {
     '稻谷及大米': '稻谷、大米及大米粉',