Quellcode durchsuchen

1.uat数据库更新
2.同比抓取数据为0时的情况处理

01495251 vor 1 Monat
Ursprung
Commit
3e09ab931a

+ 13 - 6
crossborder/henan/henan_parse_excel.py

@@ -5,12 +5,14 @@ from pathlib import Path
 import numpy as np
 import pandas as pd
 
-from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR
-from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \
-    find_unmatched_countries, \
+from crossborder.utils.db_helper import DBHelper
+from crossborder.utils.log import get_logger
+from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, find_unmatched_countries, \
     extract_year_month_from_path, traverse_and_process, parse_value
 
+log = get_logger(__name__)
+
 # 常量配置(新增路径正则校验)
 PROV_CODE = "410000"
 PROV_NAME = "河南省"
@@ -36,9 +38,9 @@ def parse_excel(current_dir):
         country_file = next(current_path.glob("*国别*"), None)
         process_country_trade(country_file, year, month)
 
-        print(f"{current_dir}数据已全部成功处理")
+        log.info(f"{current_dir}数据已全部成功处理")
     except Exception as e:
-        print(f"处理失败:{current_dir},错误:{str(e)}")
+        log.error(f"处理失败:{current_dir},错误:{str(e)}")
         raise
 
 
@@ -71,7 +73,12 @@ def process_combined_trade(current_dir, year, month):
     valid_data['prov_name'] = PROV_NAME
     #进出口总值计算
     valid_data['monthly_total'] = valid_data['monthly_import'].fillna(0) + valid_data['monthly_export'].fillna(0)
-    valid_data['monthly_total'] = valid_data['monthly_total'].replace(0, np.nan)
+    # valid_data['monthly_total'] = valid_data['monthly_total'].replace(0, np.nan)
+    # 仅当进口、出口两个字段均为空(NaN)时才将合计置为NaN;已发布的数据中存在合计为0的情况,需保留0
+    valid_data['monthly_total'] = valid_data['monthly_total'].mask(
+        (valid_data['monthly_import'].isna() & valid_data['monthly_export'].isna()),
+        np.nan
+    )
 
     # 定义目标字段
     target_cols = [

+ 1 - 1
crossborder/quanguo/parse_month_excel.py

@@ -158,5 +158,5 @@ def parse_month_table_excel(file_path):
 
 
 if __name__ == "__main__":
-    file_path = r"D:\pythonSpace\crossborder\downloads\total\2025\03\(1)2025年进出口商品总值表 B-月度表.xls"
+    file_path = r"D:\pythonSpace\crossborder\downloads\total\2023\12\(1)2023年进出口商品总值表 B-月度表.xls"
     parse_month_table_excel(file_path)

+ 7 - 2
crossborder/shandong/selenium_shandong_download.py

@@ -164,12 +164,13 @@ def handle_retry(driver):
 
 def main():
     """主入口(优化山东海关数据采集逻辑)"""
+    valid_year = None
+    valid_month = None
     parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
     parser.add_argument('--year', type=int, default=None,
                         help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
     start_time = time.time()
-    target_months = []  # 初始化避免未定义
     data_collected = False  # 标记是否成功采集数据
 
     # 初始化日志标题
@@ -237,7 +238,11 @@ def main():
         if data_collected:
             duration = time.time() - start_time
             minutes, seconds = divmod(duration, 60)
-            message = (f"【山东海关】{len(target_months)}个月份数据采集完成"
+            if args.year:
+                time_range_str = f'{valid_year}年{valid_month}月'
+            else:
+                time_range_str = f'{args.year}年1月-{valid_year}年{valid_month}月'
+            message = (f"【山东海关】{time_range_str}数据采集完成"
                        f",总耗时:{int(minutes)}分{seconds:.1f}秒")
             send_dingtalk_message(message)
 

+ 5 - 2
crossborder/shandong/shandong_parse_excel.py

@@ -97,8 +97,11 @@ def process_combined_trade(current_dir, year, month, previous_dir=None):
     #当 monthly_import 和 monthly_export 中只有一个有值时,monthly_total 取不为空的那个值,
     # 而两者都有值时相加
     valid_data['monthly_total'] = valid_data['monthly_import'].fillna(0) + valid_data['monthly_export'].fillna(0)
-    valid_data['monthly_total'] = valid_data['monthly_total'].replace(0, np.nan)
-
+    # valid_data['monthly_total'] = valid_data['monthly_total'].replace(0, np.nan)
+    valid_data['monthly_total'] = valid_data['monthly_total'].mask(
+        (valid_data['monthly_import'].isna() & valid_data['monthly_export'].isna()),
+        np.nan
+    )
     valid_data = valid_data.replace({np.nan: None})
 
     # 入库逻辑保持不变

+ 8 - 9
crossborder/utils/base_mysql.py

@@ -24,11 +24,10 @@ cryptor = AESCryptor("uat_ff419620e7047a3c372e2513c5a2b9a5")
 
 # 数据库配置
 DB_CONFIG = {
-    'host': '10.130.75.149',
+    'host': '10.130.75.139',
     'port': 3307,
-    'user': 'yto_crm',
-    'password': 'ENC(Fl9g4899OmVYddM42Rt2fA==:sDy1QG/7bmx/iHo4xEOBGQ==)',
-    # 'password': '%3sFUlsolaRI',
+    'user': 'crm_uat',
+    'password': '&8%biuKNqDYZdXe3',
     'database': 'crm_uat',
     'charset': 'utf8mb4'
 }
@@ -49,14 +48,14 @@ def get_decrypted_password():
 def initialize_engine():
     """初始化数据库引擎(包含密码解密)"""
     db_config = DB_CONFIG.copy()
-    db_config['password'] = get_decrypted_password()
-
-    # 对密码进行 URL 编码
-    encoded_password = quote_plus(db_config["password"])
+    # db_config['password'] = get_decrypted_password()
+    #
+    # # 对密码进行 URL 编码
+    # encoded_password = quote_plus(db_config["password"])
 
     # 构建 SQLAlchemy 引擎
     return create_engine(
-        f"mysql+pymysql://{db_config['user']}:{encoded_password}@{db_config['host']}:{db_config['port']}/{db_config['database']}?charset={db_config['charset']}",
+        f"mysql+pymysql://{db_config['user']}:{db_config['password']}@{db_config['host']}:{db_config['port']}/{db_config['database']}?charset={db_config['charset']}",
         pool_size=5,
         max_overflow=10
     )

+ 4 - 4
crossborder/utils/db_helper.py

@@ -10,10 +10,10 @@ from crossborder.utils.log import  get_logger
 log = get_logger(__name__)
 
 DB_CONFIG = {
-    'host': '10.130.75.149',
+    'host': '10.130.75.139',
     'port': 3307,
-    'user': 'yto_crm',
-    'password': 'ENC(Fl9g4899OmVYddM42Rt2fA==:sDy1QG/7bmx/iHo4xEOBGQ==)',
+    'user': 'crm_uat',
+    'password': '&8%biuKNqDYZdXe3',
     'database': 'crm_uat',
     'charset': 'utf8mb4'
 }
@@ -34,7 +34,7 @@ def get_decrypted_password():
 class DBHelper:
     def __init__(self):
         db_config = DB_CONFIG.copy()
-        db_config['password'] = get_decrypted_password()
+        # db_config['password'] = get_decrypted_password()
         self.engine = create_engine(
             f'mysql+pymysql://{db_config["user"]}:{db_config["password"]}@{db_config["host"]}:{db_config["port"]}/{db_config["database"]}?charset={db_config["charset"]}',
             pool_size=5,