Procházet zdrojové kódy

海关总署最新月份清洗脚本修改

01495251 před 11 hodinami
rodič
revize
5649ba8991

+ 9 - 6
crossborder/quanguo/data_cleaning_to_db.py

@@ -8,6 +8,7 @@ from crossborder.quanguo.parse_country_table_excel import parse_country_table_ex
 from crossborder.quanguo.parse_month_excel import parse_month_table_excel
 from crossborder.quanguo.parse_region_table_excel import parse_region_table_excel
 from crossborder.quanguo.parse_year_excel import parse_year_table_excel
+from crossborder.utils.constants import DOWNLOAD_DIR
 from crossborder.utils.log import log
 
 
@@ -76,18 +77,20 @@ def perform_data_cleanup_and_import(current_year):
                 parse_region_table_excel(full_path)
 
             elif '(15)' in file:
-                log.info(f"处理对部分国家(地区)出口类章金额表: {file}")
-                parse_commodity_country_detail(file, "export")
+                log.info(f"处理对部分国家(地区)出口类章金额表: {full_path}")
+                parse_commodity_country_detail(full_path, "export")
 
             elif '(16)' in file:
-                log.info(f"处理自部分国家(地区)进口类章金额表: {file}")
-                parse_commodity_country_detail(file, "import")
+                log.info(f"处理自部分国家(地区)进口类章金额表: {full_path}")
+                parse_commodity_country_detail(full_path, "import")
 
             else:
-                log.warning(f"未知类型文件,跳过: {file}")
+                log.warning(f"未知类型文件,跳过: {full_path}")
 
         log.info("数据清洗与入库完成!")
 
     except Exception as e:
         log.error(f"数据清洗失败: {str(e)}")
-        raise
+        raise
+if __name__ == "__main__":
+    perform_data_cleanup_and_import(2025)

+ 1 - 1
crossborder/quanguo/parse_commodity_country_detail_excel.py

@@ -70,7 +70,7 @@ def parse_commodity_country_detail(file_path, trade_type):
                        month_amount = \
                    VALUES (month_amount), cumulative_amount = \
                    VALUES (cumulative_amount) \
-                   create_time = now()
+                   ,create_time = now()
                    """
 
     # 遍历数据行(从第7行开始)

+ 18 - 18
crossborder/quanguo/parse_commodity_table_excel.py

@@ -36,6 +36,24 @@ def parse_commodity_table_excel(file_path):
     params = []
     current_class = None
 
+    # SQL模板使用命名占位符
+    sql_template = """
+        INSERT INTO `t_yujin_crossborder_commodity_trade`
+        (`year_month`, `hs_code`, `monthly_export`, `ytd_export`,
+         `monthly_import`, `ytd_import`, `ytd_yoy_export`, `ytd_yoy_import`)
+        VALUES 
+        (:year_month, :hs_code, :monthly_export, :ytd_export, 
+         :monthly_import, :ytd_import, :ytd_yoy_export, :ytd_yoy_import)
+        ON DUPLICATE KEY UPDATE
+            monthly_export = VALUES(monthly_export),
+            ytd_export = VALUES(ytd_export),
+            monthly_import = VALUES(monthly_import),
+            ytd_import = VALUES(ytd_import),
+            ytd_yoy_export = VALUES(ytd_yoy_export),
+            ytd_yoy_import = VALUES(ytd_yoy_import),
+            create_time = NOW()   -- 这里改为直接使用NOW()
+    """
+
     # 遍历数据行(从第7行开始)
     for row_idx in range(6, sheet.nrows):
         try:
@@ -51,24 +69,6 @@ def parse_commodity_table_excel(file_path):
 
             identifier = parts[0]
 
-            # SQL模板使用命名占位符
-            sql_template = """
-                INSERT INTO `t_yujin_crossborder_commodity_trade`
-                (`year_month`, `hs_code`, `monthly_export`, `ytd_export`,
-                 `monthly_import`, `ytd_import`, `ytd_yoy_export`, `ytd_yoy_import`)
-                VALUES 
-                (:year_month, :hs_code, :monthly_export, :ytd_export, 
-                 :monthly_import, :ytd_import, :ytd_yoy_export, :ytd_yoy_import)
-                ON DUPLICATE KEY UPDATE
-                    monthly_export = VALUES(monthly_export),
-                    ytd_export = VALUES(ytd_export),
-                    monthly_import = VALUES(monthly_import),
-                    ytd_import = VALUES(ytd_import),
-                    ytd_yoy_export = VALUES(ytd_yoy_export),
-                    ytd_yoy_import = VALUES(ytd_yoy_import),
-                    create_time = now()
-            """
-
             # 解析章数据
             if re.match(r'^\d+章$', identifier):
                 if current_class:

+ 1 - 0
crossborder/quanguo/parse_region_table_excel.py

@@ -43,6 +43,7 @@ def parse_region_table_excel(file_path):
                    VALUES (ytd_total), ytd_import = \
                    VALUES (ytd_import), ytd_export = \
                    VALUES (ytd_export) \
+                   ,create_time = NOW()
                    """
 
     # 从第7行开始读取(索引6)

+ 35 - 38
crossborder/quanguo/parse_year_excel.py

@@ -1,35 +1,33 @@
-from decimal import Decimal, InvalidOperation
-
-import xlrd
-import pymysql
 from datetime import datetime
-
+import xlrd
 from crossborder.utils.db_helper import DBHelper
 from crossborder.utils.parse_utils import convert_unit, parse_ratio
 
 
-
-
 def get_upsert_sql():
-    """生成带更新条件的SQL语句"""
+    """使用命名占位符并正确使用VALUES函数的SQL"""
     return """
-    INSERT INTO t_yujin_crossborder_yearly_summary 
-    (year, year_total, year_import, year_export, trade_balance, 
-     yoy_import_export, yoy_import, yoy_export, create_time)
-    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
-    ON DUPLICATE KEY UPDATE
-        year_total = VALUES(year_total),
-        year_import = VALUES(year_import),
-        year_export = VALUES(year_export),
-        trade_balance = VALUES(trade_balance),
-        yoy_import_export = VALUES(yoy_import_export),
-        yoy_import = VALUES(yoy_import),
-        yoy_export = VALUES(yoy_export),
-        create_time = VALUES(create_time)
-    """
+           INSERT INTO t_yujin_crossborder_yearly_summary
+           (year, year_total, year_import, year_export, trade_balance,
+            yoy_import_export, yoy_import, yoy_export, create_time)
+           VALUES (:year, :year_total, :year_import, :year_export, :trade_balance, \
+                   :yoy_import_export, :yoy_import, :yoy_export, :create_time) ON DUPLICATE KEY \
+           UPDATE \
+               year_total = \
+           VALUES (year_total), year_import = \
+           VALUES (year_import), year_export = \
+           VALUES (year_export), trade_balance = \
+           VALUES (trade_balance), yoy_import_export = \
+           VALUES (yoy_import_export), yoy_import = \
+           VALUES (yoy_import), yoy_export = \
+           VALUES (yoy_export), create_time = \
+           VALUES (create_time) \
+           """
+
 
 def parse_year_table_excel(file):
     db_helper = DBHelper()
+    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
 
     # 读取Excel文件
     try:
@@ -40,7 +38,6 @@ def parse_year_table_excel(file):
         return
 
     sql = get_upsert_sql()
-    current_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
     params_list = []
 
     for row_idx in range(5, sheet.nrows):
@@ -48,20 +45,19 @@ def parse_year_table_excel(file):
         if not row[1]:  # 跳过空年份
             continue
 
-        # 准备数据
-        params = (
-            row[1],  # year
-            convert_unit(row[2]),  # year_total
-            convert_unit(row[4]),  # year_import
-            convert_unit(row[3]),  # year_export
-            convert_unit(row[5]),  # trade_balance
-            parse_ratio(row[6]),   # yoy_import_export
-            parse_ratio(row[7]),   # yoy_import
-            parse_ratio(row[8]),   # yoy_export
-            current_time           # create_time
-        )
-
-        params_list.append(params)
+        # 准备数据 - 使用字典
+        param_dict = {
+            "year": row[1],
+            "year_total": convert_unit(row[2]),
+            "year_import": convert_unit(row[4]),
+            "year_export": convert_unit(row[3]),
+            "trade_balance": convert_unit(row[5]),
+            "yoy_import_export": parse_ratio(row[6]),
+            "yoy_import": parse_ratio(row[7]),
+            "yoy_export": parse_ratio(row[8]),
+            "create_time": current_time
+        }
+        params_list.append(param_dict)
 
     # 使用 DBHelper 执行 SQL 插入
     try:
@@ -69,7 +65,8 @@ def parse_year_table_excel(file):
         print(f"成功处理 {len(params_list)} 条数据,受影响行数:{affected_rows}")
     except Exception as e:
         print(f"数据库操作失败: {e}")
+        raise
 
 
 if __name__ == "__main__":
-    parse_year_table_excel('../src/downloads/20250513/(1)2025年进出口商品总值表_A年度表_3月.xls')
+    parse_year_table_excel(r'D:\pythonSpace\crossborder\downloads\total\2025\04\(1)2025年进出口商品总值表 A-年度表.xls')

+ 8 - 2
crossborder/utils/parse_utils.py

@@ -31,8 +31,14 @@ def convert_unit(value):
         return None
 
 def parse_ratio(value):
-    """处理百分比数据"""
-    return value if value not in ['-', ''] else None
+    """处理百分比数据,非空时返回 Decimal 类型"""
+    if value in ('-', '', None):
+        return None
+    try:
+        return Decimal(str(value).strip('%').replace(',', ''))
+    except (InvalidOperation, ValueError):
+        # 如果转换失败,也返回 None
+        return None
 
 def clean_commodity_name(name):
     """清洗商品名称中的特殊字符和括号注释,并替换英文括号为中文括号"""