Przeglądaj źródła

广东省商品贸易数据1月份数据缺失问题修复

01495251 1 miesiąc temu
rodzic
commit
422b2fd292

+ 6 - 5
crossborder/guangdong/selenium_guangdong_download.py

@@ -96,7 +96,7 @@ def process_month_data(driver, year, month):
 
                     # 根据表格类型处理数据
                     if table_type == 'country':
-                        data = parse_country_table(driver, year, month)
+                        data = parse_page_country_data(driver, year, month)
                         df_country = pd.DataFrame(data)
                         db.bulk_insert(
                             df_country,
@@ -109,7 +109,7 @@ def process_month_data(driver, year, month):
                         found_count += 1
                     else:
                         data_type = 'export' if table_type == 'export_commodity' else 'import'
-                        commodity_data[data_type] = parse_commodity_table(driver, data_type, year, month)
+                        commodity_data[data_type] = parse_page_commodity_data(driver, data_type, year, month)
                         found_count += 1
                 except Exception as e:
                     log.info(f"表格处理失败: {e}")
@@ -119,7 +119,7 @@ def process_month_data(driver, year, month):
 
 
 
-def parse_country_table(driver, year, month):
+def parse_page_country_data(driver, year, month):
     """解析目标页面的表格数据"""
     data = []
 
@@ -221,7 +221,7 @@ def parse_country_table(driver, year, month):
 
 
 
-def parse_commodity_table(driver, data_type, year, month):
+def parse_page_commodity_data(driver, data_type, year, month):
     """解析商品表通用函数"""
     data = []
     try:
@@ -310,7 +310,8 @@ def merge_commodity_data(import_data, export_data, year, month):
     merged_df['monthly_total'] = merged_df['monthly_total'].replace(0, np.nan)
 
     merged_df['crossborder_year'] = year
-    merged_df['crossborder_year_month'] = f"{year}-{month:02d}"
+    # 仅在为空时填充传入的年月;1、2月数据的年月已在上级构建好
+    merged_df['crossborder_year_month'] = merged_df['crossborder_year_month'].fillna(f"{year}-{month:02d}")
     merged_df['prov_code'] = PROV_CODE
     merged_df['prov_name'] = PROV_NAME
 

+ 27 - 27
crossborder/quanguo/data_cleaning_to_db.py

@@ -130,36 +130,36 @@ def main():
                     if not file.endswith(('.xls', '.xlsx')):
                         continue
 
-                    if '(1)' in file and '年度表' in file:
-                        log.info(f"处理年度汇总表: {file}")
-                        parse_year_table_excel(full_path)
-
-                    elif '(1)' in file and '月度表' in file:
-                        log.info(f"处理月度汇总表: {file}")
-                        parse_month_table_excel(full_path)
-
-                    elif '(2)' in file:
-                        log.info(f"处理国别(地区)贸易表: {file}")
-                        parse_country_table_excel(full_path)
-
-                    elif '(4)' in file:
-                        log.info(f"处理类章贸易表: {file}")
-                        parse_commodity_table_excel(full_path)
-
-                    elif '(8)' in file:
+                    # if '(1)' in file and '年度表' in file:
+                    #     log.info(f"处理年度汇总表: {file}")
+                    #     parse_year_table_excel(full_path)
+                    #
+                    # elif '(1)' in file and '月度表' in file:
+                    #     log.info(f"处理月度汇总表: {file}")
+                    #     parse_month_table_excel(full_path)
+                    #
+                    # elif '(2)' in file:
+                    #     log.info(f"处理国别(地区)贸易表: {file}")
+                    #     parse_country_table_excel(full_path)
+                    #
+                    # elif '(4)' in file:
+                    #     log.info(f"处理类章贸易表: {file}")
+                    #     parse_commodity_table_excel(full_path)
+
+                    if '(8)' in file:
                         log.info(f"处理收发货人所在地表: {file}")
                         parse_region_table_excel(full_path)
 
-                    elif '(15)' in file:
-                        log.info(f"处理对部分国家(地区)出口类章金额表: {full_path}")
-                        parse_commodity_country_detail(full_path, "export")
-
-                    elif '(16)' in file:
-                        log.info(f"处理自部分国家(地区)进口类章金额表: {full_path}")
-                        parse_commodity_country_detail(full_path, "import")
-
-                    else:
-                        log.warning(f"未知类型文件,跳过: {full_path}")
+                    # elif '(15)' in file:
+                    #     log.info(f"处理对部分国家(地区)出口类章金额表: {full_path}")
+                    #     parse_commodity_country_detail(full_path, "export")
+                    #
+                    # elif '(16)' in file:
+                    #     log.info(f"处理自部分国家(地区)进口类章金额表: {full_path}")
+                    #     parse_commodity_country_detail(full_path, "import")
+                    #
+                    # else:
+                    #     log.warning(f"未知类型文件,跳过: {full_path}")
 
             log.info(f"{year} 年的数据处理完成!")