Explorar o código

Merge branch 'master' of wyp/crossborder into dev

wyp hai 1 semana
pai
achega
cde2de3495

+ 1 - 3
anhui/gov_commodity_anhui_country.py

@@ -2,9 +2,7 @@ from pathlib import Path
 
 import pandas as pd
 
-from com.zf.crawl import base_country_code
-from com.zf.crawl import base_mysql
-
+from utils import base_country_code, base_mysql
 from utils.base_country_code import format_sql_value
 
 # 排除地区名单

+ 5 - 1
hebei/gov_commodity_hebei_import_export.py

@@ -77,7 +77,11 @@ def save_to_database(merged_df, year, month):
 
             monthly_import = round(row['import'], 4)
             monthly_export = round(row['export'], 4)
-            monthly_total = round(monthly_import + monthly_export, 4)
+            monthly_total = round(
+                (0 if pd.isna(monthly_import) else monthly_import) +
+                (0 if pd.isna(monthly_export) else monthly_export),
+                4
+            )
 
             if year_month == '2023-02':
                 monthly_import = round(monthly_import / 2, 4)

+ 4 - 4
jiangsu/gov_commodity_jiangsu_import_export.py

@@ -61,14 +61,17 @@ def process_folder(path, all_records):
 
                 temp_df = df[[1, 5]].rename(columns={1: 'commodity', 5: 'import'})
                 temp_df['import'] = pd.to_numeric(temp_df['import'].replace('--', 0), errors='coerce')
+                temp_df['import'] = temp_df['import'] * 10
                 import_df = pd.concat([import_df, temp_df])
 
                 temp_df = df[[1, 3]].rename(columns={1: 'commodity', 3: 'export'})
                 temp_df['export'] = pd.to_numeric(temp_df['export'].replace('--', 0), errors='coerce')
+                temp_df['export'] = temp_df['export'] * 10
                 export_df = pd.concat([export_df, temp_df])
 
                 temp_df = df[[1, 2]].rename(columns={1: 'commodity', 2: 'total'})
                 temp_df['total'] = pd.to_numeric(temp_df['total'].replace('--', 0), errors='coerce')
+                temp_df['total'] = temp_df['total'] * 10
                 total_df = pd.concat([total_df, temp_df])
                 break
 
@@ -111,14 +114,11 @@ def save_to_database(import_df, export_df, total_df, year, month, all_records):
 
         sql = (f"INSERT INTO t_yujin_crossborder_prov_commodity_trade "
                f"(crossborder_year, crossborder_year_month, prov_code, prov_name, commodity_code, commodity_name, monthly_total, monthly_export, monthly_import, create_time, commodity_source) VALUES "
-               f"('{year}', '{year_month}', '320000', '江苏省', '{commodity_code}', '{category_name}', {format_sql_value(monthly_total)}, {format_sql_value(monthly_export)}, {format_sql_value(monthly_import)}, now(), 1);")
+               f"('{year}', '{year_month}', '320000', '江苏省', '{commodity_code}', '{category_name}', {monthly_total}, {monthly_export}, {monthly_import}, now(), 1);")
         sql_arr.append(sql)
 
         processed_commodities.add(commodity_code)
-    # except Exception as e:
-    #     print(f"{year_month} 生成SQL时发生异常: {str(e)}")
 
-    # 原有SQL执行逻辑
     print(f"√ {year_month} 成功生成SQL文件 size {len(sql_arr)} ")
     base_mysql.bulk_insert(sql_arr)
     print(f"√ {year_month} prov_commodity_trade SQL 存表完成!")

+ 2 - 1
zhejiang/crawl_gov_zhejiangi_full.py

@@ -61,7 +61,8 @@ def crawl_by_year_tabs(driver, base_url):
     year_tabs = driver.find_elements(By.XPATH, '//ul[@class="nav_sj"]//li//a')
     for tab in year_tabs:
         year_text = tab.text.strip()
-        if year_text not in years:
+        if int(year_text[:4]) <= 2022:
+            print(f"{year_text} 后的数据无需下载")
             continue
 
         year_url = tab.get_attribute("href")

+ 3 - 4
zhejiang/gov_commodity_zhejiang_city.py

@@ -52,19 +52,19 @@ def get_df(path, year_month):
     total_df = pd.DataFrame()
     temp_df = df[[1, 2]].rename(columns={1: 'commodity', 2: 'total'})
     temp_df['total'] = pd.to_numeric(temp_df['total'].replace('--', 0), errors='coerce').astype(float)
-    if temp_df['total'] and year_month and year_month == '2024-07':
+    if year_month and year_month == '2024-07':
         temp_df['total'] = temp_df['total'] / 10000
     total_df = pd.concat([total_df, temp_df])
 
     temp_df = df[[1, 3]].rename(columns={1: 'commodity', 3: 'import'})
     temp_df['import'] = pd.to_numeric(temp_df['import'].replace('--', 0), errors='coerce').astype(float)
-    if temp_df['import'] and year_month and year_month == '2024-07':
+    if year_month and year_month == '2024-07':
         temp_df['import'] = temp_df['import'] / 10000
     import_df = pd.concat([import_df, temp_df])
 
     temp_df = df[[1, 4]].rename(columns={1: 'commodity', 4: 'export'})
     temp_df['export'] = pd.to_numeric(temp_df['export'].replace('--', 0), errors='coerce').astype(float)
-    if temp_df['export'] and year_month and year_month == '2024-07':
+    if year_month and year_month == '2024-07':
         temp_df['export'] = temp_df['export'] / 10000
     export_df = pd.concat([export_df, temp_df])
 
@@ -138,7 +138,6 @@ def process_folder(path):
         sql_arr.append(sql)
 
     print(f"√ {year_month} prov_region_trade 成功生成 SQL 文件 size {len(sql_arr)} ")
-    # 解析完后生成sql文件批量入库
     base_mysql.bulk_insert(sql_arr)
     print(f"√ {year_month} prov_region_trade SQL 存表完成!")
 

+ 10 - 2
zhejiang/gov_commodity_zhejiang_import_export.py

@@ -174,11 +174,19 @@ def save_to_database(merged_df, year, month):
             if year == 2025 or (year == 2024 and month in [7, 8, 9, 10, 11, 12]):
                 monthly_import = round(row['import'], 4)
                 monthly_export = round(row['export'], 4)
-                monthly_total = round(monthly_import + monthly_export, 4)
+                monthly_total = round(
+                    (0 if pd.isna(monthly_import) else monthly_import) +
+                    (0 if pd.isna(monthly_export) else monthly_export),
+                    4
+                )
             else:
                 monthly_import = round(row['import'] / 10000, 4)
                 monthly_export = round(row['export'] / 10000, 4)
-                monthly_total = round((monthly_import + monthly_export) / 10000, 4)
+                monthly_total = round(
+                    (0 if pd.isna(monthly_import) else monthly_import) +
+                    (0 if pd.isna(monthly_export) else monthly_export),
+                    4
+                )
 
             sql = (f"INSERT INTO t_yujin_crossborder_prov_commodity_trade "
                    f"(crossborder_year, crossborder_year_month, prov_code, prov_name, commodity_code, commodity_name, monthly_total, monthly_export, monthly_import, create_time) VALUES "