|
@@ -1,3 +1,4 @@
|
|
|
|
+import argparse
|
|
import re
|
|
import re
|
|
from pathlib import Path
|
|
from pathlib import Path
|
|
|
|
|
|
@@ -5,8 +6,9 @@ import pandas as pd
|
|
|
|
|
|
from crossborder.utils.db_helper import DBHelper
|
|
from crossborder.utils.db_helper import DBHelper
|
|
from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR
|
|
from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR
|
|
-from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, find_unmatched_countries, \
|
|
|
|
- extract_year_month_from_path, traverse_and_process
|
|
|
|
|
|
+from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \
|
|
|
|
+ find_unmatched_countries, \
|
|
|
|
+ extract_year_month_from_path, traverse_and_process, parse_value
|
|
|
|
|
|
# 常量配置(新增路径正则校验)
|
|
# 常量配置(新增路径正则校验)
|
|
PROV_CODE = "410000"
|
|
PROV_CODE = "410000"
|
|
@@ -241,8 +243,8 @@ def read_trade_pair(import_path, export_path):
|
|
))
|
|
))
|
|
|
|
|
|
merged = pd.merge(df_import, df_export, on="commodity_name", how="outer").fillna(0)
|
|
merged = pd.merge(df_import, df_export, on="commodity_name", how="outer").fillna(0)
|
|
- merged["monthly_import"] = merged["monthly_import"].apply(convert_wan_to_yuan)
|
|
|
|
- merged["monthly_export"] = merged["monthly_export"].apply(convert_wan_to_yuan)
|
|
|
|
|
|
+ merged["monthly_import"] = merged["monthly_import"].apply(parse_value)
|
|
|
|
+ merged["monthly_export"] = merged["monthly_export"].apply(parse_value)
|
|
return merged
|
|
return merged
|
|
|
|
|
|
|
|
|
|
@@ -262,4 +264,10 @@ def calculate_monthly_values(current_data, prev_data):
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
- traverse_and_process(download_dir, parse_excel, province_name="henan")
|
|
|
|
|
|
+
|
|
|
|
+ parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
|
|
|
|
+ parser.add_argument('--year', type=int, default=None,
|
|
|
|
+ help='终止年份(如2023),未指定时清洗最新一个月数据')
|
|
|
|
+ args = parser.parse_args()
|
|
|
|
+
|
|
|
|
+ traverse_and_process(download_dir, parse_excel, province_name="henan", year=args.year)
|