| 
					
				 | 
			
			
				@@ -1,3 +1,4 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import re 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from pathlib import Path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -5,8 +6,9 @@ import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.db_helper import DBHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.constants import COUNTRY_CODE_MAPPING, EXCLUDE_REGIONS, DOWNLOAD_DIR 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, find_unmatched_countries, \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    extract_year_month_from_path, traverse_and_process 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.parse_utils import clean_county_name, clean_commodity_name, convert_wan_to_yuan, \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    find_unmatched_countries, \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    extract_year_month_from_path, traverse_and_process, parse_value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # 常量配置(新增路径正则校验) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 PROV_CODE = "410000" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -241,8 +243,8 @@ def read_trade_pair(import_path, export_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     )) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     merged = pd.merge(df_import, df_export, on="commodity_name", how="outer").fillna(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    merged["monthly_import"] = merged["monthly_import"].apply(convert_wan_to_yuan) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    merged["monthly_export"] = merged["monthly_export"].apply(convert_wan_to_yuan) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    merged["monthly_import"] = merged["monthly_import"].apply(parse_value) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    merged["monthly_export"] = merged["monthly_export"].apply(parse_value) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return merged 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -262,4 +264,10 @@ def calculate_monthly_values(current_data, prev_data): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    traverse_and_process(download_dir, parse_excel, province_name="henan") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser = argparse.ArgumentParser(description='海关数据智能抓取系统') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser.add_argument('--year', type=int, default=None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        help='终止年份(如2023),未指定时清洗最新一个月数据') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    traverse_and_process(download_dir, parse_excel, province_name="henan",  year=args.year) 
			 |