| 
					
				 | 
			
			
				@@ -1,8 +1,7 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from decimal import Decimal 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from decimal import Decimal, InvalidOperation 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from pathlib import Path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import pandas as pd 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from openpyxl import load_workbook 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from db_helper import DBHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from utils.constants import DOWNLOAD_DIR, GUANGDONG_CITY 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -16,9 +15,16 @@ download_dir = DOWNLOAD_DIR / "guangdong" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 db = DBHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+_zhanjiang_first_month = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 广州海关:万元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 深圳海关:亿元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 汕头海关:万元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 黄埔海关:万元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 江门海关:亿元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+# 湛江海关:万元 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def match_customs_file(filename, customs_name, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """匹配海关文件""" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -41,74 +47,119 @@ def match_customs_file(filename, customs_name, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-def process_guangzhou_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """处理广州海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 读取Excel文件 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        wb = load_workbook(file_path, data_only=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sheet = wb.worksheets[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 查找包含月份的表头行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        month_str = f"{year}年{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i in range(1, 4):  # 检查前3行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            row_values = [str(cell.value).strip() if cell.value else "" for cell in sheet[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if any(month_str in val for val in row_values): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def find_header_and_columns(df, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    查找匹配月份的表头行并定位对应的列索引。 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    支持三种基础格式: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        - "2024年12月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        - "2024年12月-2024年12月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        - "2024-12-01 00:00:00" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    如果都未找到,则尝试匹配特殊格式:"2023年01月-2023年02月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    candidate_month_strs = [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        f"{year}年{month:02d}月", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        f"{year}年{month:02d}月-{year}年{month:02d}月", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        f"{year}-{month:02d}-01 00:00:00" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for i in range(min(3, len(df))): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        row_cells = [str(cell).strip() for cell in df.iloc[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for cell_val in row_cells: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if any(s == cell_val for s in candidate_month_strs): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if header_row is not None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 如果没找到常规格式,尝试特殊格式:2023年01月-2023年02月 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    special_format = "2023年01月-2023年02月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.warning(f"未找到常规格式,尝试匹配特殊格式: {special_format}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i in range(min(3, len(df))): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            row_cells = [str(cell).strip() for cell in df.iloc[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for cell_val in row_cells: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if cell_val == special_format: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    log.info(f"成功匹配特殊格式: {special_format} 行号={i}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if header_row is not None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.error(f"未找到 {month_str} 的表头") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.error("未找到任何支持的表头格式") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return None, [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 确定数据列位置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for cell in sheet[header_row]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if cell.value and month_str in str(cell.value): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                data_cols.append(cell.column - 1)  # 转换为0-based索引 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 确定数据列位置(包含所有候选格式) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    for col in range(len(df.columns)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        cell_val = str(df.iloc[header_row, col]).strip() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if cell_val in candidate_month_strs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data_cols.append(col) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if len(data_cols) < 6: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.error(f"未找到足够的 {month_str} 数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if not data_cols: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for col in range(len(df.columns)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            cell_val = str(df.iloc[header_row, col]).strip() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if cell_val in [special_format]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols.append(col) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if not data_cols: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.error("未找到对应的数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return header_row, [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return header_row, data_cols 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def process_guangzhou_customs(file_path, year, month,customs_type='guangzhou'): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """处理广州海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 读取Excel文件 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df = pd.read_excel(file_path, sheet_name=0, header=None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理广州海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        header_row,data_cols = find_header_and_columns(df, year, month) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 提取7地市数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         results = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        target_cities = ["广州市", "深圳市", "东莞市", "汕头市", "江门市", "湛江市", "茂名市"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for row in sheet.iter_rows(min_row=header_row + 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            city_cell = row[0].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if city_cell and "广东省" in str(city_cell): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                city_name = str(city_cell).replace("广东省", "").strip() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        target_cities = ["广州市", "韶关市", "佛山市", "肇庆市", "河源市", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                         "清远市", "汕头市", "梅州市", "汕尾市", "潮州市", "揭阳市", "云浮市"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for idx in range(header_row + 1, len(df)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            row = df.iloc[idx] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            city_cell = str(row[0]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if "广东省" in city_cell: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                city_name = city_cell.replace("广东省", "").strip() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if city_name in target_cities: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        # 获取各列值 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        total = row[data_cols[0]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        export = row[data_cols[1]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        import_val = row[data_cols[2]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        yoy_total = row[data_cols[3]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        yoy_export = row[data_cols[4]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        yoy_import = row[data_cols[5]].value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        # 转换数据类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        def convert_value(val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            if isinstance(val, (int, float)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                return Decimal(str(val)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            elif isinstance(val, str) and val.replace(".", "").isdigit(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                return Decimal(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            return Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        # 添加到结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if len(data_cols)>3: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_total = Decimal(str(row[data_cols[0]]))  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_export = Decimal(str(row[data_cols[4]]))  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_import = Decimal(str(row[data_cols[8]]))  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_import_export = Decimal(str(row[data_cols[1]]))  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_export = Decimal(str(row[data_cols[5]]))  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_import = Decimal(str(row[data_cols[9]]))  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_total = Decimal(str(row[data_cols[0]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_export = Decimal(str(row[data_cols[1]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            monthly_import = Decimal(str(row[data_cols[2]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_import_export = Decimal(str(row[data_cols[0]+1]))  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_export = Decimal(str(row[data_cols[1]+1]))  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            yoy_import = Decimal(str(row[data_cols[2]+1]))  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         results.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             "city_name": city_name, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "monthly_total": convert_value(total), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "monthly_import": convert_value(import_val), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "monthly_export": convert_value(export), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "yoy_import_export": convert_value(yoy_total), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "yoy_import": convert_value(yoy_import), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            "yoy_export": convert_value(yoy_export) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "monthly_import": monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "monthly_export": monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "yoy_import_export": yoy_import_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "yoy_import": yoy_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            "yoy_export": yoy_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        log.error(f"处理城市 {city_name} 出错: {e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        log.error(f"处理行 {idx} 出错: {e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return pd.DataFrame(results) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -118,79 +169,67 @@ def process_guangzhou_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_shenzhen_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """处理深圳海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """处理深圳海关数据(完整6指标版)""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        wb = load_workbook(file_path, data_only=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理深圳海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         results = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 处理深圳和惠州两个sheet 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for city, sheet_name in [("深圳市", "深圳市进出口(贸易方式)"), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                  ("惠州市", "惠州市进出口(贸易方式)")]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if sheet_name in wb.sheetnames: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    sheet = wb[sheet_name] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    log.warning(f"未找到sheet: {sheet_name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 查找总值行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                total_row_idx = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                for i, row in enumerate(sheet.iter_rows(values_only=True), 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if row and "总值" in str(row[0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        total_row_idx = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if total_row_idx is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    log.error(f"未找到总值行: {sheet_name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 查找包含月份的表头 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                month_str = f"{year}年{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                data_col = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                for i, row in enumerate(sheet.iter_rows(max_row=3, values_only=True), 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if any(month_str in str(cell) for cell in row if cell): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        for col_idx, cell_val in enumerate(row): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            if cell_val and month_str in str(cell_val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                data_col = col_idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if data_col is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    log.error(f"未找到 {month_str} 列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 获取数据值 (亿元转换为万元) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                total_value = sheet.cell(row=total_row_idx, column=data_col + 1).value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                yoy_value = sheet.cell(row=total_row_idx, column=data_col + 2).value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if total_value is None or yoy_value is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    log.error(f"{city} 数据为空") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 转换数据类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                def convert_value(val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if isinstance(val, (int, float)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        return Decimal(str(val)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    elif isinstance(val, str) and val.replace(".", "").isdigit(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        return Decimal(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    return Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                # 添加到结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                df = pd.read_excel(file_path, sheet_name=sheet_name, header=None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.warning(f"未找到sheet: {sheet_name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # 查找总值行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            total_row_idx = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for idx in range(len(df)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if "总值" in str(df.iloc[idx, 0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    total_row_idx = idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if total_row_idx is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.error(f"未找到总值行: {sheet_name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 列索引映射(基于您提供的完整数据结构) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 进出口 | 出口 | 进口 的数值和同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_total = convert_unit(str(df.iloc[total_row_idx, 1])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_total = Decimal(str(df.iloc[total_row_idx, 2])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_export = convert_unit(str(df.iloc[total_row_idx, 3])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_export = Decimal(str(df.iloc[total_row_idx, 4])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_import = convert_unit(str(df.iloc[total_row_idx, 5])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_import = Decimal(str(df.iloc[total_row_idx, 6])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 results.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     "city_name": city, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "monthly_total": convert_value(total_value) * Decimal('10000'), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "monthly_import": None,  # 没有单独的进口/出口数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "monthly_export": None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "yoy_import_export": convert_value(yoy_value), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "yoy_import": Decimal(0), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    "yoy_export": Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_export": monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_import": monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_import_export": yoy_total,  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_export": yoy_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_import": yoy_import 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                log.error(f"处理 {city} 数据出错: {str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.error(f"处理 {city} 数据出错: {e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # 尝试部分提取(回退方案) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    monthly_total = Decimal(str(df.iloc[total_row_idx, 1])) * Decimal('10000') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    results.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "city_name": city, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "monthly_export": None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "monthly_import": None, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "yoy_import_export": Decimal('0'), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "yoy_export": Decimal('0'), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        "yoy_import": Decimal('0') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                except: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    log.error(f"连基础进出口总值都无法提取: {sheet_name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return pd.DataFrame(results) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -202,67 +241,100 @@ def process_shenzhen_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_shantou_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """处理汕头海关数据 (逻辑同广州海关)""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     log.info(f"处理汕头海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return process_guangzhou_customs(file_path, year, month) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return process_guangzhou_customs(file_path, year, month,customs_type='shantou') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_huangpu_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """处理黄埔海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        wb = load_workbook(file_path, data_only=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sheet = wb.active 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df = pd.read_excel(file_path, sheet_name=0, header=None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理黄埔海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 查找合计行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         total_row_idx = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i, row in enumerate(sheet.iter_rows(values_only=True), 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if row and "合计" in str(row[0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                total_row_idx = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for idx in range(len(df)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if "合计" in str(df.iloc[idx, 0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                total_row_idx = idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if total_row_idx is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             log.error("未找到合计行") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 查找包含月份的表头 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        month_str = f"{year}年{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 查找包含月份的表头,匹配23年1月-23年多种格式 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i in range(1, 4):  # 检查前3行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            row_values = [str(cell.value) if cell.value else "" for cell in sheet[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if any(month_str in val and "人民币" in val for val in row_values): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if year == 2024 and month == 12: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            month_str = '45627' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        elif year == 2023 and month == 12: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            month_str = '45261' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        elif year == 2023 and month == 3: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            month_str = f"{year}年{month:02d}月-{year}年{month:02d}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            month_str = f'{year}-{month:02d}-01 00:00:00' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i in range(min(3, len(df))): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            row_cells = [str(cell).strip() for cell in df.iloc[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if any(month_str in cell  in cell for cell in row_cells): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                for col_idx, val in enumerate(row_values): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if val and month_str in val and "人民币" in val: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        data_cols.append(col_idx) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if len(data_cols) < 6: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.error(f"未找到 {month_str} 人民币表头") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 确定数据列位置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for col in range(len(df.columns)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            cell_val = str(df.iloc[header_row, col]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if month_str in cell_val : 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols.append(col) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(data_cols) < 3: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             log.error(f"未找到足够的 {month_str} 人民币数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 获取合计行数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        row_values = [cell.value for cell in sheet[total_row_idx]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 转换数据类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        def convert_value(val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if isinstance(val, (int, float)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(str(val)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            elif isinstance(val, str) and val.replace(".", "").isdigit(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 提取数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        results = [{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "city_name": "东莞市", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_total": convert_value(row_values[data_cols[0]]),  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_export": convert_value(row_values[data_cols[1]]),  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_import": convert_value(row_values[data_cols[2]]),  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import_export": convert_value(row_values[data_cols[3]]),  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_export": convert_value(row_values[data_cols[4]]),  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import": convert_value(row_values[data_cols[5]])  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        }] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            result = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # 提取数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            row = df.iloc[total_row_idx] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            monthly_total = Decimal(str(row[data_cols[0]]))  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            monthly_export = Decimal(str(row[data_cols[1]]))  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            monthly_import = Decimal(str(row[data_cols[2]]))  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            yoy_import_export = str(row[data_cols[0]+1])  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            yoy_export = str(row[data_cols[1]+1])  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            yoy_import = str(row[data_cols[2]+1])  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            result.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "crossborder_year_month": f'{year}-{month:02d}', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "city_name": "东莞市", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "monthly_import": monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "monthly_export": monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "yoy_import_export": yoy_import_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "yoy_import": yoy_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                "yoy_export": yoy_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            #东莞市一月数据比较特殊 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if month == 2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_total_sum = Decimal(str(row[data_cols[0]+4]))  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_export_sum = Decimal(str(row[data_cols[1]+4]))  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_import_sum = Decimal(str(row[data_cols[2]+4]))  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                january_monthly_total = monthly_total_sum - monthly_total 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                january_monthly_export = monthly_export_sum - monthly_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                january_monthly_import = monthly_import_sum - monthly_import 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                result.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "crossborder_year_month": f'{year}-01', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "city_name": "东莞市", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_total": january_monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_import": january_monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_export": january_monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return pd.DataFrame(results) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return pd.DataFrame(result) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.error(f"提取数据出错: {e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         log.error(f"处理黄埔海关文件出错: {str(e)}") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -272,18 +344,18 @@ def process_huangpu_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_jiangmen_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """处理江门海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        wb = load_workbook(file_path, data_only=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sheet = wb.active 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df = pd.read_excel(file_path, sheet_name=0, header=None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理江门海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 从文件名确定城市 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         city_name = "江门市" if "江门市" in file_path.name else "阳江市" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        target_row_name = "江门市进出口商品" if city_name == "江门市" else "阳江市进出口商品总值" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        target_row_name = "江门市进出口商品总值" if city_name == "江门市" else "阳江市进出口商品总值" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 查找目标行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         target_row_idx = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i, row in enumerate(sheet.iter_rows(values_only=True), 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if row and target_row_name in str(row[0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                target_row_idx = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for idx in range(len(df)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if target_row_name in str(df.iloc[idx, 0]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                target_row_idx = idx 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if target_row_idx is None: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -291,43 +363,48 @@ def process_jiangmen_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 查找包含月份的表头 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        month_str = f"{year}年{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if month == 2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+           month_str = f"1-{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+           month_str = f"{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i in range(1, 4):  # 检查前3行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            row_values = [str(cell.value) if cell.value else "" for cell in sheet[i]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if any(month_str in val for val in row_values): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for i in range(min(6, len(df))): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if any(month_str == str(cell).strip() for cell in df.iloc[i]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                for col_idx, val in enumerate(row_values): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    if val and month_str in val: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        data_cols.append(col_idx) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if len(data_cols) < 6: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.error(f"未找到足够的 {month_str} 数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.error(f"未找到 {month_str} 表头") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 获取目标行数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        row_values = [cell.value for cell in sheet[target_row_idx]] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 确定数据列位置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        data_cols = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for col in range(len(df.columns)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            cell_val = str(df.iloc[header_row, col]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if cell_val.strip() == month_str: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols.append(col) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 转换数据类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        def convert_value(val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if isinstance(val, (int, float)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(str(val)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            elif isinstance(val, str) and val.replace(".", "").isdigit(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(data_cols) < 3: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.error(f"未找到足够的 {month_str} 数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 提取数据 (亿元转换为万元) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        row = df.iloc[target_row_idx] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        monthly_total = convert_unit(str(row[data_cols[0]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        monthly_export = convert_unit(str(row[data_cols[1]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        monthly_import = convert_unit(str(row[data_cols[2]])) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        yoy_import_export = str(row[data_cols[0]+1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        yoy_export = str(row[data_cols[1]+1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        yoy_import = str(row[data_cols[2]+1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return pd.DataFrame([{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             "city_name": city_name, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_total": convert_value(row_values[data_cols[0]]) * Decimal('10000'),  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_export": convert_value(row_values[data_cols[1]]) * Decimal('10000'),  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_import": convert_value(row_values[data_cols[2]]) * Decimal('10000'),  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import_export": convert_value(row_values[data_cols[3]]),  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_export": convert_value(row_values[data_cols[4]]),  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import": convert_value(row_values[data_cols[5]])  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "monthly_import": monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "monthly_export": monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "yoy_import_export": yoy_import_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "yoy_import": yoy_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            "yoy_export": yoy_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         }]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -336,61 +413,94 @@ def process_jiangmen_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def process_zhanjiang_customs(file_path, year, month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    """处理湛江海关数据""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """处理湛江海关数据 满足「是第一次调用」或者「month == 12」任意一个条件""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    global _zhanjiang_first_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    # 判断是否应执行核心逻辑 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if _zhanjiang_first_month is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 第一次调用,记录初始月份 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        _zhanjiang_first_month = month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        should_execute = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 后续调用仅在以下情况下执行: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # - 与初次调用的 month 相同(允许多城市同时处理) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # - 或者 month == 12 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        should_execute = (month == _zhanjiang_first_month) or (month == 12) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if not should_execute: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.warning(f"跳过湛江海关{year}年{month}文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        wb = load_workbook(file_path, data_only=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        sheet = wb.worksheets[0] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        df = pd.read_excel(file_path, sheet_name=0, header=None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理湛江海关文件: {file_path.name}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 从文件名确定城市 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         city_name = "湛江市" if "湛江市" in file_path.name else "茂名市" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 查找月度数据表格 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        table_start_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        month_str = f"{year}年{month}月" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i, row in enumerate(sheet.iter_rows(values_only=True), 1): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if row and any(month_str in str(cell) for cell in row if cell): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                table_start_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        month_str = f"{year}年前{month}个月{city_name}进出口数据(月度)" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # target_header_row = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # # 查找表头行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # for i in range(min(3, len(df))):  # 在前5行找表头 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #     if any(month_str in str(cell) for cell in df.iloc[i]): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #         target_header_row = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #         break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # if target_header_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #     log.error(f"未找到 {month_str} 表头") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        #     return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        target_header_row =1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if table_start_row is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.error(f"未找到 {month_str} 月度数据表") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 确定数据列位置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        data_cols = {} 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for col in range(len(df.columns)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            cell_val = str(df.iloc[target_header_row+1, col]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            data_cols["year_month"] = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if "进出口" in cell_val : 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols["total"] = col 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            elif "出口" in cell_val : 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols["export"] = col 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            elif "进口" in cell_val : 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                data_cols["import"] = col 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if len(data_cols) < 1: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.error(f"未找到足够的 {month_str} 数据列") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 查找目标行(城市名所在行) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        target_row_idx = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for i in range(table_start_row, table_start_row + 20):  # 在后续行中查找 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            row_val = sheet.cell(row=i, column=1).value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if row_val and city_name in str(row_val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                target_row_idx = i 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        start_row = target_header_row + 4 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        end_row = start_row + month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if target_row_idx is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.error(f"未找到 {city_name} 数据行") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return pd.DataFrame() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 提取数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 提取多行数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        rows = df.iloc[start_row:end_row] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         results = [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        for col in [2, 3, 4, 5, 6, 7]:  # 依次为进出口、出口、进口、进出口同比、出口同比、进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            cell_value = sheet.cell(row=target_row_idx, column=col).value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            results.append(cell_value) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 转换数据类型 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        def convert_value(val): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if isinstance(val, (int, float)): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(str(val)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            elif isinstance(val, str) and val.replace(".", "").isdigit(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                return Decimal(val) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return Decimal(0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        return pd.DataFrame([{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "city_name": city_name, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_total": convert_value(results[0]), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_export": convert_value(results[1]), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "monthly_import": convert_value(results[2]), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import_export": convert_value(results[3]), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_export": convert_value(results[4]), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            "yoy_import": convert_value(results[5]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        }]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for _, row in rows.iterrows(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                year_month = str(row[data_cols["year_month"]]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                formatted_year_month = f"{year_month[:4]}-{year_month[4:]}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_total = Decimal(str(row[data_cols["total"]]))  # 进出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_export = Decimal(str(row[data_cols["export"]]))  # 出口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                monthly_import = Decimal(str(row[data_cols["import"]]))  # 进口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_import_export = Decimal(str(row[data_cols["total"] + 1]))  # 进出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_export = Decimal(str(row[data_cols["export"] + 1]))  # 出口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                yoy_import = Decimal(str(row[data_cols["import"] + 1]))  # 进口同比 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                results.append({ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "crossborder_year_month":formatted_year_month, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "city_name": city_name, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_total": monthly_total, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_import": monthly_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "monthly_export": monthly_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_import_export": yoy_import_export, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_import": yoy_import, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    "yoy_export": yoy_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                }) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.error(f"解析某一行数据出错: {e}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                continue  # 单行错误不影响整体处理 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return pd.DataFrame(results) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         log.error(f"处理湛江海关文件出错: {str(e)}") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -468,15 +578,18 @@ def parse_excel(current_dir): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 创建1月份数据 (取2月份数据的一半) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     df_half = df_full.copy() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     for col in ['monthly_total', 'monthly_import', 'monthly_export']: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        df_half[col] = df_half[col] / 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        # 注意:只有数值列才进行减半操作,避免对字符串操作 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if col in df_half.columns: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            df_half[col] = df_half[col] / 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    # 设置1月份数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    df_half['month'] = 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    # 设置1月份 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    df_half['crossborder_year_month'] = f'{year}-01' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 设置2月份数据 (取2月份数据的一半) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    df_full['month'] = 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    df_full['crossborder_year_month'] = f'{year}-02' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     for col in ['monthly_total', 'monthly_import', 'monthly_export']: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        df_full[col] = df_full[col] / 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if col in df_full.columns: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            df_full[col] = df_full[col] / 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # 合并数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     df_customs = pd.concat([df_half, df_full]) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -497,37 +610,39 @@ def parse_excel(current_dir): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 添加公共字段 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         all_results['prov_code'] = PROV_CODE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         all_results['prov_name'] = PROV_NAME 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        all_results['year'] = year 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        all_results['crossborder_year'] = year 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        all_results['city_code'] = all_results['city_name'].astype(str).map(GUANGDONG_CITY).fillna('0000') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         all_results['month'] = all_results.get('month', month) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        all_results['crossborder_year_month'] = all_results['year'].astype(str) + '-' + all_results['month'].astype( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            str).str.zfill(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # 添加城市编码 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        def get_city_code(row): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            return GUANGDONG_CITY.get(row['city_name'], '0000') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        all_results['city_code'] = all_results.apply(get_city_code, axis=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if 'crossborder_year_month' in all_results.columns: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            all_results['crossborder_year_month'] = ( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                all_results['crossborder_year_month'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                .replace('', pd.NA) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                .fillna(f'{year}-{month:02d}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            all_results['crossborder_year_month'] = f'{year}-{month:02d}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 排序并删除重复项 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        all_results = all_results.sort_values(by=['city_code', 'crossborder_year_month']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        all_results = all_results.drop_duplicates(subset=['crossborder_year_month', 'city_code'], keep='last') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # all_results = all_results.sort_values(by=['city_code', 'crossborder_year_month']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # all_results = all_results.drop_duplicates(subset=['crossborder_year_month', 'city_code'], keep='last') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 打印处理结果 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info(f"处理完成,共获得 {len(all_results)} 条数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"处理完成,共获得广东省 {len(all_results)} 条地级市数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 选择入库字段 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         final_df = all_results[[ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             'crossborder_year_month', 'prov_code', 'prov_name', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            'city_code', 'city_name', 'monthly_total', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            'monthly_import', 'monthly_export', 'yoy_import_export', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            'yoy_import', 'yoy_export' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            'crossborder_year','city_code', 'city_name', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            'monthly_total','monthly_import', 'monthly_export', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            'yoy_import_export','yoy_import', 'yoy_export' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         ]].copy() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        final_df = final_df.where(pd.notna(final_df), None) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 打印前几条数据 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info(f"处理后数据示例:\n{final_df.head()}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # log.debug(f"处理后数据示例:\n{final_df.head()}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 这里调用DBHelper入库(实际使用时请取消注释) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         from db_helper import DBHelper 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         db = DBHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         db.bulk_insert( 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -537,7 +652,6 @@ def parse_excel(current_dir): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             update_columns=['monthly_total', 'monthly_import', 'monthly_export', 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             'yoy_import_export', 'yoy_import', 'yoy_export'] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         log.info(f"{current_dir}数据已全部成功处理") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -545,12 +659,20 @@ def parse_excel(current_dir): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         log.error(f"处理失败:{current_dir},错误:{str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         raise 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-# 遍历目录的函数(原样保留) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+def convert_unit(value): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    """亿元转万元,处理空值""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 如果 value 不是特殊的无效值,进行转换并保留4位小数 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return round(Decimal(value) * 10000, 4) if value not in ['-', ''] else None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    except (InvalidOperation, ValueError): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        # 捕获异常,返回 None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # 测试入口 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    traverse_and_process(download_dir, parse_excel, province_name="guangdong") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    traverse_and_process(download_dir, parse_excel, province_name="guangdong") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db_helper = DBHelper() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    db_helper.update_prov_yoy("广东省") 
			 |