# ---------------------------- 核心解析逻辑修改 ---------------------------- from quanguo.CountryTradeYear import COUNTRY_CODE_MAPPING from utils.parse_utils import clean_county_name def chinese_class_to_number(class_str): """精确匹配中文类名""" cn_num_map = { '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9, '十': 10, '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15, '十六': 16, '十七': 17, '十八': 18, '十九': 19, '二十': 20, '二十一': 21, '二十二': 22 } match = re.match(r'^第([一二三四五六七八九十百千万]+)类$', class_str) return cn_num_map.get(match.group(1), 0) if match else 0 def parse_hscode(row_value): """增强版HS编码解析,支持更多格式""" global current_class # 清洗输入值(新增删除中文括号内容) cell_value = str(row_value).strip() cell_value = re.sub(r'\s+', ' ', cell_value) # 统一空格 cell_value = re.sub(r'[((].*[))]', '', cell_value) # 删除括号内容 if not cell_value: return None # 匹配中文类名(如 “第一类 动物”) class_cn_match = re.match(r'^第([一二三四五六七八九十百千万]+)类', cell_value) if class_cn_match: class_num = chinese_class_to_number(f"第{class_cn_match.group(1)}类") current_class = f"{class_num:02d}" return current_class # 匹配数字类名(如 "第5类 矿产品") class_digit_match = re.match(r'^第\s*(\d+)\s*类', cell_value) if class_digit_match: class_num = class_digit_match.group(1) current_class = f"{int(class_num):02d}" return current_class # 匹配独立章名(如 "01章 活动物" 或 "第一章 工业制成品") chapter_match = re.match(r'^(?:第)?\s*([一二三四五六七八九十百千万]+|\d{1,2})\s*章', cell_value) if chapter_match: chapter_str = chapter_match.group(1) if not chapter_str.isdigit(): chapter_digit = chinese_class_to_number(chapter_str + "章") else: chapter_digit = int(chapter_str) if current_class: return f"{current_class}{chapter_digit:02d}" else: return f"{chapter_digit:02d}" return None # ---------------------------- 完整代码集成 ---------------------------- import re import openpyxl from decimal import Decimal import pandas as pd # ---------------------------- 全局配置 ---------------------------- current_class = None year_month = '2025-04' def parse_value(value): """增强型数值解析,处理空值和特殊符号""" try: cleaned = str(value).strip().replace(',', '') # 处理空值和"-" if cleaned in ('', '-', 'NA', 'None'): return 0 # 关键修改:将"-"转换为0 return Decimal(cleaned).quantize(Decimal('0.0000')) except Exception: return 0 # 所有异常情况返回0 def generate_country_sql(file_path, trade_type): """生成国家维度SQL脚本(支持数字类名)""" df = pd.read_excel(file_path, engine='xlrd') converted_path = f"converted_{trade_type}.xlsx" df.to_excel(converted_path, index=False) wb = openpyxl.load_workbook(converted_path) sheet = wb.active sql_scripts = [] global current_class # 遍历行数据(从第7行开始) for row_idx, row in enumerate(sheet.iter_rows(min_row=7, values_only=True), start=7): if not row[1]: continue # 解析HS编码(核心修改) hs_code = parse_hscode(row[1]) if not hs_code: print(f"[行{row_idx}] 忽略无效行:{row[1]}") continue # 国家数据处理(保持原逻辑) country_data = [] for col_idx in range(2, len(row), 2): # 获取国家名称(假设国家名在表头第4行) country_name = sheet.cell(row=4, column=col_idx + 1).value if not country_name: continue country_name = clean_county_name(country_name) country_code = COUNTRY_CODE_MAPPING.get(country_name, 'XX') if country_code == 'XX': # print(f"[行{row_idx}] 忽略未知国家:{country_name}") continue # 关键修改:读取国家对应的month和cumulative列 month_val = parse_value(row[col_idx]) cumulative_val = parse_value(row[col_idx + 1]) if col_idx + 1 < len(row) else 0 country_data.append({ 'country_code': country_code, 'country_name': country_name, 'month': month_val, 'cumulative': cumulative_val # 添加累计值 }) # 生成SQL(新增数字类兼容) if country_data: try: for country in country_data: # 使用国家专属的month/cumulative值(关键修改) sql = f"""INSERT INTO `t_yujin_crossborder_commodity_country` (`year_month`,`country_code`,`country_name`,`hs_code`,`trade_type`, `month_amount`,`cumulative_amount`) VALUES ( '{year_month}', '{country['country_code']}', '{country['country_name']}', '{hs_code}', '{trade_type}', {country['month']}, # 使用国家维度数据 {country['cumulative']} # 使用国家维度数据 ) ON DUPLICATE KEY UPDATE month_amount = VALUES(month_amount), cumulative_amount = VALUES(cumulative_amount);""" sql_scripts.append(sql) except Exception as e: print(f"[行{row_idx}] 数据异常:{str(e)}") return sql_scripts # 执行入口保持不变 if __name__ == "__main__": export_sql = generate_country_sql(r"C:/Users/admin/PyCharmMiscProject/cross/2025051809572148978.xls", "export") import_sql = generate_country_sql(r"C:/Users/admin/PyCharmMiscProject/cross/2025051809593876500.xls", "import") with open('../downloads/commodity_country04.sql', 'w', encoding='utf-8') as f: f.write('\n'.join(export_sql + import_sql)) print(f"生成完成,出口{len(export_sql)}条,进口{len(import_sql)}条")