from decimal import Decimal import xlrd # 完整省级行政区划映射(GB/T 2260-2023) REGION_MAPPING = { # 直辖市 "北京市": {"code": "110000", "type": "province"}, "天津市": {"code": "120000", "type": "province"}, "上海市": {"code": "310000", "type": "province"}, "重庆市": {"code": "500000", "type": "province"}, # 省份(23个) "河北省": {"code": "130000", "type": "province"}, "山西省": {"code": "140000", "type": "province"}, "辽宁省": {"code": "210000", "type": "province"}, "吉林省": {"code": "220000", "type": "province"}, "黑龙江省": {"code": "230000", "type": "province"}, "江苏省": {"code": "320000", "type": "province"}, "浙江省": {"code": "330000", "type": "province"}, "安徽省": {"code": "340000", "type": "province"}, "福建省": {"code": "350000", "type": "province"}, "江西省": {"code": "360000", "type": "province"}, "山东省": {"code": "370000", "type": "province"}, "河南省": {"code": "410000", "type": "province"}, "湖北省": {"code": "420000", "type": "province"}, "湖南省": {"code": "430000", "type": "province"}, "广东省": {"code": "440000", "type": "province"}, "海南省": {"code": "460000", "type": "province"}, "四川省": {"code": "510000", "type": "province"}, "贵州省": {"code": "520000", "type": "province"}, "云南省": {"code": "530000", "type": "province"}, "陕西省": {"code": "610000", "type": "province"}, "甘肃省": {"code": "620000", "type": "province"}, "青海省": {"code": "630000", "type": "province"}, "台湾省": {"code": "710000", "type": "province"}, # 自治区(5个) "内蒙古自治区": {"code": "150000", "type": "province"}, "广西壮族自治区": {"code": "450000", "type": "province"}, "西藏自治区": {"code": "540000", "type": "province"}, "宁夏回族自治区": {"code": "640000", "type": "province"}, "新疆维吾尔自治区": {"code": "650000", "type": "province"}, # 特别行政区 "香港特别行政区": {"code": "810000", "type": "province"}, "澳门特别行政区": {"code": "820000", "type": "province"} } def parse_numeric(value): """增强型数值解析函数(处理空值和特殊符号)[1,8](@ref)""" try: cleaned = str(value).strip().replace(',', '') if cleaned in ('-', '', 'NA', 'NaN'): return Decimal('0.00') return Decimal(cleaned).quantize(Decimal('0.00')) except Exception as e: print(f"数值解析失败:{value},错误:{str(e)}") return Decimal('0.00') def generate_region_sql(input_file, output_file, year_month): """生成行政区划贸易数据SQL脚本(支持.xls格式)[1,7](@ref)""" workbook = xlrd.open_workbook(input_file) sheet = workbook.sheet_by_index(0) sql_scripts = [] for row_idx in range(6, sheet.nrows): row = sheet.row_values(row_idx) region_name = str(row[1]).strip() if len(row) > 1 else "" if not region_name: continue # 处理地区简称(保持原有逻辑) region_info = REGION_MAPPING.get(region_name) or next( (v for k, v in REGION_MAPPING.items() if region_name in k), None ) if not region_info: print(f"警告:未找到地区编码映射 - {region_name}") continue data_fields = { 'monthly_total': parse_numeric(row[2]), 'monthly_export': parse_numeric(row[4]), 'monthly_import': parse_numeric(row[6]), 'ytd_total': parse_numeric(row[8]), 'ytd_export': parse_numeric(row[9]), 'ytd_import': parse_numeric(row[10]) } # 生成SQL模板[5](@ref) sql_template = f""" INSERT INTO `t_yujin_crossborder_region_trade` (`year_month`, `region_code`, `region_name`, `region_type`, `monthly_total`, `monthly_import`, `monthly_export`, `ytd_total`, `ytd_import`, `ytd_export`) VALUES ( '{year_month}', '{region_info['code']}', '{region_name}', '{region_info['type']}', {data_fields['monthly_total']}, {data_fields['monthly_import']}, {data_fields['monthly_export']}, {data_fields['ytd_total']}, {data_fields['ytd_import']}, {data_fields['ytd_export']} ) ON DUPLICATE KEY UPDATE monthly_total = VALUES(monthly_total), monthly_import = VALUES(monthly_import), monthly_export = VALUES(monthly_export), ytd_total = VALUES(ytd_total), ytd_import = VALUES(ytd_import), ytd_export = VALUES(ytd_export); """ sql_scripts.append(sql_template) # 写入SQL文件 with open(output_file, 'w', encoding='utf-8') as f: f.write('\n'.join(sql_scripts)) return len(sql_scripts) # 执行示例(需提前安装xlrd==1.2.0) if __name__ == "__main__": input_excel = r"D:/Downloads/2025051809411226553.xls" output_sql = "../downloads/region_trade04.sql" year_month = "2025-04" try: count = generate_region_sql(input_excel, output_sql, year_month) print(f"成功生成{count}条SQL语句,已保存至{output_sql}") except Exception as e: print(f"生成失败:{str(e)}")