import os from decimal import Decimal import pymysql import xlrd from pymysql import Error YEAR = 2025 DB_CONFIG = { 'host': '10.130.75.149', 'port': 3307, 'user': 'yto_crm', 'password': '%3sFUlsolaRI', 'database': 'crm_uat', 'charset': 'utf8mb4' } # 完整省级行政区划映射(GB/T 2260-2023) REGION_MAPPING = { # 直辖市 "北京市": {"code": "110000", "type": "province"}, "天津市": {"code": "120000", "type": "province"}, "上海市": {"code": "310000", "type": "province"}, "重庆市": {"code": "500000", "type": "province"}, # 省份(23个) "河北省": {"code": "130000", "type": "province"}, "山西省": {"code": "140000", "type": "province"}, "辽宁省": {"code": "210000", "type": "province"}, "吉林省": {"code": "220000", "type": "province"}, "黑龙江省": {"code": "230000", "type": "province"}, "江苏省": {"code": "320000", "type": "province"}, "浙江省": {"code": "330000", "type": "province"}, "安徽省": {"code": "340000", "type": "province"}, "福建省": {"code": "350000", "type": "province"}, "江西省": {"code": "360000", "type": "province"}, "山东省": {"code": "370000", "type": "province"}, "河南省": {"code": "410000", "type": "province"}, "湖北省": {"code": "420000", "type": "province"}, "湖南省": {"code": "430000", "type": "province"}, "广东省": {"code": "440000", "type": "province"}, "海南省": {"code": "460000", "type": "province"}, "四川省": {"code": "510000", "type": "province"}, "贵州省": {"code": "520000", "type": "province"}, "云南省": {"code": "530000", "type": "province"}, "陕西省": {"code": "610000", "type": "province"}, "甘肃省": {"code": "620000", "type": "province"}, "青海省": {"code": "630000", "type": "province"}, "台湾省": {"code": "710000", "type": "province"}, # 自治区(5个) "内蒙古自治区": {"code": "150000", "type": "province"}, "广西壮族自治区": {"code": "450000", "type": "province"}, "西藏自治区": {"code": "540000", "type": "province"}, "宁夏回族自治区": {"code": "640000", "type": "province"}, "新疆维吾尔自治区": {"code": "650000", "type": "province"}, # 特别行政区 "香港特别行政区": {"code": "810000", "type": "province"}, "澳门特别行政区": {"code": "820000", "type": "province"} } def parse_numeric(value): """数值解析(增强容错),保留四位小数""" try: cleaned = str(value).strip().replace(',', '').replace('—', '-') if cleaned in ('-', '', 'NA', 'N/A'): return Decimal('0.0000') return Decimal(cleaned).quantize(Decimal('0.0000')) except Exception as e: print(f"数值解析失败:{value},错误:{str(e)}") return Decimal('0.0000') def batch_upsert(conn, file_path, year_month): """批量更新插入数据""" try: workbook = xlrd.open_workbook(file_path) sheet = workbook.sheet_by_index(0) except Exception as e: print(f"文件读取失败:{file_path}\n错误:{str(e)}") return 0 cursor = conn.cursor() processed = 0 # 从第7行开始读取(索引6) for row_idx in range(6, sheet.nrows): try: row = sheet.row_values(row_idx) region_name = str(row[1]).strip() # 跳过空行和标题行 if not region_name or region_name == "地区": continue # 精确匹配行政区划 region_info = REGION_MAPPING.get(region_name, None) if not region_info: # 尝试模糊匹配(处理名称变体) region_info = next( (v for k in REGION_MAPPING if region_name in k), None) if not region_info: # print(f"地区匹配失败:{region_name}") continue print(f"处理地区 - {region_name}") # 构建参数元组 params = ( year_month, region_info['code'], # region_code region_name, region_info['type'], parse_numeric(row[2]), # monthly_total parse_numeric(row[4]), # monthly_import parse_numeric(row[6]), # monthly_export parse_numeric(row[8]), # ytd_total parse_numeric(row[9]), # ytd_import parse_numeric(row[10]) # ytd_export ) # 执行插入更新 cursor.execute( """ INSERT INTO `t_yujin_crossborder_region_trade` (`year_month`, `region_code`, `region_name`, `region_type`, `monthly_total`,`monthly_export`, `monthly_import`, `ytd_total`, `ytd_export`, `ytd_import`) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY UPDATE region_name = VALUES (region_name), region_type = VALUES (region_type), monthly_total = VALUES (monthly_total), monthly_import = VALUES (monthly_import), monthly_export = VALUES (monthly_export), ytd_total = VALUES (ytd_total), ytd_import = VALUES (ytd_import), ytd_export = VALUES (ytd_export) """, params) processed += 1 except Exception as e: print(f"行{row_idx}处理失败:{str(e)}") continue conn.commit() cursor.close() return processed def main(): """主执行流程""" try: # 建立数据库连接 conn = pymysql.connect(**DB_CONFIG) # 按月份顺序处理 for month in range(1, 13): # 构建文件路径 folder = f"downloads/{YEAR}/{month:02d}月" file_name = f"(8){YEAR}年进出口商品收发货人所在地总值表.xls" file_path = os.path.join(folder, file_name) file_path = os.path.normpath(file_path) # 检查文件存在性 if not os.path.isfile(file_path): print(f"文件不存在,跳过:{file_path}") continue # 生成年月标识 year_month = f"{YEAR}-{month:02d}" # print(f"⌛ 正在处理 {year_month} 数据...") try: count = batch_upsert(conn, file_path, year_month) print(f"✅ 成功更新 {year_month} 数据,影响{count}条记录") except Exception as e: print(f"❌ {year_month} 处理失败:{str(e)}") conn.rollback() except Error as e: print(f"数据库连接失败:{str(e)}") finally: if conn and conn.open: conn.close() print("🏁 所有月份处理完成") if __name__ == "__main__": main()