123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190 |
- import os
- from decimal import Decimal
- import pymysql
- import xlrd
- from pymysql import Error
- YEAR = 2025
- DB_CONFIG = {
- 'host': '10.130.75.149',
- 'port': 3307,
- 'user': 'yto_crm',
- 'password': '%3sFUlsolaRI',
- 'database': 'crm_uat',
- 'charset': 'utf8mb4'
- }
- # 完整省级行政区划映射(GB/T 2260-2023)
- REGION_MAPPING = {
- # 直辖市
- "北京市": {"code": "110000", "type": "province"},
- "天津市": {"code": "120000", "type": "province"},
- "上海市": {"code": "310000", "type": "province"},
- "重庆市": {"code": "500000", "type": "province"},
- # 省份(23个)
- "河北省": {"code": "130000", "type": "province"},
- "山西省": {"code": "140000", "type": "province"},
- "辽宁省": {"code": "210000", "type": "province"},
- "吉林省": {"code": "220000", "type": "province"},
- "黑龙江省": {"code": "230000", "type": "province"},
- "江苏省": {"code": "320000", "type": "province"},
- "浙江省": {"code": "330000", "type": "province"},
- "安徽省": {"code": "340000", "type": "province"},
- "福建省": {"code": "350000", "type": "province"},
- "江西省": {"code": "360000", "type": "province"},
- "山东省": {"code": "370000", "type": "province"},
- "河南省": {"code": "410000", "type": "province"},
- "湖北省": {"code": "420000", "type": "province"},
- "湖南省": {"code": "430000", "type": "province"},
- "广东省": {"code": "440000", "type": "province"},
- "海南省": {"code": "460000", "type": "province"},
- "四川省": {"code": "510000", "type": "province"},
- "贵州省": {"code": "520000", "type": "province"},
- "云南省": {"code": "530000", "type": "province"},
- "陕西省": {"code": "610000", "type": "province"},
- "甘肃省": {"code": "620000", "type": "province"},
- "青海省": {"code": "630000", "type": "province"},
- "台湾省": {"code": "710000", "type": "province"},
- # 自治区(5个)
- "内蒙古自治区": {"code": "150000", "type": "province"},
- "广西壮族自治区": {"code": "450000", "type": "province"},
- "西藏自治区": {"code": "540000", "type": "province"},
- "宁夏回族自治区": {"code": "640000", "type": "province"},
- "新疆维吾尔自治区": {"code": "650000", "type": "province"},
- # 特别行政区
- "香港特别行政区": {"code": "810000", "type": "province"},
- "澳门特别行政区": {"code": "820000", "type": "province"}
- }
- def parse_numeric(value):
- """数值解析(增强容错),保留四位小数"""
- try:
- cleaned = str(value).strip().replace(',', '').replace('—', '-')
- if cleaned in ('-', '', 'NA', 'N/A'):
- return Decimal('0.0000')
- return Decimal(cleaned).quantize(Decimal('0.0000'))
- except Exception as e:
- print(f"数值解析失败:{value},错误:{str(e)}")
- return Decimal('0.0000')
- def batch_upsert(conn, file_path, year_month):
- """批量更新插入数据"""
- try:
- workbook = xlrd.open_workbook(file_path)
- sheet = workbook.sheet_by_index(0)
- except Exception as e:
- print(f"文件读取失败:{file_path}\n错误:{str(e)}")
- return 0
- cursor = conn.cursor()
- processed = 0
- # 从第7行开始读取(索引6)
- for row_idx in range(6, sheet.nrows):
- try:
- row = sheet.row_values(row_idx)
- region_name = str(row[1]).strip()
- # 跳过空行和标题行
- if not region_name or region_name == "地区":
- continue
- # 精确匹配行政区划
- region_info = REGION_MAPPING.get(region_name, None)
- if not region_info:
- # 尝试模糊匹配(处理名称变体)
- region_info = next(
- (v for k in REGION_MAPPING if region_name in k), None)
- if not region_info:
- # print(f"地区匹配失败:{region_name}")
- continue
- print(f"处理地区 - {region_name}")
- # 构建参数元组
- params = (
- year_month,
- region_info['code'], # region_code
- region_name,
- region_info['type'],
- parse_numeric(row[2]), # monthly_total
- parse_numeric(row[4]), # monthly_import
- parse_numeric(row[6]), # monthly_export
- parse_numeric(row[8]), # ytd_total
- parse_numeric(row[9]), # ytd_import
- parse_numeric(row[10]) # ytd_export
- )
- # 执行插入更新
- cursor.execute(
- """
- INSERT INTO `t_yujin_crossborder_region_trade`
- (`year_month`, `region_code`, `region_name`, `region_type`,
- `monthly_total`,`monthly_export`, `monthly_import`,
- `ytd_total`, `ytd_export`, `ytd_import`)
- VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY
- UPDATE
- region_name =
- VALUES (region_name), region_type =
- VALUES (region_type), monthly_total =
- VALUES (monthly_total), monthly_import =
- VALUES (monthly_import), monthly_export =
- VALUES (monthly_export), ytd_total =
- VALUES (ytd_total), ytd_import =
- VALUES (ytd_import), ytd_export =
- VALUES (ytd_export)
- """, params)
- processed += 1
- except Exception as e:
- print(f"行{row_idx}处理失败:{str(e)}")
- continue
- conn.commit()
- cursor.close()
- return processed
- def main():
- """主执行流程"""
- try:
- # 建立数据库连接
- conn = pymysql.connect(**DB_CONFIG)
- # 按月份顺序处理
- for month in range(1, 13):
- # 构建文件路径
- folder = f"downloads/{YEAR}/{month:02d}月"
- file_name = f"(8){YEAR}年进出口商品收发货人所在地总值表.xls"
- file_path = os.path.join(folder, file_name)
- file_path = os.path.normpath(file_path)
- # 检查文件存在性
- if not os.path.isfile(file_path):
- print(f"文件不存在,跳过:{file_path}")
- continue
- # 生成年月标识
- year_month = f"{YEAR}-{month:02d}"
- # print(f"⌛ 正在处理 {year_month} 数据...")
- try:
- count = batch_upsert(conn, file_path, year_month)
- print(f"✅ 成功更新 {year_month} 数据,影响{count}条记录")
- except Exception as e:
- print(f"❌ {year_month} 处理失败:{str(e)}")
- conn.rollback()
- except Error as e:
- print(f"数据库连接失败:{str(e)}")
- finally:
- if conn and conn.open:
- conn.close()
- print("🏁 所有月份处理完成")
- if __name__ == "__main__":
- main()
|