region_trade2024.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import os
  2. from decimal import Decimal
  3. import pymysql
  4. import xlrd
  5. from pymysql import Error
  6. YEAR = 2025
  7. DB_CONFIG = {
  8. 'host': '10.130.75.149',
  9. 'port': 3307,
  10. 'user': 'yto_crm',
  11. 'password': '%3sFUlsolaRI',
  12. 'database': 'crm_uat',
  13. 'charset': 'utf8mb4'
  14. }
  15. # 完整省级行政区划映射(GB/T 2260-2023)
  16. REGION_MAPPING = {
  17. # 直辖市
  18. "北京市": {"code": "110000", "type": "province"},
  19. "天津市": {"code": "120000", "type": "province"},
  20. "上海市": {"code": "310000", "type": "province"},
  21. "重庆市": {"code": "500000", "type": "province"},
  22. # 省份(23个)
  23. "河北省": {"code": "130000", "type": "province"},
  24. "山西省": {"code": "140000", "type": "province"},
  25. "辽宁省": {"code": "210000", "type": "province"},
  26. "吉林省": {"code": "220000", "type": "province"},
  27. "黑龙江省": {"code": "230000", "type": "province"},
  28. "江苏省": {"code": "320000", "type": "province"},
  29. "浙江省": {"code": "330000", "type": "province"},
  30. "安徽省": {"code": "340000", "type": "province"},
  31. "福建省": {"code": "350000", "type": "province"},
  32. "江西省": {"code": "360000", "type": "province"},
  33. "山东省": {"code": "370000", "type": "province"},
  34. "河南省": {"code": "410000", "type": "province"},
  35. "湖北省": {"code": "420000", "type": "province"},
  36. "湖南省": {"code": "430000", "type": "province"},
  37. "广东省": {"code": "440000", "type": "province"},
  38. "海南省": {"code": "460000", "type": "province"},
  39. "四川省": {"code": "510000", "type": "province"},
  40. "贵州省": {"code": "520000", "type": "province"},
  41. "云南省": {"code": "530000", "type": "province"},
  42. "陕西省": {"code": "610000", "type": "province"},
  43. "甘肃省": {"code": "620000", "type": "province"},
  44. "青海省": {"code": "630000", "type": "province"},
  45. "台湾省": {"code": "710000", "type": "province"},
  46. # 自治区(5个)
  47. "内蒙古自治区": {"code": "150000", "type": "province"},
  48. "广西壮族自治区": {"code": "450000", "type": "province"},
  49. "西藏自治区": {"code": "540000", "type": "province"},
  50. "宁夏回族自治区": {"code": "640000", "type": "province"},
  51. "新疆维吾尔自治区": {"code": "650000", "type": "province"},
  52. # 特别行政区
  53. "香港特别行政区": {"code": "810000", "type": "province"},
  54. "澳门特别行政区": {"code": "820000", "type": "province"}
  55. }
  56. def parse_numeric(value):
  57. """数值解析(增强容错),保留四位小数"""
  58. try:
  59. cleaned = str(value).strip().replace(',', '').replace('—', '-')
  60. if cleaned in ('-', '', 'NA', 'N/A'):
  61. return Decimal('0.0000')
  62. return Decimal(cleaned).quantize(Decimal('0.0000'))
  63. except Exception as e:
  64. print(f"数值解析失败:{value},错误:{str(e)}")
  65. return Decimal('0.0000')
  66. def batch_upsert(conn, file_path, year_month):
  67. """批量更新插入数据"""
  68. try:
  69. workbook = xlrd.open_workbook(file_path)
  70. sheet = workbook.sheet_by_index(0)
  71. except Exception as e:
  72. print(f"文件读取失败:{file_path}\n错误:{str(e)}")
  73. return 0
  74. cursor = conn.cursor()
  75. processed = 0
  76. # 从第7行开始读取(索引6)
  77. for row_idx in range(6, sheet.nrows):
  78. try:
  79. row = sheet.row_values(row_idx)
  80. region_name = str(row[1]).strip()
  81. # 跳过空行和标题行
  82. if not region_name or region_name == "地区":
  83. continue
  84. # 精确匹配行政区划
  85. region_info = REGION_MAPPING.get(region_name, None)
  86. if not region_info:
  87. # 尝试模糊匹配(处理名称变体)
  88. region_info = next(
  89. (v for k in REGION_MAPPING if region_name in k), None)
  90. if not region_info:
  91. # print(f"地区匹配失败:{region_name}")
  92. continue
  93. print(f"处理地区 - {region_name}")
  94. # 构建参数元组
  95. params = (
  96. year_month,
  97. region_info['code'], # region_code
  98. region_name,
  99. region_info['type'],
  100. parse_numeric(row[2]), # monthly_total
  101. parse_numeric(row[4]), # monthly_import
  102. parse_numeric(row[6]), # monthly_export
  103. parse_numeric(row[8]), # ytd_total
  104. parse_numeric(row[9]), # ytd_import
  105. parse_numeric(row[10]) # ytd_export
  106. )
  107. # 执行插入更新
  108. cursor.execute(
  109. """
  110. INSERT INTO `t_yujin_crossborder_region_trade`
  111. (`year_month`, `region_code`, `region_name`, `region_type`,
  112. `monthly_total`,`monthly_export`, `monthly_import`,
  113. `ytd_total`, `ytd_export`, `ytd_import`)
  114. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s) ON DUPLICATE KEY
  115. UPDATE
  116. region_name =
  117. VALUES (region_name), region_type =
  118. VALUES (region_type), monthly_total =
  119. VALUES (monthly_total), monthly_import =
  120. VALUES (monthly_import), monthly_export =
  121. VALUES (monthly_export), ytd_total =
  122. VALUES (ytd_total), ytd_import =
  123. VALUES (ytd_import), ytd_export =
  124. VALUES (ytd_export)
  125. """, params)
  126. processed += 1
  127. except Exception as e:
  128. print(f"行{row_idx}处理失败:{str(e)}")
  129. continue
  130. conn.commit()
  131. cursor.close()
  132. return processed
  133. def main():
  134. """主执行流程"""
  135. try:
  136. # 建立数据库连接
  137. conn = pymysql.connect(**DB_CONFIG)
  138. # 按月份顺序处理
  139. for month in range(1, 13):
  140. # 构建文件路径
  141. folder = f"downloads/{YEAR}/{month:02d}月"
  142. file_name = f"(8){YEAR}年进出口商品收发货人所在地总值表.xls"
  143. file_path = os.path.join(folder, file_name)
  144. file_path = os.path.normpath(file_path)
  145. # 检查文件存在性
  146. if not os.path.isfile(file_path):
  147. print(f"文件不存在,跳过:{file_path}")
  148. continue
  149. # 生成年月标识
  150. year_month = f"{YEAR}-{month:02d}"
  151. # print(f"⌛ 正在处理 {year_month} 数据...")
  152. try:
  153. count = batch_upsert(conn, file_path, year_month)
  154. print(f"✅ 成功更新 {year_month} 数据,影响{count}条记录")
  155. except Exception as e:
  156. print(f"❌ {year_month} 处理失败:{str(e)}")
  157. conn.rollback()
  158. except Error as e:
  159. print(f"数据库连接失败:{str(e)}")
  160. finally:
  161. if conn and conn.open:
  162. conn.close()
  163. print("🏁 所有月份处理完成")
  164. if __name__ == "__main__":
  165. main()