commodity_trade.py 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import re
  2. import openpyxl
  3. from decimal import Decimal
  4. import pandas as pd
  5. from quanguo.detail import parse_hscode
  6. def chinese_class_to_number(class_str):
  7. """精确匹配中文类名"""
  8. cn_num_map = {
  9. '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
  10. '十': 10, '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15, '十六': 16,
  11. '十七': 17, '十八': 18, '十九': 19, '二十': 20, '二十一': 21, '二十二': 22
  12. }
  13. match = re.match(r'^第([一二三四五六七八九十百千万]+)类$', class_str)
  14. return cn_num_map.get(match.group(1), 0) if match else 0
  15. def parse_value(val):
  16. """数值处理(包含科学计数法处理)"""
  17. if val in ('-', None, 'None', 'null'):
  18. return 'NULL'
  19. try:
  20. # 处理科学计数法(如1.2E+5)
  21. if 'E' in str(val).upper():
  22. return Decimal(f"{float(val):.2f}").quantize(Decimal('0.00'))
  23. return Decimal(str(val).replace(',', '')).quantize(Decimal('0.00'))
  24. except Exception as e:
  25. print(f"数值解析错误:{val},错误:{e}")
  26. return 'NULL'
  27. # 数据预处理
  28. df = pd.read_excel(r"D:/Downloads/2025051809313028789.xls", engine="xlrd")
  29. df.to_excel("converted.xlsx", index=False)
  30. wb = openpyxl.load_workbook('converted.xlsx')
  31. sheet = wb.active
  32. current_class = None
  33. year_month = '2025-04'
  34. sql_scripts = []
  35. # 遍历所有数据行(示例数据从第7行开始)
  36. for row_idx, row in enumerate(sheet.iter_rows(min_row=7, values_only=True), start=7):
  37. if not row[1]: continue
  38. hs_code = parse_hscode(row[1])
  39. if not hs_code:
  40. print(f"[行{row_idx}] 忽略无效行:{row[1]}")
  41. continue
  42. # 类行数据插入
  43. try:
  44. monthly_export = parse_value(row[2])
  45. ytd_export = parse_value(row[3])
  46. monthly_import = parse_value(row[4])
  47. ytd_import = parse_value(row[5])
  48. ytd_yoy_export = parse_value(row[6])
  49. ytd_yoy_import = parse_value(row[7])
  50. except IndexError as e:
  51. print(f"行{row_idx} 列索引错误:{e}")
  52. continue
  53. sql = f"""INSERT INTO `t_yujin_crossborder_commodity_trade`
  54. (`year_month`, `hs_code`, `monthly_import`, `monthly_export`,
  55. `ytd_import`, `ytd_export`, `ytd_yoy_import`, `ytd_yoy_export`)
  56. VALUES ('{year_month}', '{hs_code}',
  57. {monthly_import}, {monthly_export},
  58. {ytd_import}, {ytd_export},
  59. {ytd_yoy_import}, {ytd_yoy_export})
  60. ON DUPLICATE KEY UPDATE
  61. monthly_import = VALUES(monthly_import),
  62. monthly_export = VALUES(monthly_export),
  63. ytd_import = VALUES(ytd_import),
  64. ytd_export = VALUES(ytd_export),
  65. ytd_yoy_import = VALUES(ytd_yoy_import),
  66. ytd_yoy_export = VALUES(ytd_yoy_export);"""
  67. sql_scripts.append(sql)
  68. continue
  69. # 结果输出
  70. with open('../downloads/commodity_trade04.sql', 'w', encoding='utf-8') as f:
  71. f.write('\n'.join(sql_scripts))
  72. print(f"生成完成,共处理 {len(sql_scripts)} 条有效记录")