monthData.py 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126
  1. import re
  2. import xlrd
  3. OUTPUT_SQL = "../downloads/trade_monthly04.sql" # 输出文件名
  4. def is_2025_data(date_str):
  5. """判断是否为2025年数据[1,5](@ref)"""
  6. try:
  7. return date_str.startswith('2025.')
  8. except:
  9. return False
  10. def convert_unit(value):
  11. """亿元转万元,处理空值和异常[1,4](@ref)"""
  12. try:
  13. return round(float(value) * 10000, 4) if value not in ['-', ''] else None
  14. except:
  15. return None
  16. def parse_ratio(value):
  17. """处理百分比数据[5](@ref)"""
  18. return f"'{value}'" if value not in ['-', ''] else 'NULL'
  19. def generate_sql(data_group):
  20. """动态生成SQL"""
  21. # 基础数据行
  22. base_row = data_group[0]
  23. year_month = base_row[1].replace('.', '-')
  24. # 月度数据转换
  25. monthly_total = convert_unit(base_row[2])
  26. monthly_import = convert_unit(base_row[4]) # 第5列是进口
  27. monthly_export = convert_unit(base_row[3]) # 第4列是出口
  28. trade_balance = convert_unit(base_row[5])
  29. ytd_total = convert_unit(base_row[6])
  30. ytd_import = convert_unit(base_row[8])
  31. ytd_export = convert_unit(base_row[7])
  32. ytd_balance = convert_unit(base_row[9])
  33. # 初始化同比数据
  34. yoy_data = {'import_export': 'NULL', 'import': 'NULL', 'export': 'NULL'}
  35. # 处理2025年三行数据
  36. if len(data_group) == 3:
  37. # 同比数据在第2行的3-5列
  38. yoy_row = data_group[1]
  39. yoy_data = {
  40. 'import_export': parse_ratio(yoy_row[2]), # 第4列
  41. 'import': parse_ratio(yoy_row[4]), # 第5列
  42. 'export': parse_ratio(yoy_row[3]) # 第6列
  43. }
  44. return f"""
  45. INSERT INTO `t_yujin_crossborder_monthly_summary`
  46. (`year_month`, `monthly_total`, `monthly_import`, `monthly_export`, `trade_balance`,
  47. `ytd_total`, `ytd_import`, `ytd_export`, `ytd_trade_balance`,
  48. `yoy_import_export`, `yoy_import`, `yoy_export`, `create_time`)
  49. VALUES (
  50. '{year_month}',
  51. {monthly_total or 'NULL'},
  52. {monthly_import or 'NULL'},
  53. {monthly_export or 'NULL'},
  54. {trade_balance or 'NULL'},
  55. {ytd_total or 'NULL'},
  56. {ytd_import or 'NULL'},
  57. {ytd_export or 'NULL'},
  58. {ytd_balance or 'NULL'},
  59. {yoy_data['import_export']},
  60. {yoy_data['import']},
  61. {yoy_data['export']},
  62. NOW()
  63. );
  64. """
  65. def main():
  66. workbook = xlrd.open_workbook('D:/Downloads/2025051809220574256.xls')
  67. sheet = workbook.sheet_by_index(0)
  68. with open(OUTPUT_SQL, 'w', encoding='utf-8') as f:
  69. pass
  70. row_idx = 5 # 数据起始行
  71. while row_idx < sheet.nrows:
  72. try:
  73. # 获取基础行
  74. base_row = sheet.row_values(row_idx)
  75. date_cell = str(base_row[1])
  76. if not re.match(r"202[0-9]\.\d{2}", date_cell): # 跳过无效行
  77. row_idx += 1
  78. continue
  79. # 动态读取数据组
  80. if is_2025_data(date_cell):
  81. data_group = [
  82. base_row,
  83. sheet.row_values(row_idx + 1),
  84. sheet.row_values(row_idx + 2)
  85. ]
  86. step = 3
  87. else:
  88. data_group = [base_row]
  89. step = 1
  90. sql = generate_sql(data_group)
  91. with open(OUTPUT_SQL, "a", encoding="utf-8") as f:
  92. f.write(sql + "\n")
  93. print(f"成功生成 {len(sql)} 条SQL语句,保存至:{OUTPUT_SQL}")
  94. print(sql)
  95. row_idx += step
  96. except IndexError:
  97. print(f"行{row_idx}数据不完整,已跳过")
  98. row_idx += 1
  99. except Exception as e:
  100. print(f"处理行{row_idx}出错:{str(e)}")
  101. row_idx += 1
  102. if __name__ == "__main__":
  103. main()