detail.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166
  1. # ---------------------------- 核心解析逻辑修改 ----------------------------
  2. from quanguo.CountryTradeYear import COUNTRY_CODE_MAPPING
  3. from utils.parse_utils import clean_county_name
  4. def chinese_class_to_number(class_str):
  5. """精确匹配中文类名"""
  6. cn_num_map = {
  7. '一': 1, '二': 2, '三': 3, '四': 4, '五': 5, '六': 6, '七': 7, '八': 8, '九': 9,
  8. '十': 10, '十一': 11, '十二': 12, '十三': 13, '十四': 14, '十五': 15, '十六': 16,
  9. '十七': 17, '十八': 18, '十九': 19, '二十': 20, '二十一': 21, '二十二': 22
  10. }
  11. match = re.match(r'^第([一二三四五六七八九十百千万]+)类$', class_str)
  12. return cn_num_map.get(match.group(1), 0) if match else 0
  13. def parse_hscode(row_value):
  14. """增强版HS编码解析,支持更多格式"""
  15. global current_class
  16. # 清洗输入值(新增删除中文括号内容)
  17. cell_value = str(row_value).strip()
  18. cell_value = re.sub(r'\s+', ' ', cell_value) # 统一空格
  19. cell_value = re.sub(r'[((].*[))]', '', cell_value) # 删除括号内容
  20. if not cell_value:
  21. return None
  22. # 匹配中文类名(如 “第一类 动物”)
  23. class_cn_match = re.match(r'^第([一二三四五六七八九十百千万]+)类', cell_value)
  24. if class_cn_match:
  25. class_num = chinese_class_to_number(f"第{class_cn_match.group(1)}类")
  26. current_class = f"{class_num:02d}"
  27. return current_class
  28. # 匹配数字类名(如 "第5类 矿产品")
  29. class_digit_match = re.match(r'^第\s*(\d+)\s*类', cell_value)
  30. if class_digit_match:
  31. class_num = class_digit_match.group(1)
  32. current_class = f"{int(class_num):02d}"
  33. return current_class
  34. # 匹配独立章名(如 "01章 活动物" 或 "第一章 工业制成品")
  35. chapter_match = re.match(r'^(?:第)?\s*([一二三四五六七八九十百千万]+|\d{1,2})\s*章', cell_value)
  36. if chapter_match:
  37. chapter_str = chapter_match.group(1)
  38. if not chapter_str.isdigit():
  39. chapter_digit = chinese_class_to_number(chapter_str + "章")
  40. else:
  41. chapter_digit = int(chapter_str)
  42. if current_class:
  43. return f"{current_class}{chapter_digit:02d}"
  44. else:
  45. return f"{chapter_digit:02d}"
  46. return None
  47. # ---------------------------- 完整代码集成 ----------------------------
  48. import re
  49. import openpyxl
  50. from decimal import Decimal
  51. import pandas as pd
  52. # ---------------------------- 全局配置 ----------------------------
  53. current_class = None
  54. year_month = '2025-04'
  55. def parse_value(value):
  56. """增强型数值解析,处理空值和特殊符号"""
  57. try:
  58. cleaned = str(value).strip().replace(',', '')
  59. # 处理空值和"-"
  60. if cleaned in ('', '-', 'NA', 'None'):
  61. return 0 # 关键修改:将"-"转换为0
  62. return Decimal(cleaned).quantize(Decimal('0.0000'))
  63. except Exception:
  64. return 0 # 所有异常情况返回0
  65. def generate_country_sql(file_path, trade_type):
  66. """生成国家维度SQL脚本(支持数字类名)"""
  67. df = pd.read_excel(file_path, engine='xlrd')
  68. converted_path = f"converted_{trade_type}.xlsx"
  69. df.to_excel(converted_path, index=False)
  70. wb = openpyxl.load_workbook(converted_path)
  71. sheet = wb.active
  72. sql_scripts = []
  73. global current_class
  74. # 遍历行数据(从第7行开始)
  75. for row_idx, row in enumerate(sheet.iter_rows(min_row=7, values_only=True), start=7):
  76. if not row[1]:
  77. continue
  78. # 解析HS编码(核心修改)
  79. hs_code = parse_hscode(row[1])
  80. if not hs_code:
  81. print(f"[行{row_idx}] 忽略无效行:{row[1]}")
  82. continue
  83. # 国家数据处理(保持原逻辑)
  84. country_data = []
  85. for col_idx in range(2, len(row), 2):
  86. # 获取国家名称(假设国家名在表头第4行)
  87. country_name = sheet.cell(row=4, column=col_idx + 1).value
  88. if not country_name:
  89. continue
  90. country_name = clean_county_name(country_name)
  91. country_code = COUNTRY_CODE_MAPPING.get(country_name, 'XX')
  92. if country_code == 'XX':
  93. # print(f"[行{row_idx}] 忽略未知国家:{country_name}")
  94. continue
  95. # 关键修改:读取国家对应的month和cumulative列
  96. month_val = parse_value(row[col_idx])
  97. cumulative_val = parse_value(row[col_idx + 1]) if col_idx + 1 < len(row) else 0
  98. country_data.append({
  99. 'country_code': country_code,
  100. 'country_name': country_name,
  101. 'month': month_val,
  102. 'cumulative': cumulative_val # 添加累计值
  103. })
  104. # 生成SQL(新增数字类兼容)
  105. if country_data:
  106. try:
  107. for country in country_data:
  108. # 使用国家专属的month/cumulative值(关键修改)
  109. sql = f"""INSERT INTO `t_yujin_crossborder_commodity_country`
  110. (`year_month`,`country_code`,`country_name`,`hs_code`,`trade_type`,
  111. `month_amount`,`cumulative_amount`)
  112. VALUES (
  113. '{year_month}',
  114. '{country['country_code']}',
  115. '{country['country_name']}',
  116. '{hs_code}',
  117. '{trade_type}',
  118. {country['month']}, # 使用国家维度数据
  119. {country['cumulative']} # 使用国家维度数据
  120. ) ON DUPLICATE KEY UPDATE
  121. month_amount = VALUES(month_amount),
  122. cumulative_amount = VALUES(cumulative_amount);"""
  123. sql_scripts.append(sql)
  124. except Exception as e:
  125. print(f"[行{row_idx}] 数据异常:{str(e)}")
  126. return sql_scripts
  127. # 执行入口保持不变
  128. if __name__ == "__main__":
  129. export_sql = generate_country_sql(r"C:/Users/admin/PyCharmMiscProject/cross/2025051809572148978.xls", "export")
  130. import_sql = generate_country_sql(r"C:/Users/admin/PyCharmMiscProject/cross/2025051809593876500.xls", "import")
  131. with open('../downloads/commodity_country04.sql', 'w', encoding='utf-8') as f:
  132. f.write('\n'.join(export_sql + import_sql))
  133. print(f"生成完成,出口{len(export_sql)}条,进口{len(import_sql)}条")