# crawl_gov_commodity.py
import pandas as pd

from utils import base_mysql
from utils.log import log
  4. def generate_sql_from_excel(excel_file):
  5. # 读取 Excel 文件
  6. df = pd.read_excel(excel_file)
  7. sql_arr = []
  8. auto_increment = 0
  9. code = 0
  10. for index, row in df.iterrows():
  11. commodity_code = str(row.values[4])
  12. level = 0
  13. for i in range(4):
  14. if not pd.isna(row.values[i]):
  15. if 0 == i:
  16. auto_increment += 1
  17. code = 'C0' + str(auto_increment)
  18. level = i + 1
  19. commodity_name = str(row.values[i]).rstrip('*')
  20. commodity_name = commodity_name.replace('(', '(').replace(')', ')')
  21. sql = f"INSERT INTO t_yujin_crossborder_prov_commodity_category (code, level,commodity_code, commodity_name, create_time) VALUES ('{code}', {level}, '{commodity_code}', '{commodity_name}', now());"
  22. sql_arr.append(sql)
  23. log.info(sql)
  24. log.info(f"√ 成功生成 commodity category SQL 文件 size {len(sql_arr)} ")
  25. # 解析完后生成sql文件批量入库
  26. base_mysql.bulk_insert(sql_arr)
  27. log.info("trade SQL 存表完成!")
  28. # 商品书数据初始化
  29. excel_file = 'C:\\Users\\admin\\Desktop\\海关总署数据梳理.xlsx' # 输入的 Excel 文件
  30. generate_sql_from_excel(excel_file)