# crawl_gov_commodity.py
  1. import pandas as pd
  2. from com.zf.crawl import base_mysql
  3. def generate_sql_from_excel(excel_file):
  4. # 读取 Excel 文件
  5. df = pd.read_excel(excel_file)
  6. sql_arr = []
  7. auto_increment = 0
  8. code = 0
  9. for index, row in df.iterrows():
  10. commodity_code = str(row.values[4])
  11. level = 0
  12. for i in range(4):
  13. if not pd.isna(row.values[i]):
  14. if 0 == i:
  15. auto_increment += 1
  16. code = 'C0' + str(auto_increment)
  17. level = i + 1
  18. commodity_name = str(row.values[i]).rstrip('*')
  19. commodity_name = commodity_name.replace('(', '(').replace(')', ')')
  20. sql = f"INSERT INTO t_yujin_crossborder_prov_commodity_category (code, level,commodity_code, commodity_name, create_time) VALUES ('{code}', {level}, '{commodity_code}', '{commodity_name}', now());"
  21. sql_arr.append(sql)
  22. print(sql)
  23. print(f"√ 成功生成 commodity category SQL 文件 size {len(sql_arr)} ")
  24. # 解析完后生成sql文件批量入库
  25. base_mysql.bulk_insert(sql_arr)
  26. print("trade SQL 存表完成!")
  27. # 商品书数据初始化
  28. excel_file = 'C:\\Users\\admin\\Desktop\\海关总署数据梳理.xlsx' # 输入的 Excel 文件
  29. generate_sql_from_excel(excel_file)