crawl_gov_commodity.py 1.4 KB

1234567891011121314151617181920212223242526272829303132333435363738
  1. import pandas as pd
  2. from crossborder.utils import base_mysql
  3. from crossborder.utils.log import get_logger
  4. log = get_logger(__name__)
  5. def generate_sql_from_excel(excel_file):
  6. # 读取 Excel 文件
  7. df = pd.read_excel(excel_file)
  8. sql_arr = []
  9. auto_increment = 0
  10. code = 0
  11. for index, row in df.iterrows():
  12. commodity_code = str(row.values[4])
  13. level = 0
  14. for i in range(4):
  15. if not pd.isna(row.values[i]):
  16. if 0 == i:
  17. auto_increment += 1
  18. code = 'C0' + str(auto_increment)
  19. level = i + 1
  20. commodity_name = str(row.values[i]).rstrip('*')
  21. commodity_name = commodity_name.replace('(', '(').replace(')', ')')
  22. sql = f"INSERT INTO t_yujin_crossborder_prov_commodity_category (code, level,commodity_code, commodity_name, create_time) VALUES ('{code}', {level}, '{commodity_code}', '{commodity_name}', now());"
  23. sql_arr.append(sql)
  24. log.info(sql)
  25. log.info(f"√ 成功生成 commodity category SQL 文件 size {len(sql_arr)} ")
  26. # 解析完后生成sql文件批量入库
  27. base_mysql.bulk_insert(sql_arr)
  28. log.info("trade SQL 存表完成!")
  29. # 商品书数据初始化
  30. excel_file = 'C:\\Users\\admin\\Desktop\\海关总署数据梳理.xlsx' # 输入的 Excel 文件
  31. generate_sql_from_excel(excel_file)