|
@@ -15,17 +15,17 @@ from selenium.webdriver import FirefoxOptions
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.common.by import By
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.webdriver.support import expected_conditions as EC
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
from selenium.webdriver.support.ui import WebDriverWait
|
|
-import gov_commodity_jiangsu_country
|
|
|
|
-import gov_commodity_jiangsu_city
|
|
|
|
-import gov_commodity_jiangsu_import_export
|
|
|
|
|
|
+
|
|
|
|
+from jiangsu import download_dir
|
|
|
|
+from jiangsu import gov_commodity_jiangsu_country
|
|
|
|
+from jiangsu import gov_commodity_jiangsu_city
|
|
|
|
+from jiangsu import gov_commodity_jiangsu_import_export
|
|
|
|
|
|
from utils import base_country_code, base_mysql
|
|
from utils import base_country_code, base_mysql
|
|
from utils.log import log
|
|
from utils.log import log
|
|
|
|
|
|
# rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe"
|
|
# rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe"
|
|
rarfile.UNRAR_EXECUTABLE = "unrar"
|
|
rarfile.UNRAR_EXECUTABLE = "unrar"
|
|
-download_dir = base_country_code.download_dir
|
|
|
|
-Path(download_dir).mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
def configure_stealth_options():
|
|
def configure_stealth_options():
|
|
"""增强型反检测配置[1,4](@ref)"""
|
|
"""增强型反检测配置[1,4](@ref)"""
|
|
@@ -217,7 +217,8 @@ def crawl_with_selenium(url, mark):
|
|
res = detect_latest_month(driver, url)
|
|
res = detect_latest_month(driver, url)
|
|
if res is None:
|
|
if res is None:
|
|
log.info("江苏省海关没有最新数据更新")
|
|
log.info("江苏省海关没有最新数据更新")
|
|
- sys.exit(0)
|
|
|
|
|
|
+ # sys.exit(0)
|
|
|
|
+ return
|
|
year_month = res
|
|
year_month = res
|
|
print(f"检测到最新有效数据:{year_month}")
|
|
print(f"检测到最新有效数据:{year_month}")
|
|
|
|
|
|
@@ -266,6 +267,16 @@ def crawl_with_selenium(url, mark):
|
|
|
|
|
|
finally:
|
|
finally:
|
|
driver.quit()
|
|
driver.quit()
|
|
|
|
+ log.info(f"江苏南京海关全量数据下载任务完成")
|
|
|
|
+ # 等待5s后执行
|
|
|
|
+ time.sleep(5)
|
|
|
|
+ all_records = base_mysql.get_hs_all()
|
|
|
|
+ hierarchical_traversal(download_dir, all_records)
|
|
|
|
+ log.info("江苏南京海关类章、国家、城市所有文件处理完成!")
|
|
|
|
+ time.sleep(5)
|
|
|
|
+ base_mysql.update_january_yoy('江苏省')
|
|
|
|
+ base_mysql.update_shandong_yoy('江苏省')
|
|
|
|
+ log.info("江苏南京海关城市同比sql处理完成")
|
|
|
|
|
|
|
|
|
|
def wait_for_download_complete(timeout=30, existing_files=None):
|
|
def wait_for_download_complete(timeout=30, existing_files=None):
|
|
@@ -321,17 +332,19 @@ def hierarchical_traversal(root_path, all_records):
|
|
gov_commodity_jiangsu_country.process_folder(md['path'])
|
|
gov_commodity_jiangsu_country.process_folder(md['path'])
|
|
gov_commodity_jiangsu_city.process_folder(md['path'])
|
|
gov_commodity_jiangsu_city.process_folder(md['path'])
|
|
|
|
|
|
-if __name__ == "__main__":
|
|
|
|
|
|
+def main():
|
|
crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all')
|
|
crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'all')
|
|
# crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment')
|
|
# crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html', 'increment')
|
|
- log.info(f"江苏南京海关全量数据下载任务完成")
|
|
|
|
- # 等待5s后执行
|
|
|
|
- time.sleep(5)
|
|
|
|
- all_records = base_mysql.get_hs_all()
|
|
|
|
- hierarchical_traversal(base_country_code.download_dir, all_records)
|
|
|
|
- log.info("江苏南京海关类章、国家、城市所有文件处理完成!")
|
|
|
|
- time.sleep(5)
|
|
|
|
- base_mysql.update_january_yoy('江苏省')
|
|
|
|
- base_mysql.update_shandong_yoy('江苏省')
|
|
|
|
- log.info("江苏南京海关城市同比sql处理完成")
|
|
|
|
-
|
|
|
|
|
|
+ # log.info(f"江苏南京海关全量数据下载任务完成")
|
|
|
|
+ # # 等待5s后执行
|
|
|
|
+ # time.sleep(5)
|
|
|
|
+ # all_records = base_mysql.get_hs_all()
|
|
|
|
+ # hierarchical_traversal(base_country_code.download_dir, all_records)
|
|
|
|
+ # log.info("江苏南京海关类章、国家、城市所有文件处理完成!")
|
|
|
|
+ # time.sleep(5)
|
|
|
|
+ # base_mysql.update_january_yoy('江苏省')
|
|
|
|
+ # base_mysql.update_shandong_yoy('江苏省')
|
|
|
|
+ # log.info("江苏南京海关城市同比sql处理完成")
|
|
|
|
+
|
|
|
|
+if __name__ == '__main__':
|
|
|
|
+ main()
|