| 
					
				 | 
			
			
				@@ -1,3 +1,4 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+import argparse 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import os 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import random 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import re 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -15,12 +16,12 @@ from selenium.webdriver.common.by import By 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support import expected_conditions as EC 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from selenium.webdriver.support.ui import WebDriverWait 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from zhejiang import download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from zhejiang import gov_commodity_zhejiang_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from zhejiang import gov_commodity_zhejiang_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from zhejiang import gov_commodity_zhejiang_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from utils.log import log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.zhejiang import download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.zhejiang import gov_commodity_zhejiang_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.zhejiang import gov_commodity_zhejiang_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.zhejiang import gov_commodity_zhejiang_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.log import log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def configure_stealth_options(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """增强型反检测配置[1,4](@ref)""" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -120,7 +121,7 @@ def process_month_tabs(driver, year, base_url, year_month): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         retry_count += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if tar_month != month_text: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        log.info(f"{year}年 {month_text} 月份跳过, increment tar: {year_month}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        log.info(f"{year}年 {month_text} 月份跳过, auto tar: {year_month}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 a_tag.click() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -283,7 +284,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     driver = webdriver.Firefox(options=configure_stealth_options()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     year_month = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if 'increment' == mark: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if 'auto' == mark: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         res = detect_latest_month(driver, url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if res is None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             log.info("浙江省海关没有最新数据更新") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -380,17 +381,14 @@ def hierarchical_traversal(root_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_zhejiang_city.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'increment') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # log.info(f"浙江杭州海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # hierarchical_traversal(download_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # log.info("浙江杭州海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # base_mysql.update_january_yoy('浙江省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # base_mysql.update_shandong_yoy('浙江省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    # log.info("浙江杭州海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    parser.add_argument("mode", choices=["all", "auto"], help="运行模式") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    if args.mode == "all": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     main() 
			 |