| 
					
				 | 
			
			
				@@ -17,6 +17,8 @@ from crossborder.anhui import gov_commodity_anhui_city, download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.anhui import gov_commodity_anhui_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.anhui import gov_commodity_anhui_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.base_country_code import get_last_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from crossborder.utils.base_mysql import get_commodity_trade_by_prov_year_month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.dingtalk import send_dingtalk_message 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from crossborder.utils.log import  get_logger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -236,7 +238,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 获取下一页的URL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             next_page_url = next_page_btn.get_attribute("onclick") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if not next_page_url: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                log.info("已到达最后一页,停止爬取") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info("已到达最后一页,停止采集") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 从onclick属性中提取URL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             next_page_url = re.search(r"'(.*?)'", next_page_url).group(1) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -247,7 +249,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 访问下一页 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             driver.get(next_page_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.info(f"开始爬取 {next_page_url} 页面数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"开始采集 {next_page_url} 页面数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     finally: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.quit() 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -322,22 +324,34 @@ def hierarchical_traversal(root_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_anhui_city.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    parser = argparse.ArgumentParser(description='海关数据智能抓取系统') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    start_time = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    if args.year == 2023: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info("正在全量爬取安徽省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        duration = time.time() - start_time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        send_dingtalk_message(f'安徽省海关全量数据爬取完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info("正在增量爬取安徽省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if res == 'finish': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        parser = argparse.ArgumentParser(description='海关数据智能抓取系统') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        args = parser.parse_args() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        start_time = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if args.year == 2023: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info("正在全量采集安徽省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             duration = time.time() - start_time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            send_dingtalk_message(f'安徽省海关增量数据爬取完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            minutes, seconds = divmod(duration, 60) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            send_dingtalk_message(f'【安徽省海关】全量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info("正在增量采集安徽省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if res == 'finish': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                duration = time.time() - start_time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                minutes, seconds = divmod(duration, 60) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                send_dingtalk_message(f'【安徽省海关】增量数据采集完成,{int(minutes)}分{seconds:.1f}秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                res = get_commodity_trade_by_prov_year_month('安徽省', get_last_month()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if res is not None: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    send_dingtalk_message(f"【安徽省海关】 commodity_trade 查询到 {len(res)} 条记录,文件已生成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    send_dingtalk_message("【安徽省海关】 未查询到任何记录或发生错误") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        send_dingtalk_message(f'【安徽省海关】发生错误:{e}') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     main() 
			 |