| 
					
				 | 
			
			
				@@ -256,7 +256,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 获取下一页的URL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             next_page_url = next_page_btn.get_attribute("onclick") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if not next_page_url: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                log.info("已到达最后一页,停止爬取") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info("已到达最后一页,停止采集") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 从onclick属性中提取URL 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             next_page_url = re.search(r"'(.*?)'", next_page_url).group(1) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -267,7 +267,7 @@ def crawl_with_selenium(url, mark): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 访问下一页 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             driver.get(next_page_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            log.info(f"开始爬取 {next_page_url} 页面数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"开始采集 {next_page_url} 页面数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     finally: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.quit() 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -343,16 +343,16 @@ def main(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     start_time = time.time() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if args.year == 2023: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info("正在全量爬取江苏省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info("正在全量采集江苏省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','all') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         duration = time.time() - start_time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        send_dingtalk_message(f'江苏省海关全量数据爬取完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        send_dingtalk_message(f'【江苏省海关】全量数据采集完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        log.info("正在增量爬取江苏省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info("正在增量采集江苏省海关数据") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         res = crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if res == 'finish': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             duration = time.time() - start_time 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            send_dingtalk_message(f'江苏省海关增量数据爬取完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            send_dingtalk_message(f'【江苏省海关】增量数据采集完成,耗时 {duration:.2f} 秒') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == '__main__': 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     main() 
			 |