| 
					
				 | 
			
			
				@@ -18,6 +18,7 @@ import gov_commodity_zhejiang_city 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import gov_commodity_zhejiang_country 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 import gov_commodity_zhejiang_import_export 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from utils import base_country_code, base_mysql 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from utils.log import log 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 download_dir = base_country_code.download_dir 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 Path(download_dir).mkdir(parents=True, exist_ok=True) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -25,7 +26,7 @@ Path(download_dir).mkdir(parents=True, exist_ok=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def configure_stealth_options(): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     """增强型反检测配置[1,4](@ref)""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     opts = FirefoxOptions() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("当前下载路径:", Path(download_dir).resolve()) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    log.info(f"当前下载路径: {Path(download_dir).resolve()}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 文件下载配置 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     opts.set_preference("browser.download.dir", download_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     opts.set_preference("browser.download.folderList", 2) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -62,7 +63,7 @@ def crawl_by_year_tabs(driver, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for tab in year_tabs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         year_text = tab.text.strip() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if int(year_text[:4]) <= 2022: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(f"{year_text} 后的数据无需下载") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"{year_text} 后的数据无需下载") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         year_url = tab.get_attribute("href") 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -72,7 +73,7 @@ def crawl_by_year_tabs(driver, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 新标签页打开年份页面 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.execute_script("window.open(arguments[0]);", year_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         driver.switch_to.window(driver.window_handles[-1]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print(f"\n正在处理 {year_text} 年份页面") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"\n正在处理 {year_text} 年份页面") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         process_month_tabs(driver, year_text, base_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -98,7 +99,7 @@ def process_month_tabs(driver, year, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 全量获取所有月份Tab 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             month_items = driver.find_elements(By.XPATH, '//ul[@class="nav_tab"]//li') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if not month_items: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"{year}年没有月份Tab,停止处理") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"{year}年没有月份Tab,停止处理") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             all_found = True 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -114,7 +115,7 @@ def process_month_tabs(driver, year, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if not month_text in target_months: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     continue  # 跳过已处理月份 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"点击月份Tab:{year}-{month_text}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"点击月份Tab:{year}-{month_text}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 a_tag.click() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # 处理详情页逻辑 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -123,9 +124,9 @@ def process_month_tabs(driver, year, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 detail_link_arr = get_behind_detail_link(driver, base_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if not detail_link_arr: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    print(f"{year}-{month_text} 未找到详情链接") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    log.info(f"{year}-{month_text} 未找到详情链接") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 for detail_link in detail_link_arr: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    print(f"{year}-{month_text} 详情链接:{detail_link}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    log.info(f"{year}-{month_text} 详情链接:{detail_link}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     driver.get(detail_link) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     download_file_from_detail_page(driver) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     driver.back() 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -137,24 +138,24 @@ def process_month_tabs(driver, year, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 found = True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if not found: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"{year}年未找到 {month_text} Tab") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"{year}年未找到 {month_text} Tab") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 all_found = False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if all_found: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"{year}年所有目标月份处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"{year}年所有目标月份处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 break 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # 部分月份未找到,重新获取元素 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 # retry_count += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"第 {retry_count} 次重试获取月份Tab...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"第 {retry_count} 次重试获取月份Tab...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 time.sleep(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         except StaleElementReferenceException: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print("页面刷新,重新获取月份Tab列表...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info("页面刷新,重新获取月份Tab列表...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # retry_count += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             time.sleep(2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(f"{year}年最终处理的月份:{processed_months}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    log.info(f"{year}年最终处理的月份:{processed_months}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def get_behind_detail_link(driver, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    """获取点击月份Tab后 conList_ul 下所有 li 的 a 标签完整链接""" 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -170,7 +171,7 @@ def get_behind_detail_link(driver, base_url): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				            href_arr.append(full_url) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				        return href_arr 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				    except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-       print(f"获取详情链接失败: {str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+       log.info(f"获取详情链接失败: {str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				        return [] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def download_file_from_detail_page(driver): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -181,7 +182,7 @@ def download_file_from_detail_page(driver): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         elements = driver.find_elements(By.XPATH, '//div[@class="easysite-news-content"]//div[@id="easysiteText"]//p//a') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if not elements: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print("详情页未找到目标文件链接") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info("详情页未找到目标文件链接") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             return 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for download_btn in elements: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -191,10 +192,10 @@ def download_file_from_detail_page(driver): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             file_url = download_btn.get_attribute("href") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if not file_url.lower().endswith(('.xls', '.xlsx')): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"跳过非 Excel 文件: {file_url}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"跳过非 Excel 文件: {file_url}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(f"正在下载: {file_name} → {file_url}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"正在下载: {file_name} → {file_url}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # 记录下载前的文件列表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             existing_files = set(f.name for f in Path(download_dir).glob('*')) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -207,17 +208,17 @@ def download_file_from_detail_page(driver): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             year, start_month, month = extract_year_and_month(file_name) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             final_path = Path(download_dir) / year / month / f"{file_name}" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if os.path.exists(final_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"文件已存在:{file_name} 正在覆盖...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"文件已存在:{file_name} 正在覆盖...") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 os.unlink(final_path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             final_dir = Path(download_dir) / year / month 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             final_dir.mkdir(parents=True, exist_ok=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(f"√ 正在移动文件 {downloaded_file} 至 {final_path}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"√ 正在移动文件 {downloaded_file} 至 {final_path}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             downloaded_file.rename(final_path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            print(f"√ 下载成功:{final_path}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            log.info(f"√ 下载成功:{final_path}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     except Exception as e: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print(f"详情页处理异常: {str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"详情页处理异常: {str(e)}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def extract_year_and_month(file_name): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 支持两种格式: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -250,10 +251,10 @@ def extract_rar(rar_path, extract_to): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if result.returncode == 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print(f"解压成功: {rar_path} → {extract_to}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"解压成功: {rar_path} → {extract_to}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return True 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print(f"解压失败: {result.stderr.decode('gbk')}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"解压失败: {result.stderr.decode('gbk')}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return False 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -320,7 +321,7 @@ def hierarchical_traversal(root_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 按年倒序 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     for year_dir in sorted(year_dirs, key=lambda x: x.name, reverse=True): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 构造完整的路径:download/shandong/2025/03 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        print(f"\n年份:{year_dir.name} | 省份:jiangsu") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        log.info(f"\n年份:{year_dir.name} | 省份:zhejiang") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 提取月份目录 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         month_dirs = [] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -333,20 +334,20 @@ def hierarchical_traversal(root_path): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # 按月倒序输出 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if month_dirs: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             for md in sorted(month_dirs, key=lambda x: x["month"], reverse=True): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                print(f"  月份:{md['month']:02d} | 路径:{md['path']}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                log.info(f"  月份:{md['month']:02d} | 路径:{md['path']}") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_zhejiang_import_export.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_zhejiang_country.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 gov_commodity_zhejiang_city.process_folder(md['path']) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 if __name__ == "__main__": 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print(f"浙江杭州海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    log.info(f"浙江杭州海关全量数据下载任务完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 等待5s后执行 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     hierarchical_traversal(base_country_code.download_dir) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("浙江杭州海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    log.info("浙江杭州海关类章、国家、城市所有文件处理完成!") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     time.sleep(5) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     base_mysql.update_january_yoy('浙江省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     base_mysql.update_shandong_yoy('浙江省') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    print("浙江杭州海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    log.info("浙江杭州海关城市同比sql处理完成") 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 |