소스 검색

Crawlers: report elapsed time in DingTalk completion messages; log month-detection failures at error level

zhangfan 1 개월 전
부모
커밋
b5ce1ecfdd

+ 7 - 4
crossborder/anhui/crawl_gov_anhui_full.py

@@ -187,9 +187,9 @@ def detect_latest_month(driver, url):
                 continue
             return f"{check_year}年{check_month}月"
         except:
-            log.info(f"未找到 {target_title}")
+            log.error(f"未找到 {target_title}")
             continue
-    log.info("三个月内未找到有效数据")
+    log.error("三个月内未找到有效数据")
     return None
 
 def crawl_with_selenium(url, mark):
@@ -324,15 +324,18 @@ def main():
     parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
+    start_time = time.time()
     if args.year == 2023:
         log.info("正在全量爬取安徽省海关数据")
         crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all')
-        send_dingtalk_message('安徽省海关全量数据爬取完成')
+        duration = time.time() - start_time
+        send_dingtalk_message(f'安徽省海关全量数据爬取完成,耗时 {duration:.2f} 秒')
     else:
         log.info("正在增量爬取安徽省海关数据")
         res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
         if res == 'finish':
-            send_dingtalk_message('安徽省海关增量数据爬取完成')
+            duration = time.time() - start_time
+            send_dingtalk_message(f'安徽省海关增量数据爬取完成,耗时 {duration:.2f} 秒')
 
 if __name__ == '__main__':
     main()

+ 7 - 4
crossborder/hebei/crawl_gov_hebei_full.py

@@ -165,9 +165,9 @@ def detect_latest_month(driver, url):
                 continue
             return f"{check_year}年{check_month}月"
         except:
-            log.info(f"未找到 {target_title}")
+            log.error(f"未找到 {target_title}")
             continue
-    log.info("三个月内未找到有效数据")
+    log.error("三个月内未找到有效数据")
     return None
 
 def crawl_with_selenium(url, mark):
@@ -302,15 +302,18 @@ def main():
     parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
+    start_time = time.time()
     if args.year == 2023:
         log.info("正在全量爬取河北省海关数据")
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'all')
-        send_dingtalk_message('河北省海关全量数据爬取完成')
+        duration = time.time() - start_time
+        send_dingtalk_message(f'河北省海关全量数据爬取完成,耗时 {duration:.2f} 秒')
     else:
         log.info("正在增量爬取河北省海关数据")
         res = crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')
         if res == 'finish':
-            send_dingtalk_message('河北省海关增量数据爬取完成')
+            duration = time.time() - start_time
+            send_dingtalk_message(f'河北省海关增量数据爬取完成,耗时 {duration:.2f} 秒')
 
 if __name__ == '__main__':
     main()

+ 7 - 4
crossborder/jiangsu/crawl_gov_jiangsu_full.py

@@ -206,9 +206,9 @@ def detect_latest_month(driver, url):
                 continue
             return f"{check_year}年{check_month}月"
         except:
-            log.info(f"未找到 {target_title}")
+            log.error(f"未找到 {target_title}")
             continue
-    log.info("三个月内未找到有效数据")
+    log.error("三个月内未找到有效数据")
     return None
 
 def crawl_with_selenium(url, mark):
@@ -339,15 +339,18 @@ def main():
     parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
+    start_time = time.time()
     if args.year == 2023:
         log.info("正在全量爬取江苏省海关数据")
         crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','all')
-        send_dingtalk_message('江苏省海关全量数据爬取完成')
+        duration = time.time() - start_time
+        send_dingtalk_message(f'江苏省海关全量数据爬取完成,耗时 {duration:.2f} 秒')
     else:
         log.info("正在增量爬取江苏省海关数据")
         res = crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')
         if res == 'finish':
-            send_dingtalk_message('江苏省海关增量数据爬取完成')
+            duration = time.time() - start_time
+            send_dingtalk_message(f'江苏省海关增量数据爬取完成,耗时 {duration:.2f} 秒')
 
 if __name__ == '__main__':
     main()

+ 7 - 4
crossborder/zhejiang/crawl_gov_zhejiang_full.py

@@ -276,9 +276,9 @@ def detect_latest_month(driver, url):
                 continue
             return f"{check_year}年-{check_month}月"
         except:
-            log.info(f"未找到 {target_title}")
+            log.error(f"未找到 {target_title}")
             continue
-    log.info("三个月内未找到有效数据")
+    log.error("三个月内未找到有效数据")
     return None
 
 def crawl_with_selenium(url, mark):
@@ -386,15 +386,18 @@ def main():
     parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
+    start_time = time.time()
     if args.year == 2023:
         log.info("正在全量爬取浙江省海关数据")
         crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
-        send_dingtalk_message('浙江省海关全量数据爬取完成')
+        duration = time.time() - start_time
+        send_dingtalk_message(f'浙江省海关全量数据爬取完成,耗时 {duration:.2f} 秒')
     else:
         log.info("正在增量爬取浙江省海关数据")
         res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
         if res == 'finish':
-            send_dingtalk_message('浙江省海关增量数据爬取完成')
+            duration = time.time() - start_time
+            send_dingtalk_message(f'浙江省海关增量数据爬取完成,耗时 {duration:.2f} 秒')
 
 if __name__ == '__main__':
     main()