Kaynağa Gözat

crawl: fix notification wording (wen an) — crawl_with_selenium now returns ('finish', year_month)

zhangfan 1 ay önce
ebeveyn
işleme
6f2551524f

+ 7 - 5
crossborder/anhui/crawl_gov_anhui_full.py

@@ -260,7 +260,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('安徽省')
         print("安徽省合肥海关城市同比sql处理完成")
-    return 'finish'
+    return 'finish', year_month
 
 
 def wait_for_download_complete(timeout=30, existing_files=None):
@@ -345,10 +345,12 @@ def main():
         else:
             log.info("正在增量采集安徽省海关数据")
             res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
-            if res == 'finish':
-                duration = time.time() - start_time
-                minutes, seconds = divmod(duration, 60)
-                send_dingtalk_message(f'【安徽省海关】增量数据采集完成,{int(minutes)}分{seconds:.1f}秒')
+            if res is not None:
+                r1, r2 = res
+                if r1 == 'finish':
+                    duration = time.time() - start_time
+                    minutes, seconds = divmod(duration, 60)
+                    send_dingtalk_message(f'【安徽省海关】 {r2} 增量数据采集完成,{int(minutes)}分{seconds:.1f}秒')
 
     except Exception as e:
         send_dingtalk_message(f'【安徽省海关】发生错误:{e}')

+ 7 - 5
crossborder/hebei/crawl_gov_hebei_full.py

@@ -236,7 +236,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('河北省')
         log.info("河北省海关城市同比sql处理完成")
-    return 'finish'
+    return 'finish', year_month
 
 def wait_for_download_complete(timeout=30, existing_files=None):
     """
@@ -321,10 +321,12 @@ def main():
         else:
             log.info("正在增量采集河北省海关数据")
             res = crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')
-            if res == 'finish':
-                duration = time.time() - start_time
-                minutes, seconds = divmod(duration, 60)
-                send_dingtalk_message(f'【河北省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
+            if res is not None:
+                r1, r2 = res
+                if r1 == 'finish':
+                    duration = time.time() - start_time
+                    minutes, seconds = divmod(duration, 60)
+                    send_dingtalk_message(f'【河北省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
 
     except Exception as e:
         send_dingtalk_message(f"【河北省海关】发生错误:{e}")

+ 7 - 5
crossborder/jiangsu/crawl_gov_jiangsu_full.py

@@ -279,7 +279,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('江苏省')
         log.info("江苏省海关城市同比sql处理完成")
-    return 'finish'
+    return 'finish', year_month
 
 
 def wait_for_download_complete(timeout=30, existing_files=None):
@@ -358,10 +358,12 @@ def main():
         else:
             log.info("正在增量采集江苏省海关数据")
             res = crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')
-            if res == 'finish':
-                duration = time.time() - start_time
-                minutes, seconds = divmod(duration, 60)
-                send_dingtalk_message(f'【江苏省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
+            if res is not None:
+                r1, r2 = res
+                if r1 == 'finish':
+                    duration = time.time() - start_time
+                    minutes, seconds = divmod(duration, 60)
+                    send_dingtalk_message(f'【江苏省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
 
     except Exception as e:
         send_dingtalk_message(f"【江苏省海关】发生错误:{e}")

+ 7 - 5
crossborder/zhejiang/crawl_gov_zhejiang_full.py

@@ -322,7 +322,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('浙江省')
         log.info("浙江省海关城市同比sql处理完成")
-    return 'finish'
+    return 'finish', year_month
 
 def wait_for_download_complete(timeout=30, existing_files=None):
     """
@@ -406,10 +406,12 @@ def main():
         else:
             log.info("正在增量采集浙江省海关数据")
             res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
-            if res == 'finish':
-                duration = time.time() - start_time
-                minutes, seconds = divmod(duration, 60)
-                send_dingtalk_message(f'【浙江省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
+            if res is not None:
+                r1, r2 = res
+                if r1 == 'finish':
+                    duration = time.time() - start_time
+                    minutes, seconds = divmod(duration, 60)
+                    send_dingtalk_message(f'【浙江省海关】增量数据采集完成,耗时 {int(minutes)}分{seconds:.1f}秒')
 
     except Exception as e:
         send_dingtalk_message(f"【浙江省海关】发生错误:{e}")