zhangfan před 1 měsícem
rodič
revize
fd66f200dc

+ 8 - 3
crossborder/anhui/crawl_gov_anhui_full.py

@@ -17,6 +17,7 @@ from crossborder.anhui import gov_commodity_anhui_city, download_dir
 from crossborder.anhui import gov_commodity_anhui_country
 from crossborder.anhui import gov_commodity_anhui_import_export
 from crossborder.utils import base_country_code, base_mysql
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.log import log
 
 def configure_stealth_options():
@@ -199,7 +200,6 @@ def crawl_with_selenium(url, mark):
         res = detect_latest_month(driver, url)
         if res is None:
             log.info("安徽省海关没有最新数据更新")
-            # sys.exit(0)
             return None
         year_month = res
         print(f"检测到最新有效数据:{year_month}")
@@ -257,6 +257,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('安徽省')
         print("安徽合肥海关城市同比sql处理完成")
+    return 'finish'
 
 
 def wait_for_download_complete(timeout=30, existing_files=None):
@@ -323,11 +324,15 @@ def main():
     parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
-    print(f'anhui args: {args}')
     if args.year == 2023:
+        log.info("正在全量爬取安徽海关数据")
         crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all')
+        send_dingtalk_message('安徽海关全量数据爬取完成')
     else:
-        crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
+        log.info("正在增量爬取安徽海关数据")
+        res = crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')
+        if res == 'finish':
+            send_dingtalk_message('安徽海关增量数据爬取完成')
 
 if __name__ == '__main__':
     main()

+ 8 - 2
crossborder/hebei/crawl_gov_hebei_full.py

@@ -18,6 +18,7 @@ from crossborder.hebei import gov_commodity_hebei_city
 from crossborder.hebei import gov_commodity_hebei_country
 from crossborder.hebei import gov_commodity_hebei_import_export
 from crossborder.utils import base_country_code, base_mysql
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.log import log
 
 
@@ -233,6 +234,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('河北省')
         log.info("河北石家庄海关城市同比sql处理完成")
+    return 'finish'
 
 def wait_for_download_complete(timeout=30, existing_files=None):
     """
@@ -301,10 +303,14 @@ def main():
     args = parser.parse_args()
 
     if args.year == 2023:
-        print("正在爬取河北海关全量数据")
+        log.info("正在全量爬取河北海关数据")
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'all')
+        send_dingtalk_message('河北海关全量数据爬取完成')
     else:
-        crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')
+        log.info("正在增量爬取河北海关数据")
+        res = crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')
+        if res == 'finish':
+            send_dingtalk_message('河北海关增量数据爬取完成')
 
 if __name__ == '__main__':
     main()

+ 8 - 1
crossborder/jiangsu/crawl_gov_jiangsu_full.py

@@ -23,6 +23,7 @@ from crossborder.jiangsu import gov_commodity_jiangsu_city
 from crossborder.jiangsu import gov_commodity_jiangsu_import_export
 
 from crossborder.utils import base_country_code, base_mysql
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.utils.log import log
 
 # rarfile.UNRAR_EXECUTABLE = r"C:\Program Files\WinRAR\UnRAR.exe"
@@ -277,6 +278,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('江苏省')
         log.info("江苏南京海关城市同比sql处理完成")
+    return 'finish'
 
 
 def wait_for_download_complete(timeout=30, existing_files=None):
@@ -338,9 +340,14 @@ def main():
     args = parser.parse_args()
 
     if args.year == 2023:
+        log.info("正在全量爬取江苏海关数据")
         crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','all')
+        send_dingtalk_message('江苏海关全量数据爬取完成')
     else:
-        crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')
+        log.info("正在增量爬取江苏海关数据")
+        res = crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')
+        if res == 'finish':
+            send_dingtalk_message('江苏海关增量数据爬取完成')
 
 if __name__ == '__main__':
     main()

+ 34 - 0
crossborder/utils/dingtalk.py

@@ -0,0 +1,34 @@
+import requests
+# NOTE(review): this URL embeds the robot access token in source; consider loading it from config or an environment variable.
+webhook_url = "https://oapi.dingtalk.com/robot/send?access_token=86f0016712dac5836e8cae7f7c6564d103de3de603228a3fad744e6bfc3e22fd"
+
+def send_dingtalk_message(message):
+    """
+    Send a plain-text message to the DingTalk group robot at ``webhook_url``.
+
+    Failures are printed rather than raised, so a notification problem never
+    aborts the caller.
+    :param message: text content of the message to send
+    """
+    headers = {"Content-Type": "application/json;charset=utf-8"}
+    data = {"msgtype": "text", "text": {"content": message}}
+
+    try:
+        # A timeout keeps an unreachable webhook from hanging the caller forever.
+        response = requests.post(webhook_url, json=data, headers=headers, timeout=10)
+        # DingTalk reports rejected messages with HTTP 200 and a non-zero
+        # "errcode" in the JSON body, so the status code alone is not enough.
+        if response.status_code == 200 and response.json().get("errcode") == 0:
+            print("钉钉消息发送成功")
+        else:
+            print(f"钉钉消息发送失败: {response.text}")
+    except Exception as e:
+        # Best-effort notification: report the problem and carry on.
+        print(f"发送钉钉消息时发生异常: {e}")
+
+
+if __name__ == '__main__':
+    # Manual check: running this module directly pushes one sample
+    # notification to the group through send_dingtalk_message.
+    sample_text = "安徽合肥海关数据下载任务已完成"
+    send_dingtalk_message(sample_text)

+ 8 - 1
crossborder/zhejiang/crawl_gov_zhejiang_full.py

@@ -16,6 +16,7 @@ from selenium.webdriver.common.by import By
 from selenium.webdriver.support import expected_conditions as EC
 from selenium.webdriver.support.ui import WebDriverWait
 
+from crossborder.utils.dingtalk import send_dingtalk_message
 from crossborder.zhejiang import download_dir
 from crossborder.zhejiang import gov_commodity_zhejiang_city
 from crossborder.zhejiang import gov_commodity_zhejiang_country
@@ -319,6 +320,7 @@ def crawl_with_selenium(url, mark):
         time.sleep(5)
         base_mysql.update_shandong_yoy('浙江省')
         log.info("浙江杭州海关城市同比sql处理完成")
+    return 'finish'
 
 def wait_for_download_complete(timeout=30, existing_files=None):
     """
@@ -385,9 +387,14 @@ def main():
     args = parser.parse_args()
 
     if args.year == 2023:
+        log.info("正在全量爬取浙江海关数据")
         crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
+        send_dingtalk_message('浙江海关全量数据爬取完成')
     else:
-        crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
+        log.info("正在增量爬取浙江海关数据")
+        res = crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')
+        if res == 'finish':
+            send_dingtalk_message('浙江海关增量数据爬取完成')
 
 if __name__ == '__main__':
     main()