瀏覽代碼

crawl args fix

zhangfan 1 天之前
父節點
當前提交
7598be507b

+ 3 - 3
crossborder/anhui/crawl_gov_anhui_full.py

@@ -320,11 +320,11 @@ def hierarchical_traversal(root_path):
                 gov_commodity_anhui_city.process_folder(md['path'])
 
 def main():
-    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
-    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
+    parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
+    parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
-    if args.mode == "all":
+    if args.year == 2023:
         crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','all')
     else:
         crawl_with_selenium('http://hefei.customs.gov.cn/hefei_customs/zfxxgkzl59/3169584/479584/479585/index.html','auto')

+ 3 - 3
crossborder/hebei/crawl_gov_hebei_full.py

@@ -297,11 +297,11 @@ def hierarchical_traversal(root_path):
 
 
 def main():
-    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
-    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
+    parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
+    parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
-    if args.mode == "all":
+    if args.year == 2023:
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'all')
     else:
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')

+ 3 - 3
crossborder/jiangsu/crawl_gov_jiangsu_full.py

@@ -334,11 +334,11 @@ def hierarchical_traversal(root_path, all_records):
                 gov_commodity_jiangsu_city.process_folder(md['path'])
 
 def main():
-    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
-    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
+    parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
+    parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
-    if args.mode == "all":
+    if args.year == 2023:
         crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','all')
     else:
         crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')

+ 3 - 3
crossborder/zhejiang/crawl_gov_zhejiang_full.py

@@ -381,11 +381,11 @@ def hierarchical_traversal(root_path):
                 gov_commodity_zhejiang_city.process_folder(md['path'])
 
 def main():
-    parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
-    parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
+    parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
+    parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
     args = parser.parse_args()
 
-    if args.mode == "all":
+    if args.year == 2023:
         crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html', 'all')
     else:
         crawl_with_selenium('http://hangzhou.customs.gov.cn/hangzhou_customs/575609/zlbd/575612/575612/6430241/6430315/index.html','auto')