|
@@ -334,11 +334,11 @@ def hierarchical_traversal(root_path, all_records):
|
|
|
gov_commodity_jiangsu_city.process_folder(md['path'])
|
|
|
|
|
|
def main():
|
|
|
- parser = argparse.ArgumentParser(description="爬取模式: 全量(all) 或 增量(auto)")
|
|
|
- parser.add_argument("mode", choices=["all", "auto"], help="运行模式")
|
|
|
+ parser = argparse.ArgumentParser(description='海关数据智能抓取系统')
|
|
|
+ parser.add_argument('--year', type=int, default=None, help='终止年份(如2023),未指定时抓取最新两个月')
|
|
|
args = parser.parse_args()
|
|
|
|
|
|
- if args.mode == "all":
|
|
|
+ if args.year == 2023:
|
|
|
crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','all')
|
|
|
else:
|
|
|
crawl_with_selenium('http://nanjing.customs.gov.cn/nanjing_customs/zfxxgk58/fdzdgknr95/3010051/589289/7e2fcc72-1.html','auto')
|