Selaa lähdekoodia

crawl run path fix

zhangfan 1 kuukausi sitten
vanhempi
commit
52d16a43ca

+ 1 - 1
crossborder/anhui/__init__.py

@@ -3,7 +3,7 @@ from pathlib import Path
 
 script_dir = os.getcwd()
 # script_dir = os.path.dirname(os.path.abspath(__file__))
-download_dir = os.path.join(script_dir, 'downloads')
+download_dir = os.path.join(script_dir, 'downloads', 'anhui')
 # 创建目录(如果不存在)
 os.makedirs(download_dir, exist_ok=True)
 # 切换当前工作目录到 download_dir

+ 1 - 1
crossborder/hebei/__init__.py

@@ -3,7 +3,7 @@ from pathlib import Path
 
 script_dir = os.getcwd()
 # script_dir = os.path.dirname(os.path.abspath(__file__))
-download_dir = os.path.join(script_dir, 'downloads')
+download_dir = os.path.join(script_dir, 'downloads', 'hebei')
 # 创建目录(如果不存在)
 os.makedirs(download_dir, exist_ok=True)
 # 切换当前工作目录到 download_dir

+ 1 - 0
crossborder/hebei/crawl_gov_hebei_full.py

@@ -302,6 +302,7 @@ def main():
     args = parser.parse_args()
 
     if args.year == 2023:
+        print("正在爬取河北海关全量数据")
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html', 'all')
     else:
         crawl_with_selenium('http://shijiazhuang.customs.gov.cn/shijiazhuang_customs/zfxxgk43/2988665/2988681/index.html','auto')

+ 1 - 1
crossborder/jiangsu/__init__.py

@@ -3,7 +3,7 @@ from pathlib import Path
 
 script_dir = os.getcwd()
 # script_dir = os.path.dirname(os.path.abspath(__file__))
-download_dir = os.path.join(script_dir, 'downloads')
+download_dir = os.path.join(script_dir, 'downloads','jiangsu')
 # 创建目录(如果不存在)
 os.makedirs(download_dir, exist_ok=True)
 # 切换当前工作目录到 download_dir

+ 8 - 8
crossborder/utils/constants.py

@@ -2,17 +2,17 @@ import os
 import sys
 from pathlib import Path
 
-PROJECT_ROOT = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent
+# PROJECT_ROOT = Path(os.path.abspath(os.path.dirname(__file__))).parent.parent
 
-if sys.platform.startswith('linux'):
-    # Linux环境指定为/home目录
-    DOWNLOAD_DIR = Path('/home/downloads')
-else:
-    # Windows保持原有结构(项目根目录下的downloads文件夹)
-    DOWNLOAD_DIR = PROJECT_ROOT / 'downloads'
+# if sys.platform.startswith('linux'):
+#     # Linux环境指定为/home目录
+#     DOWNLOAD_DIR = Path('/home/downloads')
+# else:
+#     # Windows保持原有结构(项目根目录下的downloads文件夹)
+#     DOWNLOAD_DIR = PROJECT_ROOT / 'downloads'
+DOWNLOAD_DIR = Path(os.getcwd(), 'downloads')  # segment must stay relative: an absolute '/downloads' makes Path drop the cwd and point at the filesystem root
 DOWNLOAD_DIR.mkdir(exist_ok=True, parents=True)
 
-
 EXCLUDE_REGIONS = ["亚洲", "非洲", "欧洲", "拉丁美洲", "北美洲", "大洋洲", "南极洲",
                    "东南亚国家联盟", "欧洲联盟", "亚太经济合作组织",
                    "区域全面经济伙伴关系协定(RCEP)成员国", "共建“一带一路”国家和地区",

+ 1 - 1
crossborder/zhejiang/__init__.py

@@ -3,7 +3,7 @@ from pathlib import Path
 
 script_dir = os.getcwd()
 # script_dir = os.path.dirname(os.path.abspath(__file__))
-download_dir = os.path.join(script_dir, 'downloads')
+download_dir = os.path.join(script_dir, 'downloads', 'zhejiang')
 # 创建目录(如果不存在)
 os.makedirs(download_dir, exist_ok=True)
 # 切换当前工作目录到 download_dir