diff --git a/.gitignore b/.gitignore
index 27364661..c20a3d28 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
 dist/
 ghidra_assistant.egg-info/
 out.json
+*.pyc
diff --git a/automate.sh b/automate.sh
index c843ade5..c3c96913 100755
--- a/automate.sh
+++ b/automate.sh
@@ -1,14 +1,16 @@
 #! /bin/bash
 CURR_DIR=$(pwd)
-SCRAPE_DIR=${CURR_DIR}/scrape
-REACT_DIR=${CURR_DIR}/react_usse
+APP_DIR=/mnt/wintergreen_ssd1/projects/usse/usse
+SCRAPE_DIR=${APP_DIR}/scrape
+REACT_DIR=${APP_DIR}/react_usse
 
 # Move panda_dump.bin
-mv ${SCRAPE_DIR}/panda_dump.bin ${SCRAPE_DIR}/_panda_dump.bin
+mv ${SCRAPE_DIR}/panda_dump.bin ${SCRAPE_DIR}/_panda_dump.bin
 
+cd ${SCRAPE_DIR}
 source ${SCRAPE_DIR}/venv/bin/activate && python ${SCRAPE_DIR}/usse.py && deactivate
 cp ${SCRAPE_DIR}/out.json ${REACT_DIR}/src/locations.json
 
 # Deploy new container
 cd ${REACT_DIR}
-./use_docker.sh recreate
\ No newline at end of file
+./use_docker.sh rebuild
diff --git a/react_usse/Dockerfile b/react_usse/Dockerfile
index cda99865..529778a3 100644
--- a/react_usse/Dockerfile
+++ b/react_usse/Dockerfile
@@ -7,8 +7,9 @@ WORKDIR /usr/src/app
 # Step 3: Copy package.json and package-lock.json (or yarn.lock)
 COPY package*.json ./
+RUN npm config set fetch-retry-maxtimeout 6000000 && npm config set fetch-retry-mintimeout 1000000
 
 # Step 4: Install dependencies
-RUN npm install
+RUN npm install --no-audit
 
 # Step 5: Copy the rest of your app's source code
 COPY . .
diff --git a/scrape/usse.py b/scrape/usse.py
index c7bff049..6c69f4e4 100644
--- a/scrape/usse.py
+++ b/scrape/usse.py
@@ -24,7 +24,7 @@ out = []
 # URL = "https://www.funda.nl/zoeken/koop?selected_area=%5B%22utrecht,15km%22%5D&price=%22-400000%22&object_type=%5B%22house%22%5D"
 URL = "https://www.funda.nl/zoeken/koop?selected_area=%5B%22utrecht,30km%22%5D&price=%22-500000%22&object_type=%5B%22house%22%5D"
-NUM_PAGES = 1
+NUM_PAGES = 150
 
 def get_funda_data():
     # scraper = FundaScraper(url="nijkerk/beschikbaar/100000-400000/woonhuis/tuin/eengezinswoning/landhuis/+30km/", find_past=False, n_pages=81)
     scraper = FundaScraper(url=URL, find_past=False, n_pages=NUM_PAGES)
@@ -101,4 +101,4 @@ if __name__ == "__main__":
     else:
         data = get_funda_data()
         pickle.dump(data, open('panda_dump.bin', 'wb'))
-    generate_json(data)
\ No newline at end of file
+    generate_json(data)
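
A follow-up note on automate.sh: the script continues past failures, so if the scrape step dies, a stale or empty out.json is still copied into the React app and redeployed. Below is a minimal hardening sketch using the same paths as the diff; the set -e line and the if-guard around the mv are additions of this sketch, not part of the commit, and the guard assumes panda_dump.bin may simply not exist yet on a first run:

    #! /bin/bash
    set -e  # abort on the first failing step instead of deploying stale data

    APP_DIR=/mnt/wintergreen_ssd1/projects/usse/usse
    SCRAPE_DIR=${APP_DIR}/scrape
    REACT_DIR=${APP_DIR}/react_usse

    # Keep the previous dump as a backup; skip on a first run, when no dump exists yet
    if [ -f "${SCRAPE_DIR}/panda_dump.bin" ]; then
        mv "${SCRAPE_DIR}/panda_dump.bin" "${SCRAPE_DIR}/_panda_dump.bin"
    fi

    cd "${SCRAPE_DIR}"
    source "${SCRAPE_DIR}/venv/bin/activate" && python "${SCRAPE_DIR}/usse.py" && deactivate

    cp "${SCRAPE_DIR}/out.json" "${REACT_DIR}/src/locations.json"

    # Deploy the new container
    cd "${REACT_DIR}"
    ./use_docker.sh rebuild

With set -e, the && chain around the scrape step means a failing python run aborts the whole pipeline before the cp and the container rebuild.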
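
On the Dockerfile change: npm's fetch-retry-mintimeout and fetch-retry-maxtimeout are measured in milliseconds, so 1000000 and 6000000 set the per-retry backoff window to roughly 17 to 100 minutes; values this large only pay off on a very slow or unreliable connection to the registry. The --no-audit flag skips the security-audit request during npm install, removing one more network round trip from the image build.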
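
On the scraper side, raising NUM_PAGES from 1 to 150 turns what was a single-page smoke test into a full crawl of the 30 km search area, so automate.sh should be expected to run much longer per invocation. Unrelated to this diff, pickle.dump(data, open('panda_dump.bin', 'wb')) never closes its file handle; a with block would be the safer idiom.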