Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
880b04e
1
Parent(s):
971aff7
crawler: only move to csv file once fully written
Browse files
Signed-off-by: Niv Sardi <[email protected]>
- crawler/main.py +7 -1
crawler/main.py
CHANGED
@@ -1,6 +1,8 @@
|
|
1 |
import csv
|
2 |
import pathlib
|
3 |
import requests
|
|
|
|
|
4 |
from bs4 import BeautifulSoup
|
5 |
from progress.bar import ChargingBar
|
6 |
|
@@ -9,12 +11,13 @@ from common import selectors
|
|
9 |
|
10 |
pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)
|
11 |
|
|
|
12 |
URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"
|
13 |
page = requests.get(URL)
|
14 |
soup = BeautifulSoup(page.content, "html.parser")
|
15 |
|
16 |
options = soup.find(class_="form-control").find_all('option')
|
17 |
-
with open('./data/entidades.csv', 'w', newline='') as csvfile:
|
18 |
writer = csv.writer(csvfile)
|
19 |
writer.writerow(Entity.row_names())
|
20 |
|
@@ -49,3 +52,6 @@ with open('./data/entidades.csv', 'w', newline='') as csvfile:
|
|
49 |
writer.writerow(e.to_row())
|
50 |
bar.next()
|
51 |
bar.finish()
|
|
|
|
|
|
|
|
1 |
import csv
|
2 |
import pathlib
|
3 |
import requests
|
4 |
+
import shutil
|
5 |
+
|
6 |
from bs4 import BeautifulSoup
|
7 |
from progress.bar import ChargingBar
|
8 |
|
|
|
11 |
|
12 |
pathlib.Path(f"{Entity._DATA_PATH}/logos").mkdir(parents=True, exist_ok=True)
|
13 |
|
14 |
+
DATA_FILE = './data/entidades.csv'
|
15 |
URL = "http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp"
|
16 |
page = requests.get(URL)
|
17 |
soup = BeautifulSoup(page.content, "html.parser")
|
18 |
|
19 |
options = soup.find(class_="form-control").find_all('option')
|
20 |
+
with open(f"{DATA_FILE}.tmp", 'w', newline='') as csvfile:
|
21 |
writer = csv.writer(csvfile)
|
22 |
writer.writerow(Entity.row_names())
|
23 |
|
|
|
52 |
writer.writerow(e.to_row())
|
53 |
bar.next()
|
54 |
bar.finish()
|
55 |
+
|
56 |
+
shutil.move(f"{DATA_FILE}.tmp", DATA_FILE)
|
57 |
+
print("scrape finished")
|