Spaces:
Runtime error
Runtime error
Niv Sardi
committed on
Commit
·
bbf5506
1
Parent(s):
fac6e9b
python/get_entities: moar asserts and checks
Browse files- python/get_entities.py +5 -2
python/get_entities.py
CHANGED
@@ -24,21 +24,24 @@ with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
|
|
24 |
|
25 |
bar = ChargingBar('get entities', max=len(options))
|
26 |
for o in options[1:]:
|
|
|
27 |
def get_bco():
|
28 |
(name, bco)= (o.text, o.attrs['value'])
|
29 |
page = requests.post(URL, data={'bco': bco})
|
30 |
soup = BeautifulSoup(page.content, 'html.parser')
|
|
|
31 |
try:
|
32 |
img = soup.select_one(selectors.logosbancos).attrs['src']
|
33 |
img = img.replace('../', 'https://www.bcra.gob.ar/')
|
34 |
fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
|
35 |
web.get_img_logo(img, fn)
|
36 |
except AttributeError as err:
|
37 |
-
print('img
|
38 |
img = None
|
39 |
|
40 |
a = soup.select_one(selectors.entity_http)
|
41 |
try:
|
|
|
42 |
a = a.attrs['href']
|
43 |
except AttributeError:
|
44 |
a = soup.select_one(selectors.entity_mailto)
|
@@ -54,7 +57,7 @@ with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
|
|
54 |
try:
|
55 |
get_bco()
|
56 |
except Exception as e:
|
57 |
-
print(f'Error processing: {
|
58 |
|
59 |
i+=1
|
60 |
bar.next()
|
|
|
24 |
|
25 |
bar = ChargingBar('get entities', max=len(options))
|
26 |
for o in options[1:]:
|
27 |
+
assert(o)
|
28 |
def get_bco():
|
29 |
(name, bco)= (o.text, o.attrs['value'])
|
30 |
page = requests.post(URL, data={'bco': bco})
|
31 |
soup = BeautifulSoup(page.content, 'html.parser')
|
32 |
+
img = None
|
33 |
try:
|
34 |
img = soup.select_one(selectors.logosbancos).attrs['src']
|
35 |
img = img.replace('../', 'https://www.bcra.gob.ar/')
|
36 |
fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
|
37 |
web.get_img_logo(img, fn)
|
38 |
except AttributeError as err:
|
39 |
+
print(f'couldnt extract image from {img}: {err}')
|
40 |
img = None
|
41 |
|
42 |
a = soup.select_one(selectors.entity_http)
|
43 |
try:
|
44 |
+
assert(a)
|
45 |
a = a.attrs['href']
|
46 |
except AttributeError:
|
47 |
a = soup.select_one(selectors.entity_mailto)
|
|
|
57 |
try:
|
58 |
get_bco()
|
59 |
except Exception as e:
|
60 |
+
print(f'Error processing: {o.url}')
|
61 |
|
62 |
i+=1
|
63 |
bar.next()
|