Spaces:
Runtime error
Runtime error
File size: 1,770 Bytes
485f76b 880b04e 485f76b 74a29fd 485f76b ae7097b 485f76b 60ec487 485f76b 60ec487 485f76b 60ec487 ae7097b 485f76b 60ec487 485f76b 60ec487 485f76b 60ec487 74a29fd 485f76b 60ec487 485f76b ae7097b 485f76b 60ec487 485f76b 880b04e ae7097b 60ec487 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import csv
import requests
import shutil
from bs4 import BeautifulSoup
from progress.bar import ChargingBar
import web
from entity import Entity
from common import selectors, defaults, mkdir
URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'


def _fetch_logo(soup, name, bco):
    """Download the logo found in *soup* and return its absolute URL.

    The image is saved under ``defaults.LOGOS_DATA_PATH`` as ``<bco>.0.png``
    via ``web.get_img_logo``. When the logo element is missing (the selector
    matches nothing, so ``.attrs`` fails on ``None``) a diagnostic line is
    printed and ``None`` is returned.
    """
    try:
        img = soup.select_one(selectors.logosbancos).attrs['src']
        # The page uses relative paths; rewrite them against the site root.
        img = img.replace('../', 'https://www.bcra.gob.ar/')
        fn = f"{defaults.LOGOS_DATA_PATH}/{bco}.0.png"
        web.get_img_logo(img, fn)
    except AttributeError as err:
        print('img', name, err)
        img = None
    return img


def _find_entity_url(soup):
    """Return a website URL for the entity described by *soup*, or ``None``.

    The ``href`` of the element matched by ``selectors.entity_http`` is
    preferred. If that element is absent, the URL is derived from the domain
    part of the ``href`` matched by ``selectors.entity_mailto``. When neither
    works, ``'ERROR'`` is printed and ``None`` is returned.
    """
    link = soup.select_one(selectors.entity_http)
    try:
        return link.attrs['href']
    except (AttributeError, KeyError):
        # No usable web link; fall through to the mail address.
        pass

    mail = soup.select_one(selectors.entity_mailto)
    try:
        return 'http://' + mail.attrs['href'].split('@')[1]
    except (AttributeError, KeyError, IndexError, TypeError):
        # select_one() returns None when nothing matches, which raises
        # AttributeError (not TypeError); an href without '@' raises
        # IndexError. Either way the entity is recorded without a URL
        # instead of aborting the whole scrape.
        print('ERROR', mail)
        return None


page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
options = soup.find(class_='form-control').find_all('option')

mkdir.make_dirs([defaults.DATA_PATH])

# Write to a temporary file and move it into place only after the loop has
# completed, so an interrupted run does not overwrite the existing CSV.
with open(f'{defaults.MAIN_CSV_PATH}.tmp', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(Entity.row_names())

    # The first <option> is skipped (presumably a placeholder entry -- confirm
    # against the live page); size the bar to the entries actually processed.
    entries = options[1:]
    bar = ChargingBar('Processing', max=len(entries))
    for i, o in enumerate(entries):
        name, bco = o.text, o.attrs['value']
        page = requests.post(URL, data={'bco': bco})
        soup = BeautifulSoup(page.content, 'html.parser')

        img = _fetch_logo(soup, name, bco)
        a = _find_entity_url(soup)

        # str() keeps the existing CSV convention: missing values are
        # written as the literal text 'None'.
        e = Entity(name, id=i, bco=bco, logo=str(img), url=str(a))
        writer.writerow(e.to_row())
        bar.next()
    bar.finish()

shutil.move(f'{defaults.MAIN_CSV_PATH}.tmp', defaults.MAIN_CSV_PATH)
print('scrape finished')
|