Spaces:

mediaparty2023
/

spoof-detect

Runtime error

Niv Sardi

make Entity a NamedTuple

60ec487 over 2 years ago

1.72 kB

	import csv
	import pathlib
	import requests
	import shutil

	from bs4 import BeautifulSoup
	from progress.bar import ChargingBar

	from entity import Entity
	from common import selectors
	from common import defaults

	# Scrape the BCRA financial-entities page: for every entry of the entity
	# drop-down, POST its code back to the same URL, pull the logo and website
	# out of the response and write one CSV row per entity.

	# Make sure the logo directory exists before anything else runs.
	pathlib.Path(f'{defaults.DATA_PATH}/logos').mkdir(parents=True, exist_ok=True)

	DATA_FILE = './data/entidades.csv'
	URL = 'http://www.bcra.gob.ar/SistemasFinancierosYdePagos/Entidades_financieras.asp'
	# requests never times out by default; bound every call (seconds) so a
	# stalled connection cannot hang the scrape forever.
	REQUEST_TIMEOUT = 30


	def _logo_url(entity_soup, name):
	    """Return the absolute logo URL found in *entity_soup*, or None.

	    A missing logo tag (or a tag without ``src``) is reported on stdout,
	    labelled with the entity *name*, and yields None.
	    """
	    try:
	        src = entity_soup.select_one(selectors.logosbancos).attrs['src']
	    except (AttributeError, KeyError) as err:
	        # AttributeError: select_one() returned None; KeyError: no src.
	        print('img', name, err)
	        return None
	    # The page uses relative paths; make them absolute.
	    return src.replace('../', 'https://www.bcra.gob.ar/')


	def _entity_url(entity_soup):
	    """Return the entity's website URL, or None when it cannot be found.

	    The http link is preferred; otherwise the URL is derived from the
	    domain part of the mailto link.
	    """
	    try:
	        return entity_soup.select_one(selectors.entity_http).attrs['href']
	    except (AttributeError, KeyError):
	        pass  # no usable http link, fall back to the mailto link

	    mailto = entity_soup.select_one(selectors.entity_mailto)
	    try:
	        return 'http://' + mailto.attrs['href'].split('@')[1]
	    except (AttributeError, KeyError, IndexError, TypeError):
	        # No mailto tag, no href, or no '@' in it: report and record the
	        # entity without a URL instead of aborting the whole scrape.
	        print('ERROR', mailto)
	        return None


	page = requests.get(URL, timeout=REQUEST_TIMEOUT)
	soup = BeautifulSoup(page.content, 'html.parser')

	options = soup.find(class_='form-control').find_all('option')
	# The first <option> is skipped (presumably the drop-down placeholder --
	# TODO confirm against the live page).
	entries = options[1:]

	# Write to a temporary file so an interrupted run never leaves a
	# half-written DATA_FILE behind; it is moved into place at the end.
	with open(f'{DATA_FILE}.tmp', 'w', newline='') as csvfile:
	    writer = csv.writer(csvfile)
	    writer.writerow(Entity.row_names())

	    # Size the bar from the entries actually processed so it reaches 100%.
	    bar = ChargingBar('Processing', max=len(entries))
	    for i, o in enumerate(entries):
	        name, bco = o.text, o.attrs['value']
	        entity_page = requests.post(URL, data={'bco': bco},
	                                    timeout=REQUEST_TIMEOUT)
	        entity_soup = BeautifulSoup(entity_page.content, 'html.parser')

	        img = _logo_url(entity_soup, name)
	        a = _entity_url(entity_soup)

	        e = Entity(name, id=i, bco=bco, logo=img, url=a)
	        writer.writerow(e.to_row())
	        bar.next()
	    bar.finish()

	shutil.move(f'{DATA_FILE}.tmp', DATA_FILE)
	print('scrape finished')