bullm committed on
Commit
bb112ef
·
1 Parent(s): 025632f

Upload Scheduler_Covid.py

Browse files
Files changed (1) hide show
  1. Scheduler/Scheduler_Covid.py +116 -3
Scheduler/Scheduler_Covid.py CHANGED
@@ -1,3 +1,116 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec9e9a702f0ebc24463fc8e550cbd22c8ca38be3da21a90e660b4b629e3d8253
3
- size 4915
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Thu Sep 23 09:27:21 2021
5
+
6
+ @author: benjaminull
7
+ """
8
+
9
+ import pandas as pd
10
+ from datetime import datetime
11
+ import numpy as np
12
+ from datetime import timedelta
13
+ import requests
14
+ import io
15
+ import openpyxl
16
+ from pandas import ExcelWriter
17
+ import requests
18
+ from bs4 import BeautifulSoup
19
+ from bs4 import BeautifulSoup
20
+ import pandas as pd
21
+ from selenium import webdriver
22
+ import requests
23
+
24
+
25
+
26
def GenerarExcel(ruta_guardado, Pestañas, Data):
    """Write one table per worksheet into a single Excel workbook.

    Parameters
    ----------
    ruta_guardado : str or path-like
        Destination of the workbook; passed straight to
        ``pandas.ExcelWriter``.
    Pestañas : sequence of str
        Worksheet names, in the order the sheets are written.
    Data : sequence
        Objects exposing ``to_excel`` (the caller in this file passes
        DataFrames). ``Data[i]`` is written to the sheet ``Pestañas[i]``.

    Raises
    ------
    IndexError
        If a list-like ``Data`` has fewer elements than ``Pestañas``.
    """
    # ExcelWriter creates each worksheet when ``to_excel`` targets it, so
    # there is no need to pre-build a workbook with openpyxl first.
    # Using the writer as a context manager saves and closes the file on
    # exit, replacing ``writer.save()``, and closes it even when a
    # ``to_excel`` call raises.
    with ExcelWriter(ruta_guardado) as writer:
        for i, pestaña in enumerate(Pestañas):
            Data[i].to_excel(writer, sheet_name=pestaña)
38
+
39
+
40
def run_data_covid():
    """Download Apple and Google mobility data and export smoothed indices.

    The function takes no arguments and returns ``None``. It:

    1. Opens ``https://covid19.apple.com/mobility`` in Chrome through
       Selenium and pulls the CSV address out of the second ``<a>`` element
       of the rendered page.
    2. Reads Google's ``Global_Mobility_Report.csv`` in chunks (at most 65
       chunks of 150000 rows) and averages it per country and date.
    3. Downloads the Apple CSV, keeps the ``country/region`` rows, reshapes
       it to one column per transportation type and subtracts 100.
    4. Joins both sources and adds a ``Mobility Index`` column, the row
       mean of the eight mobility columns.
    5. For every column, computes a 7-row rolling mean (minimum 3
       observations) per country, appends regional averages, and writes the
       result to ``Scheduler/Movilidad_desagrada.xlsx`` and
       ``Scheduler/dict_movilidad.npy``.
    """
    from itertools import islice

    # --- Apple: locate the CSV link on the JavaScript-rendered page -------
    options = webdriver.ChromeOptions()
    options.binary_location = r'C:/Program Files/Google/Chrome/Application/chrome.exe'
    path_to_chromedriver = r'C:/Users/bullm/larrainvial.com/Equipo Quant - Area Estrategias Cuantitativas 2.0/Codigos\Data Alternativa/Transcripts/chromedriver.exe'
    browser = webdriver.Chrome(executable_path=path_to_chromedriver, chrome_options=options)
    url = 'https://covid19.apple.com/mobility'
    try:
        # Go to the target page.
        browser.get(url)
        # NOTE(review): the response of this request is never used. It is
        # kept because it sits between ``browser.get`` and ``page_source``
        # and may be acting as an incidental delay while the page renders;
        # confirm, then replace it with an explicit wait.
        requests.get(url)
        html = browser.page_source
    finally:
        # Always shut the browser down so no Chrome/chromedriver process is
        # left running, even when loading the page fails.
        browser.quit()
    soup = BeautifulSoup(html, "html.parser")
    # First double-quoted value inside the markup of the second anchor;
    # presumably its href pointing at the CSV -- verify against the page.
    link = str(soup.find_all("a")[1]).split('"')[1]

    # --- Google: chunked read, then mean per (country, date) --------------
    reader = pd.read_csv(
        'https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv',
        usecols=['country_region', 'date',
                 'retail_and_recreation_percent_change_from_baseline',
                 'grocery_and_pharmacy_percent_change_from_baseline',
                 'parks_percent_change_from_baseline',
                 'transit_stations_percent_change_from_baseline',
                 'workplaces_percent_change_from_baseline'],
        dtype={"workplaces_percent_change_from_baseline": "float32",
               "parks_percent_change_from_baseline": "float32",
               "retail_and_recreation_percent_change_from_baseline": "float32",
               'transit_stations_percent_change_from_baseline': "float32"},
        chunksize=150000)
    # Take at most the first 65 chunks and concatenate them once; growing a
    # frame with concat inside the loop re-copies all previous rows on
    # every iteration.
    chunks = list(islice(reader, 65))
    data_agg = pd.concat(chunks, ignore_index=True) if chunks else pd.DataFrame()
    data_agg.info(memory_usage="deep")
    data_agg.set_index(['country_region', 'date'], inplace=True)
    # Collapse all rows sharing a country and date into their mean.
    data_agg = data_agg.groupby(level=[0, 1]).mean()
    data_agg.columns = data_agg.columns.str.replace(
        '_percent_change_from_baseline', '_google', regex=False)

    # --- Apple: download and reshape ---------------------------------------
    apple_csv = io.StringIO(requests.get(link).content.decode('utf-8'))
    try:
        # pandas >= 1.3 spelling; 'warn' drops malformed lines with a
        # warning, matching ``error_bad_lines=False`` and its defaults.
        data_app = pd.read_csv(apple_csv, on_bad_lines='warn')
    except TypeError:
        # Older pandas does not know ``on_bad_lines``.
        apple_csv.seek(0)
        data_app = pd.read_csv(apple_csv, error_bad_lines=False)
    data_app.info(memory_usage="deep")
    # Keep only the country-level data.
    data_app = data_app.loc[data_app['geo_type'] == 'country/region']
    data_app = data_app.drop(columns=['geo_type', 'country',
                                      'alternative_name', 'sub-region'])
    # Remaining columns are stacked into the index, then transportation
    # types are spread back out as columns.
    data_app = data_app.set_index(['region', 'transportation_type']).stack()
    # NOTE(review): the "- 100" presumably re-bases Apple's index (baseline
    # 100) to a change-from-baseline scale like Google's -- confirm.
    data_app = data_app.unstack(level='transportation_type') - 100
    # Give both frames the same index level names so they can be joined.
    data_app.index.names = data_agg.index.names
    data_agg = data_agg.join(data_app)
    print(data_app.columns)

    # --- Combined index -----------------------------------------------------
    mob_idx_cols = ['retail_and_recreation_google', 'grocery_and_pharmacy_google',
                    'parks_google', 'transit_stations_google',
                    'workplaces_google', 'driving', 'transit', 'walking']
    data_agg['Mobility Index'] = data_agg[mob_idx_cols].mean(1)

    regiones = {
        'Latam': ['Argentina', 'Brazil', 'Chile', 'Colombia',
                  'Mexico', 'Peru'],
        'Europa': ['Italy', 'Spain', 'Germany', 'United Kingdom', 'France'],
        'Asia Emergente': ['South Korea', 'Taiwan', 'Hong Kong', 'India',
                           'Thailand', 'Indonesia'],
        'USA': ['United States'],
    }

    # --- Smoothing and regional aggregates ----------------------------------
    data_dict = {}
    for col in data_agg.columns:
        # Rows become dates and columns countries; smooth with a 7-row
        # window that needs at least 3 observations.
        df = data_agg[col].unstack().T.rolling(7, 3).mean()
        for region, paises in regiones.items():
            df[region] = df[paises].mean(1)
        data_dict[col] = df

    # --- Output --------------------------------------------------------------
    GenerarExcel("Scheduler/Movilidad_desagrada.xlsx", list(data_dict.keys()),
                 list(data_dict.values()))
    # Saving a dict makes numpy store it as a pickled object array, so
    # readers must load it with ``allow_pickle=True``.
    np.save('Scheduler/dict_movilidad.npy', data_dict)