Krypto1 / app_crypto_scrape.py
KatGaw's picture
adding new reddit group
05a3e2c
# Import packages and build the module-level API clients.
# NOTE: everything below runs at import time, including client construction
# and the environment lookup on the last line.
import pandas as pd
import numpy as np
from pycoingecko import CoinGeckoAPI
# CoinGecko client instance created once at import; shared by the module.
cg = CoinGeckoAPI()
# NOTE(review): pandas and numpy are imported a second time here; the repeat
# is redundant (the names are already bound above) but harmless.
import pandas as pd
import numpy as np
from pytrends.request import TrendReq
# Google Trends client instance, constructed with hl='en-US'.
pytrends = TrendReq(hl='en-US')
# Project-local scraping helpers, referenced as `su` throughout this module.
import scrape_utils as su
from dotenv import load_dotenv
import os
# Called before the os.environ lookup below — presumably so that a local
# .env file can supply COINMARKET_API_KEY (confirm against deployment setup).
load_dotenv()
# Subscript access (not .get): importing this module raises KeyError when
# COINMARKET_API_KEY is not present in the environment.
COINMARKET_API_KEY=os.environ["COINMARKET_API_KEY"]
def _day_labels(index):
    """Return *index* parsed as datetimes and rendered as ``YYYY-MM-DD`` strings."""
    return pd.to_datetime(index).strftime("%Y-%m-%d")


def scrape_crypto(currency, ticker):
    """Scrape coin, macro, Google Trends and market data into one frame.

    The look-back window is the 300 days ending today.

    Args:
        currency: Coin identifier; forwarded to ``su.scrape_historical_series``
            (wrapped in a one-element list) and to ``su.scrape_google_trends``.
        ticker: Coin ticker; forwarded to the same two helpers.

    Returns:
        A DataFrame that places the coin series (``id``, ``prices``,
        ``market_caps``, ``total_vol``), the CLI frame, the CPI/Employment
        frame, the Google Trends frame and the finance frame side by side,
        indexed by ``YYYY-MM-DD`` strings and sorted by that index.
        An empty DataFrame when the historical coin scrape returns no rows.
    """
    # Function-scope import: the module itself does not import datetime names.
    from datetime import date, timedelta

    # 1. Scrape historical Price and Volume currency data
    today = date.today()
    past = today - timedelta(days=300)
    # The historical scraper is given each bound as zero-padded
    # [year, month, day] strings.
    date_start = [past.strftime("%Y"), past.strftime("%m"), past.strftime("%d")]
    date_end = [today.strftime("%Y"), today.strftime("%m"), today.strftime("%d")]

    # Scrape once and reuse both parts of the result. Calling the scraper a
    # second time just to read element [1] repeats the whole request and can
    # pair a history frame with a today-row taken from a different snapshot.
    history = su.scrape_historical_series([currency], ticker, date_start, date_end)
    df_ts_coins = history[0]
    df_today_row = history[1]
    print(df_ts_coins)

    if len(df_ts_coins) == 0:
        print('No data available.')
        return pd.DataFrame()

    print(df_today_row)
    df_ts_coins = df_ts_coins[['id', 'date', 'prices', 'market_caps', 'total_vol']]
    df_ts_coins = pd.concat([df_ts_coins, df_today_row], axis=0)
    df_ts_coins = df_ts_coins.set_index('date')
    # Parse label by label: the history rows and today's row come from two
    # different frames, so the 'date' values may not share a single format.
    df_ts_coins.index = [
        pd.to_datetime(stamp).strftime("%Y-%m-%d") for stamp in df_ts_coins.index
    ]

    # 2. Scrape macro
    df_cli = su.scrape_cli(past, today)
    df_cpi = su.scrape_cpi_employment()
    print('I have scraped CLI and L, CPI')

    # 3. Scrape google-trends
    google_data = su.scrape_google_trends(currency, ticker)
    print('Google trend dataset')

    # 4. Scrape Yahoo-Finance
    df_finance = su.scrape_stocks(past, today)
    print('yahoo dataset. I am done scraping !!!!!!!')

    # ==== 5. CONCAT DATAFRAMES TOGETHER
    # Every frame gets the same day-string index so the column-wise concat
    # below lines rows up by calendar day.
    df_cli.index = _day_labels(df_cli.index)
    if len(df_cpi) > 0:
        df_cpi.index = _day_labels(df_cpi.index)
    else:
        print('MISSING CPI')
        # Zero-filled placeholder on the CLI dates keeps the CPI/Employment
        # columns present in the output when that scrape returned nothing.
        placeholder = np.repeat(0, len(df_cli))
        df_cpi = pd.DataFrame(
            {'CPI': placeholder, 'Employment': placeholder},
            index=df_cli.index,
        )
    google_data.index = _day_labels(google_data.index)
    df_finance.index = _day_labels(df_finance.index)

    # NOTE(review): if today's row repeats a day already present in the
    # history, the coin frame carries a duplicate label, which column-wise
    # concat may reject — confirm what scrape_utils returns for "today".
    df_all = pd.concat(
        [df_ts_coins, df_cli, df_cpi, google_data, df_finance],
        axis=1,
    )
    return df_all.sort_index()