File size: 2,887 Bytes
05a3e2c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82

# Import packages
import os

import numpy as np
import pandas as pd
from dotenv import load_dotenv
from pycoingecko import CoinGeckoAPI
from pytrends.request import TrendReq

import scrape_utils as su

# Module-level API clients, created once when this module is imported.
cg = CoinGeckoAPI()
pytrends = TrendReq(hl='en-US')

# Must run before the environment lookup below so that values supplied via a
# .env file are visible in os.environ.
load_dotenv()

# Raises KeyError at import time when the variable is not defined.
COINMARKET_API_KEY = os.environ["COINMARKET_API_KEY"]

def scrape_crypto(currency, ticker):
    """Collect price, macro, search-trend and stock data into one DataFrame.

    The look-back window is the 300 days ending today. All data is fetched
    through the ``scrape_utils`` helpers (imported as ``su``); the individual
    frames are re-indexed by ``YYYY-MM-DD`` strings and joined column-wise.

    Args:
        currency: Currency identifier forwarded to
            ``su.scrape_historical_series`` (wrapped in a one-element list)
            and to ``su.scrape_google_trends``.
        ticker: Ticker forwarded to the same two helpers.

    Returns:
        A DataFrame sorted by its date-string index containing the coin
        columns ``id``, ``prices``, ``market_caps`` and ``total_vol`` plus
        whatever columns the CLI, CPI/employment, Google Trends and stock
        helpers return. An empty DataFrame is returned when the historical
        price series is empty.
    """
    from datetime import date, timedelta

    # 1. Scrape historical Price and Volume currency data
    today = date.today()
    past = today - timedelta(days=300)

    # The helper takes dates as [year, month, day] lists of zero-padded strings.
    date_start = [past.strftime("%Y"), past.strftime("%m"), past.strftime("%d")]
    date_end = [today.strftime("%Y"), today.strftime("%m"), today.strftime("%d")]

    # Fetch once and read both results from the same response: element 0 is
    # the historical series, element 1 the row for today. Calling the helper
    # twice would double the request and could pair rows from two snapshots.
    historical = su.scrape_historical_series([currency], ticker, date_start, date_end)
    df_ts_coins = historical[0]
    df_today_row = historical[1]
    print(df_ts_coins)

    if len(df_ts_coins) == 0:
        print('No data available.')
        return pd.DataFrame()

    print(df_today_row)

    df_ts_coins = df_ts_coins[['id', 'date', 'prices', 'market_caps', 'total_vol']]
    df_ts_coins = pd.concat([df_ts_coins, df_today_row], axis=0)
    df_ts_coins.set_index('date', inplace=True)
    # Normalise every label individually so the historical rows and today's
    # row end up in one uniform timestamp format.
    df_ts_coins.index = [
        pd.to_datetime(stamp).strftime("%Y-%m-%d %H:%M:%S")
        for stamp in df_ts_coins.index
    ]

    # 2. Scrape macro
    df_cli = su.scrape_cli(past, today)
    df_cpi = su.scrape_cpi_employment()
    print('I have scraped CLI and L, CPI')

    # 3. Scrape google-trends
    google_data = su.scrape_google_trends(currency, ticker)
    print('Google trend dataset')

    # 4. Scrape Yahoo-Finance
    df_finance = su.scrape_stocks(past, today)
    print('yahoo dataset. I am done scraping !!!!!!!')

    # ==== 5. CONCAT DATAFRAMES TOGETHER
    # Every frame is keyed by a plain "YYYY-MM-DD" string so that rows line up
    # by calendar day in the column-wise concat below.
    df_ts_coins.index = pd.to_datetime(df_ts_coins.index).strftime("%Y-%m-%d")
    df_cli.index = pd.to_datetime(df_cli.index).strftime("%Y-%m-%d")
    if len(df_cpi) > 0:
        df_cpi.index = pd.to_datetime(df_cpi.index).strftime("%Y-%m-%d")
    else:
        print('MISSING CPI')
        # Zero-filled stand-in aligned with the CLI dates so the output keeps
        # its 'CPI' and 'Employment' columns.
        df_cpi = pd.DataFrame({
            'CPI': np.repeat(0, len(df_cli)),
            'Employment': np.repeat(0, len(df_cli)),
        })
        df_cpi.index = df_cli.index
    google_data.index = pd.to_datetime(google_data.index).strftime("%Y-%m-%d")
    df_finance.index = pd.to_datetime(df_finance.index).strftime("%Y-%m-%d")

    # NOTE(review): if today's row shares a calendar day with the last
    # historical row, the coin index holds a duplicate label after truncation
    # to the day, which a column-wise concat may reject - confirm against the
    # helper's output.
    df_all = pd.concat(
        [df_ts_coins, df_cli, df_cpi, google_data, df_finance], axis=1
    )
    return df_all.sort_index()