diff --git a/.github/workflows/intraday.yml b/.github/workflows/intraday.yml index 4c8233b2..c3ef702a 100644 --- a/.github/workflows/intraday.yml +++ b/.github/workflows/intraday.yml @@ -44,4 +44,5 @@ jobs: AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} S3_BUCKET: ${{ secrets.S3_BUCKET }} APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} + POLYGON: ${{ secrets.POLYGON }} run: python scripts/update_intraday.py diff --git a/.github/workflows/ohlc.yml b/.github/workflows/ohlc.yml index 866d0294..4cdb7ec6 100644 --- a/.github/workflows/ohlc.yml +++ b/.github/workflows/ohlc.yml @@ -44,4 +44,5 @@ jobs: AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} S3_BUCKET: ${{ secrets.S3_BUCKET }} APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} + POLYGON: ${{ secrets.POLYGON }} run: python scripts/update_ohlc.py diff --git a/.github/workflows/sandbox.yml b/.github/workflows/sandbox.yml index 028aa0c0..69f1a315 100644 --- a/.github/workflows/sandbox.yml +++ b/.github/workflows/sandbox.yml @@ -16,9 +16,11 @@ env: STOCKTWITS: ${{ secrets.STOCKTWITS }} S3_BUCKET: ${{ secrets.S3_DEV_BUCKET }} APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} + POLYGON: ${{ secrets.POLYGON }} AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + TEST: true jobs: build: @@ -72,5 +74,8 @@ jobs: - name: Update OHLC run: python scripts/update_ohlc.py + - name: Update social sentiment + run: python scripts/update_sentiment.py + - name: Upload repo to S3 run: python scripts/update_repo.py diff --git a/requirements.txt b/requirements.txt index 85585283..09cb1895 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pandas == 1.1.2 robin-stocks == 1.5.2 boto3 == 1.15.9 polygon-api-client == 0.1.8 +pytz == 2020.1 diff --git a/scripts/update_dividends.py b/scripts/update_dividends.py index 8287fc15..ce7f8fa8 100644 --- a/scripts/update_dividends.py +++ b/scripts/update_dividends.py @@ -3,8 +3,8 @@ from multiprocessing import Process sys.path.append('src') from DataSource import IEXCloud, Polygon # noqa autopep8 -from Constants import CI, PathFinder # noqa autopep8 - +from Constants import PathFinder # noqa autopep8 +import Constants as C # noqa autopep8 iex = IEXCloud() poly = Polygon() @@ -18,14 +18,15 @@ def update_iex_dividends(): for symbol in symbols: try: - iex.save_dividends(symbol=symbol, timeframe='3m') + iex.save_dividends(symbol=symbol, timeframe='3m', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'IEX Cloud dividend update failed for {symbol}.') print(e) finally: filename = PathFinder().get_dividends_path( symbol=symbol, provider=iex.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) # 2nd pass @@ -33,14 +34,15 @@ def update_iex_dividends(): def update_poly_dividends(): for symbol in symbols: try: - poly.save_dividends(symbol=symbol, timeframe='3m') + poly.save_dividends(symbol=symbol, timeframe='3m', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'Polygon.io dividend update failed for {symbol}.') print(e) finally: filename = PathFinder().get_dividends_path( symbol=symbol, provider=poly.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) diff --git a/scripts/update_hist_ohlc.py b/scripts/update_hist_ohlc.py index 36cdbde6..113f1518 100644 --- a/scripts/update_hist_ohlc.py +++ b/scripts/update_hist_ohlc.py @@ -1,23 +1,24 @@ import os import sys +from time import sleep from multiprocessing import Process sys.path.append('src') from DataSource import IEXCloud, Polygon # noqa autopep8 -from Constants import CI, PathFinder # noqa autopep8 +from Constants import CI, PathFinder, POLY_CRYPTO_SYMBOLS # noqa autopep8 iex = IEXCloud() -poly = Polygon() -symbols = iex.get_symbols() -symbols = symbols[140:] - +poly_stocks = Polygon() +poly_crypto = Polygon(os.environ['POLYGON']) +stock_symbols = iex.get_symbols()[140:] +crypto_symbols = POLY_CRYPTO_SYMBOLS # Double redundancy # 1st pass def update_iex_ohlc(): - for symbol in symbols: + for symbol in stock_symbols: try: iex.save_ohlc(symbol=symbol, timeframe='max') except Exception as e: @@ -31,21 +32,41 @@ def update_iex_ohlc(): # 2nd pass -def update_poly_ohlc(): - for symbol in symbols: +def update_poly_stocks_ohlc(): + for symbol in stock_symbols: + try: + poly_stocks.save_ohlc(symbol=symbol, timeframe='max') + except Exception as e: + print(f'Polygon.io OHLC update failed for {symbol}.') + print(e) + finally: + filename = PathFinder().get_ohlc_path( + symbol=symbol, provider=poly_stocks.provider) + if CI and os.path.exists(filename): + os.remove(filename) +# Crypto pass + + +def update_poly_crypto_ohlc(): + for idx, symbol in enumerate(crypto_symbols): try: - poly.save_ohlc(symbol=symbol, timeframe='max') + poly_crypto.save_ohlc(symbol=symbol, timeframe='max') except Exception as e: print(f'Polygon.io OHLC update failed for {symbol}.') print(e) finally: filename = PathFinder().get_ohlc_path( - symbol=symbol, provider=poly.provider) + symbol=symbol, provider=poly_crypto.provider) if CI and os.path.exists(filename): os.remove(filename) + if idx != len(crypto_symbols) - 1: + sleep(60 // len(crypto_symbols) + 5) + p1 = Process(target=update_iex_ohlc) -p2 = Process(target=update_poly_ohlc) +p2 = Process(target=update_poly_stocks_ohlc) +p3 = Process(target=update_poly_crypto_ohlc) p1.start() p2.start() +p3.start() diff --git a/scripts/update_ohlc.py b/scripts/update_ohlc.py index 1625c039..7ad16a3c 100644 --- a/scripts/update_ohlc.py +++ b/scripts/update_ohlc.py @@ -1,50 +1,75 @@ import os import sys +from time import sleep from multiprocessing import Process sys.path.append('src') from DataSource import IEXCloud, Polygon # noqa autopep8 -from Constants import CI, PathFinder # noqa autopep8 - +from Constants import PathFinder # noqa autopep8 +import Constants as C # noqa autopep8 iex = IEXCloud() -poly = Polygon() -symbols = iex.get_symbols() - +poly_stocks = Polygon() +poly_crypto = Polygon(os.environ['POLYGON']) +stock_symbols = iex.get_symbols() +crypto_symbols = C.POLY_CRYPTO_SYMBOLS # Double redundancy # 1st pass def update_iex_ohlc(): - for symbol in symbols: + for symbol in stock_symbols: try: - iex.save_ohlc(symbol=symbol, timeframe='1d') + iex.save_ohlc(symbol=symbol, timeframe='1d', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'IEX Cloud OHLC update failed for {symbol}.') print(e) finally: filename = PathFinder().get_ohlc_path( symbol=symbol, provider=iex.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) # 2nd pass -def update_poly_ohlc(): - for symbol in symbols: +def update_poly_stocks_ohlc(): + for symbol in stock_symbols: + try: + poly_stocks.save_ohlc(symbol=symbol, timeframe='1d', + retries=1 if C.TEST else C.DEFAULT_RETRIES) + except Exception as e: + print(f'Polygon.io OHLC update failed for {symbol}.') + print(e) + finally: + filename = PathFinder().get_ohlc_path( + symbol=symbol, provider=poly_stocks.provider) + if C.CI and os.path.exists(filename): + os.remove(filename) +# Crypto pass + + +def update_poly_crypto_ohlc(): + for idx, symbol in enumerate(crypto_symbols): try: - poly.save_ohlc(symbol=symbol, timeframe='1d') + poly_crypto.save_ohlc(symbol=symbol, timeframe='1d', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'Polygon.io OHLC update failed for {symbol}.') print(e) finally: filename = PathFinder().get_ohlc_path( - symbol=symbol, provider=poly.provider) - if CI and os.path.exists(filename): + symbol=symbol, provider=poly_crypto.provider) + if C.CI and os.path.exists(filename): os.remove(filename) + if idx != len(crypto_symbols) - 1: + sleep(60 // len(crypto_symbols) + 5) + p1 = Process(target=update_iex_ohlc) -p2 = Process(target=update_poly_ohlc) +p2 = Process(target=update_poly_stocks_ohlc) +p3 = Process(target=update_poly_crypto_ohlc) p1.start() p2.start() +p3.start() diff --git a/scripts/update_sentiment.py b/scripts/update_sentiment.py index b85237ab..538083c4 100644 --- a/scripts/update_sentiment.py +++ b/scripts/update_sentiment.py @@ -2,25 +2,33 @@ import sys sys.path.append('src') from DataSource import StockTwits # noqa autopep8 +from Constants import PathFinder # noqa autopep8 import Constants as C # noqa autopep8 -from Constants import CI, PathFinder # noqa autopep8 twit = StockTwits() symbols = twit.get_symbols() -symbols.extend(['BTC-X', 'ETH-X', 'LTC-X', 'XMR-X', 'IOT-X']) +crypto_symbols = ['BTC-X', 'ETH-X', 'LTC-X', 'XMR-X', 'IOT-X'] +if C.TEST: + symbols = crypto_symbols + twit.token = '' +else: + symbols.extend(crypto_symbols) BATCH = int(os.environ.get('BATCH')) if os.environ.get('BATCH') else 1 # better solution is to dynamically choose 175 most outdated symbols # First batch for symbol in symbols[C.TWIT_RATE*(BATCH-1):C.TWIT_RATE*BATCH]: + if symbol in C.SENTIMENT_SYMBOLS_IGNORE: + continue try: - twit.save_social_sentiment(symbol=symbol, timeframe='1d') + twit.save_social_sentiment(symbol=symbol, timeframe='1d', + retries=1 if C.TEST else 2) except Exception as e: print(f'Stocktwits sentiment update failed for {symbol}.') print(e) finally: filename = PathFinder().get_sentiment_path( symbol=symbol, provider=twit.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) diff --git a/scripts/update_splits.py b/scripts/update_splits.py index 8b8e1b99..110d9f29 100644 --- a/scripts/update_splits.py +++ b/scripts/update_splits.py @@ -3,8 +3,8 @@ from multiprocessing import Process sys.path.append('src') from DataSource import IEXCloud, Polygon # noqa autopep8 -from Constants import CI, PathFinder # noqa autopep8 - +from Constants import PathFinder # noqa autopep8 +import Constants as C # noqa autopep8 iex = IEXCloud() poly = Polygon() @@ -18,14 +18,15 @@ def update_iex_splits(): for symbol in symbols: try: - iex.save_splits(symbol=symbol, timeframe='3m') + iex.save_splits(symbol=symbol, timeframe='3m', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'IEX Cloud split update failed for {symbol}.') print(e) finally: filename = PathFinder().get_splits_path( symbol=symbol, provider=iex.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) # 2nd pass @@ -33,14 +34,15 @@ def update_iex_splits(): def update_poly_splits(): for symbol in symbols: try: - poly.save_splits(symbol=symbol, timeframe='3m') + poly.save_splits(symbol=symbol, timeframe='3m', + retries=1 if C.TEST else C.DEFAULT_RETRIES) except Exception as e: print(f'Polygon.io split update failed for {symbol}.') print(e) finally: filename = PathFinder().get_splits_path( symbol=symbol, provider=poly.provider) - if CI and os.path.exists(filename): + if C.CI and os.path.exists(filename): os.remove(filename) diff --git a/src/Constants.py b/src/Constants.py index cb89b1b3..e857d05a 100644 --- a/src/Constants.py +++ b/src/Constants.py @@ -3,11 +3,17 @@ from dotenv import load_dotenv load_dotenv() + + +def get_env_bool(var_name): + return bool(os.environ.get(var_name) + and os.environ[var_name].lower() == 'true') + + # Environment -DEV = bool(os.environ.get('DEV') - and os.environ['DEV'].lower() == 'true') -CI = bool(os.environ.get('CI') - and os.environ['CI'].lower() == 'true') +DEV = get_env_bool('DEV') +CI = get_env_bool('CI') +TEST = get_env_bool('TEST') # File Paths # data @@ -59,6 +65,22 @@ DELTA = 'Delta' TWIT_RATE = 175 +# Misc +POLY_CRYPTO_SYMBOLS = [ + 'X%3ABTCUSD', 'X%3AETHUSD', + 'X%3ALTCUSD', 'X%3AXMRUSD', 'X%3AIOTUSD' +] + +SENTIMENT_SYMBOLS_IGNORE = { + 'SPYD', 'VWDRY', 'BPMP', + 'FOX', 'YYY', 'SDIV', + 'DIV', 'SHECY', 'PALL', + 'DWDP', 'TFCF', 'SPAR', + 'TMUSR', 'OXY+', 'BNTX^'} + +DEFAULT_RETRIES = 3 +DEFAULT_DELAY = 2 + class PathFinder: def make_path(self, path): diff --git a/src/DataSource.py b/src/DataSource.py index fbb82cb9..63e92925 100644 --- a/src/DataSource.py +++ b/src/DataSource.py @@ -1,6 +1,8 @@ import os import requests +from time import sleep import pandas as pd +from pytz import timezone from operator import attrgetter from datetime import datetime, timedelta from polygon import RESTClient @@ -9,17 +11,32 @@ from Constants import PathFinder import Constants as C -# MAKE market data class (broker=None): -# if broker, then use broker.get_hist else use default get_hist (tiingo?) - class MarketData: - def __init__(self, broker=None): + def __init__(self): self.writer = FileWriter() self.reader = FileReader() self.finder = PathFinder() self.provider = 'iexcloud' + def try_again(self, func, **kwargs): + retries = (kwargs['retries'] + if 'retries' in kwargs + else C.DEFAULT_RETRIES) + delay = (kwargs['delay'] + if 'delay' in kwargs + else C.DEFAULT_DELAY) + func_args = {k: v for k, v in kwargs.items() if k not in { + 'retries', 'delay'}} + for retry in range(retries): + try: + return func(**func_args) + except Exception as e: + if retry == retries - 1: + raise e + else: + sleep(delay) + def get_symbols(self): # get cached list of symbols symbols_path = self.finder.get_symbols_path() @@ -224,9 +241,9 @@ def save_intraday(self): class IEXCloud(MarketData): - def __init__(self, broker=None): - super().__init__(broker=broker) + def __init__(self): load_dotenv() + super().__init__() self.base = 'https://cloud.iexapis.com' self.version = 'stable' self.token = os.environ['IEXCLOUD'] @@ -241,120 +258,127 @@ def get_endpoint(self, parts, raw_params=[]): endpoint = f'{url}?{params}' return endpoint - def get_dividends(self, symbol, timeframe='3m'): + def get_dividends(self, **kwargs): # given a symbol, return the dividend history - category = 'stock' - dataset = 'dividends' - parts = [ - self.base, - self.version, - category, - symbol.lower(), - dataset, - timeframe - ] - endpoint = self.get_endpoint(parts) - response = requests.get(endpoint) - empty = pd.DataFrame() - - if response.ok: - data = [datum for datum in response.json() if datum['flag'] - == 'Cash' and datum['currency'] == 'USD'] - # self.writer.save_json(f'data/{symbol}.json', data) - else: - print(f'Invalid response from IEX for {symbol} dividends.') - - if not response.ok or data == []: - return empty - - df = pd.DataFrame(data) - - return self.standardize_dividends(symbol, df) - - def get_splits(self, symbol, timeframe='3m'): + def _get_dividends(symbol, timeframe='3m'): + category = 'stock' + dataset = 'dividends' + parts = [ + self.base, + self.version, + category, + symbol.lower(), + dataset, + timeframe + ] + endpoint = self.get_endpoint(parts) + response = requests.get(endpoint) + empty = pd.DataFrame() + + if response.ok: + data = [datum for datum in response.json() if datum['flag'] + == 'Cash' and datum['currency'] == 'USD'] + else: + raise Exception( + f'Invalid response from IEX for {symbol} dividends.') + + if data == []: + return empty + + df = self.standardize_dividends(symbol, pd.DataFrame(data)) + return self.reader.data_in_timeframe(df, C.EX, timeframe) + + return self.try_again(func=_get_dividends, **kwargs) + + def get_splits(self, **kwargs): # given a symbol, return the stock splits - category = 'stock' - dataset = 'splits' - parts = [ - self.base, - self.version, - category, - symbol.lower(), - dataset, - timeframe - ] - endpoint = self.get_endpoint(parts) - response = requests.get(endpoint) - empty = pd.DataFrame() - - if response.ok: - data = response.json() - else: - print(f'Invalid response from IEX for {symbol} splits.') - - if not response.ok or data == []: - return empty - - df = pd.DataFrame(data) - - return self.standardize_splits(symbol, df) - - def get_prev_ohlc(self, symbol): - # given a symbol, return the prev day's ohlc - category = 'stock' - dataset = 'previous' - parts = [ - self.base, - self.version, - category, - symbol.lower(), - dataset - ] - endpoint = self.get_endpoint(parts) - response = requests.get(endpoint) - empty = pd.DataFrame() - - if response.ok: - data = response.json() - else: - print(f'Invalid response from IEX for {symbol} OHLC.') - - if not response.ok or data == []: - return empty - - df = pd.DataFrame([data]) - - return self.standardize_ohlc(symbol, df) - - def get_ohlc(self, symbol, timeframe='1m'): - if timeframe == '1d': - return self.get_prev_ohlc(symbol) - - category = 'stock' - dataset = 'chart' - parts = [ - self.base, - self.version, - category, - symbol.lower(), - dataset, - timeframe - ] - endpoint = self.get_endpoint(parts) - response = requests.get(endpoint) - empty = pd.DataFrame() - - if response.ok: - data = response.json() - else: - print(f'Invalid response from IEX for {symbol} OHLC.') - - if not response.ok or data == []: - return empty - - df = pd.DataFrame(data) - - return self.standardize_ohlc(symbol, df) + def _get_splits(symbol, timeframe='3m'): + category = 'stock' + dataset = 'splits' + parts = [ + self.base, + self.version, + category, + symbol.lower(), + dataset, + timeframe + ] + endpoint = self.get_endpoint(parts) + response = requests.get(endpoint) + empty = pd.DataFrame() + + if response.ok: + data = response.json() + else: + raise Exception( + f'Invalid response from IEX for {symbol} splits.') + + if data == []: + return empty + + df = self.standardize_splits(symbol, pd.DataFrame(data)) + return self.reader.data_in_timeframe(df, C.EX, timeframe) + + return self.try_again(func=_get_splits, **kwargs) + + def get_ohlc(self, **kwargs): + def _get_prev_ohlc(symbol): + category = 'stock' + dataset = 'previous' + parts = [ + self.base, + self.version, + category, + symbol.lower(), + dataset + ] + endpoint = self.get_endpoint(parts) + response = requests.get(endpoint) + empty = pd.DataFrame() + + if response.ok: + data = response.json() + else: + raise Exception( + f'Invalid response from IEX for {symbol} OHLC.') + + if data == []: + return empty + + df = pd.DataFrame([data]) + return self.standardize_ohlc(symbol, df) + + def _get_ohlc(symbol, timeframe='1m'): + if timeframe == '1d': + return _get_prev_ohlc(symbol) + + category = 'stock' + dataset = 'chart' + parts = [ + self.base, + self.version, + category, + symbol.lower(), + dataset, + timeframe + ] + endpoint = self.get_endpoint(parts) + response = requests.get(endpoint) + empty = pd.DataFrame() + + if response.ok: + data = response.json() + else: + raise Exception( + f'Invalid response from IEX for {symbol} OHLC.') + + if data == []: + return empty + + df = self.standardize_ohlc(symbol, pd.DataFrame(data)) + return self.reader.data_in_timeframe(df, C.TIME, timeframe) + + return self.try_again(func=_get_ohlc, **kwargs) # extra_hrs should be True if possible def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=True): @@ -366,56 +390,64 @@ def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=True): class Polygon(MarketData): - def __init__(self, broker=None): - super().__init__(broker=broker) + def __init__(self, token=os.environ['APCA_API_KEY_ID']): load_dotenv() - self.client = RESTClient(os.environ['APCA_API_KEY_ID']) + super().__init__() + self.client = RESTClient(token) self.provider = 'polygon' - def get_dividends(self, symbol, timeframe='max'): - response = self.client.reference_stock_dividends(symbol) - raw = pd.DataFrame(response.results) - df = self.standardize_dividends(symbol, raw) - return self.reader.data_in_timeframe(df, C.EX, timeframe) - - def get_splits(self, symbol, timeframe='max'): - response = self.client.reference_stock_splits(symbol) - raw = pd.DataFrame(response.results) - df = self.standardize_splits(symbol, raw) - return self.reader.data_in_timeframe(df, C.EX, timeframe) - - def get_prev_ohlc(self, symbol): - today = datetime.today() - one_day = timedelta(days=1) - yesterday = today - one_day - formatted_date = yesterday.strftime('%Y-%m-%d') - response = self.client.stocks_equities_daily_open_close( - symbol, formatted_date, unadjusted=False) - raw = attrgetter('from_', 'open', 'high', 'low', - 'close', 'volume')(response) - labels = ['date', 'open', 'high', 'low', 'close', 'volume'] - data = dict(zip(labels, raw)) - df = pd.DataFrame([data]) - return self.standardize_ohlc(symbol, df) - - def get_ohlc(self, symbol, timeframe='max'): - if timeframe == '1d': - return self.get_prev_ohlc(symbol) - end = datetime.today() - delta = self.reader.convert_delta(timeframe) - start = end - delta - formatted_start = start.strftime('%Y-%m-%d') - formatted_end = end.strftime('%Y-%m-%d') - response = self.client.stocks_equities_aggregates( - symbol, 1, 'day', - from_=formatted_start, to=formatted_end, unadjusted=False - ).results - columns = {'t': 'date', 'o': 'open', 'h': 'high', - 'l': 'low', 'c': 'close', 'v': 'volume'} - df = pd.DataFrame(response).rename(columns=columns) - df['date'] = df['date'].apply( - lambda x: datetime.fromtimestamp(int(x)/1000)) - return self.standardize_ohlc(symbol, df) + def get_dividends(self, **kwargs): + def _get_dividends(symbol, timeframe='max'): + response = self.client.reference_stock_dividends(symbol) + raw = pd.DataFrame(response.results) + df = self.standardize_dividends(symbol, raw) + return self.reader.data_in_timeframe(df, C.EX, timeframe) + return self.try_again(func=_get_dividends, **kwargs) + + def get_splits(self, **kwargs): + def _get_splits(symbol, timeframe='max'): + response = self.client.reference_stock_splits(symbol) + raw = pd.DataFrame(response.results) + df = self.standardize_splits(symbol, raw) + return self.reader.data_in_timeframe(df, C.EX, timeframe) + return self.try_again(func=_get_splits, **kwargs) + + def get_ohlc(self, **kwargs): + def _get_prev_ohlc(symbol): + today = datetime.now(timezone('US/Eastern')) + one_day = timedelta(days=1) + yesterday = today - one_day + formatted_date = yesterday.strftime('%Y-%m-%d') + response = self.client.stocks_equities_daily_open_close( + symbol, formatted_date, unadjusted=False) + raw = attrgetter('from_', 'open', 'high', 'low', + 'close', 'volume')(response) + labels = ['date', 'open', 'high', 'low', 'close', 'volume'] + data = dict(zip(labels, raw)) + df = pd.DataFrame([data]) + return self.standardize_ohlc(symbol, df) + + def _get_ohlc(symbol, timeframe='max'): + if timeframe == '1d': + return _get_prev_ohlc(symbol) + end = datetime.now(timezone('US/Eastern')) + delta = self.reader.convert_delta(timeframe) + start = end - delta + formatted_start = start.strftime('%Y-%m-%d') + formatted_end = end.strftime('%Y-%m-%d') + response = self.client.stocks_equities_aggregates( + symbol, 1, 'day', + from_=formatted_start, to=formatted_end, unadjusted=False + ).results + columns = {'t': 'date', 'o': 'open', 'h': 'high', + 'l': 'low', 'c': 'close', 'v': 'volume'} + df = pd.DataFrame(response).rename(columns=columns) + df['date'] = df['date'].apply( + lambda x: datetime.fromtimestamp(int(x)/1000)) + df = self.standardize_ohlc(symbol, df) + return self.reader.data_in_timeframe(df, C.TIME, timeframe) + + return self.try_again(func=_get_ohlc, **kwargs) def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=True): # pass min directly into stock_aggs function as multiplier @@ -424,63 +456,68 @@ def get_intraday(self, symbol, min=1, timeframe='max', extra_hrs=True): class StockTwits(MarketData): - def __init__(self, broker=None): - super().__init__(broker=broker) + def __init__(self): load_dotenv() + super().__init__() self.provider = 'stocktwits' self.token = os.environ.get('STOCKTWITS') - def get_social_volume(self, symbol, timeframe='max'): - vol_res = requests.get(( - f'https://api.stocktwits.com/api/2/symbols/{symbol}/volume.json' - f'?access_token={self.token}' - )) - empty = pd.DataFrame() - - if vol_res.ok: - vol_data = vol_res.json()['data'] - else: - print(f'Invalid response from Stocktwits for {symbol}') - - if not vol_res.ok or vol_data == []: - return empty - - vol_data.sort(key=lambda x: x['timestamp']) - # FIX THIS!! WHAT HAPPENS WHEN AROUND NEW YEAR'S WHEN JANUARY IS CONSIDERED EARLIER THAN DEC?? - # actually should be fine bc data is in YYYY-MM-DD format - vol_data.pop() - df = pd.DataFrame(vol_data) - std = self.standardize_volume(symbol, df) - if timeframe == '1d': - filtered = std.tail(1) - else: - filtered = self.reader.data_in_timeframe(std, C.TIME, timeframe)[ - [C.TIME, C.VOL, C.DELTA]] - return filtered - - def get_social_sentiment(self, symbol, timeframe='max'): - sen_res = requests.get(( - f'https://api.stocktwits.com/api/2/symbols/{symbol}/sentiment.json' - f'?access_token={self.token}' - )) - empty = pd.DataFrame() - - if sen_res.ok: - sen_data = sen_res.json()['data'] - else: - print(f'Invalid response from Stocktwits for {symbol}.') - - if not sen_res.ok or sen_data == []: - return empty - - sen_data.sort(key=lambda x: x['timestamp']) - # FIX THIS!! WHAT HAPPENS WHEN AROUND NEW YEAR'S WHEN JANUARY IS CONSIDERED EARLIER THAN DEC?? - # actually should be fine bc data is in YYYY-MM-DD format - sen_data.pop() - df = pd.DataFrame(sen_data) - std = self.standardize_sentiment(symbol, df) - if timeframe == '1d': - filtered = std.tail(1) - else: - filtered = self.reader.data_in_timeframe(std, C.TIME, timeframe) - return filtered + def get_social_volume(self, **kwargs): + def _get_social_volume(symbol, timeframe='max'): + vol_res = requests.get(( + f'https://api.stocktwits.com/api/2/symbols/{symbol}' + f'/volume.json?access_token={self.token}' + )) + empty = pd.DataFrame() + + if vol_res.ok: + vol_data = vol_res.json()['data'] + else: + raise Exception( + f'Invalid response from Stocktwits for {symbol}') + + if vol_data == []: + return empty + + vol_data.sort(key=lambda x: x['timestamp']) + vol_data.pop() + df = pd.DataFrame(vol_data) + std = self.standardize_volume(symbol, df) + if timeframe == '1d': + filtered = std.tail(1) + else: + filtered = self.reader.data_in_timeframe( + std, C.TIME, timeframe) + [[C.TIME, C.VOL, C.DELTA]] + return filtered + + return self.try_again(func=_get_social_volume, **kwargs) + + def get_social_sentiment(self, **kwargs): + def _get_social_sentiment(symbol, timeframe='max'): + sen_res = requests.get(( + f'https://api.stocktwits.com/api/2/symbols/{symbol}' + f'/sentiment.json?access_token={self.token}' + )) + empty = pd.DataFrame() + + if sen_res.ok: + sen_data = sen_res.json()['data'] + else: + raise Exception( + f'Invalid response from Stocktwits for {symbol}.') + + if sen_data == []: + return empty + + sen_data.sort(key=lambda x: x['timestamp']) + sen_data.pop() + df = pd.DataFrame(sen_data) + std = self.standardize_sentiment(symbol, df) + if timeframe == '1d': + filtered = std.tail(1) + else: + filtered = self.reader.data_in_timeframe( + std, C.TIME, timeframe) + return filtered + return self.try_again(func=_get_social_sentiment, **kwargs) diff --git a/test/test_DataSource.py b/test/test_DataSource.py index bdba2c68..57b8f0b2 100644 --- a/test/test_DataSource.py +++ b/test/test_DataSource.py @@ -45,7 +45,7 @@ def test_get_symbols(self): assert symbol in symbols def test_get_dividends(self): - df = md.get_dividends('AAPL') + df = md.get_dividends(symbol='AAPL') assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) assert len(df) > 15 assert len(df[df[C.EX] < '2015-12-25']) > 0 @@ -76,7 +76,8 @@ def test_save_dividends(self): os.rename(div_path, temp_path) for _ in range(retries): - iex.save_dividends(symbol=symbol, timeframe='5y') + iex.save_dividends( + symbol=symbol, timeframe='5y', retries=1, delay=0) if not md.reader.check_file_exists(div_path): delay = choice(range(5, 10)) sleep(delay) @@ -122,7 +123,7 @@ def test_save_splits(self): os.rename(splt_path, temp_path) for _ in range(retries): - iex.save_splits(symbol=symbol, timeframe='5y') + iex.save_splits(symbol=symbol, timeframe='5y', retries=1, delay=0) if not md.reader.check_file_exists(splt_path): delay = choice(range(5, 10)) sleep(delay) @@ -156,7 +157,8 @@ def test_save_social_sentiment(self): if os.path.exists(sent_path): os.rename(sent_path, temp_path) - twit.save_social_sentiment(symbol=symbol, timeframe='1d') + twit.save_social_sentiment( + symbol=symbol, timeframe='1d', retries=1, delay=0) assert md.reader.check_file_exists(sent_path) assert md.reader.store.modified_delta(sent_path).total_seconds() < 60 @@ -224,7 +226,7 @@ def test_save_ohlc(self): os.rename(ohlc_path, temp_path) for _ in range(retries): - iex.save_ohlc(symbol=symbol, timeframe='1m') + iex.save_ohlc(symbol=symbol, timeframe='1m', retries=1, delay=0) if not md.reader.check_file_exists(ohlc_path): delay = choice(range(5, 10)) sleep(delay) @@ -258,7 +260,8 @@ def test_save_ohlc(self): # break # assert md.reader.check_file_exists(intra_path) - # assert md.reader.store.modified_delta(intra_path).total_seconds() < 60 + # assert md.reader.store.modified_delta( + # intra_path).total_seconds() < 60) # df = md.reader.load_csv(intra_path) # assert {C.TIME, C.OPEN, C.HIGH, C.LOW, # C.CLOSE, C.VOL}.issubset(df.columns) @@ -301,7 +304,7 @@ def test_get_dividends(self): for i in range(retries): if not len(df): - df = iex.get_dividends('AAPL', '5y') + df = iex.get_dividends(symbol='AAPL', timeframe='5y') if not i: delay = choice(range(5, 10)) sleep(delay) @@ -315,8 +318,8 @@ def test_get_splits(self): df1, df2 = [], [] for i in range(retries): if not(len(df1) or len(df2)): - df1 = iex.get_splits('AAPL', '5y') - df2 = iex.get_splits('NFLX', '5y') + df1 = iex.get_splits(symbol='AAPL', timeframe='5y') + df2 = iex.get_splits(symbol='NFLX', timeframe='5y') if not i: delay = choice(range(5, 10)) sleep(delay) @@ -328,7 +331,7 @@ def test_get_splits(self): df1.columns) or {C.EX, C.DEC, C.RATIO}.issubset(df2.columns) def test_get_ohlc(self): - df = iex.get_ohlc('AAPL', '1m') + df = iex.get_ohlc(symbol='AAPL', timeframe='1m') assert {C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL}.issubset(df.columns) assert len(df) > 10 @@ -347,17 +350,17 @@ def test_init(self): assert hasattr(poly, 'provider') def test_get_dividends(self): - df = poly.get_dividends('AAPL', '5y') + df = poly.get_dividends(symbol='AAPL', timeframe='5y') assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) assert len(df) > 0 def test_get_splits(self): - df = poly.get_splits('AAPL') + df = poly.get_splits(symbol='AAPL') assert {C.EX, C.DEC, C.RATIO}.issubset(df.columns) assert len(df) > 0 def test_get_ohlc(self): - df = poly.get_ohlc('AAPL', '1m') + df = poly.get_ohlc(symbol='AAPL', timeframe='1m') assert {C.TIME, C.OPEN, C.HIGH, C.LOW, C.CLOSE, C.VOL}.issubset(df.columns) assert len(df) > 10 @@ -376,11 +379,11 @@ def test_init(self): assert hasattr(twit, 'token') def test_get_social_volume(self): - df = twit.get_social_volume('TSLA') + df = twit.get_social_volume(symbol='TSLA') assert len(df) > 30 assert {C.TIME, C.VOL, C.DELTA}.issubset(df.columns) def test_get_social_sentiment(self): - df = twit.get_social_sentiment('TSLA') + df = twit.get_social_sentiment(symbol='TSLA') assert len(df) > 30 assert {C.TIME, C.POS, C.NEG}.issubset(df.columns) diff --git a/util/get_loc.sh b/util/get_loc.sh index ab675885..08f16d7c 100644 --- a/util/get_loc.sh +++ b/util/get_loc.sh @@ -1,5 +1,5 @@ # get total lines of code -git ls-files | xargs cat | wc -l +git ls-files | grep -v 'img/*' | xargs cat | wc -l # git ls-files | xargs wc -l # git diff --stat `git hash-object -t tree /dev/null`