diff --git a/.github/dependabot.yml b/.github/dependabot.yml index 972eb3c6..11dbad47 100644 --- a/.github/dependabot.yml +++ b/.github/dependabot.yml @@ -9,3 +9,9 @@ updates: directory: "/" # Location of package manifests schedule: interval: "monthly" + + - package-ecosystem: "github-actions" + directory: "/" + schedule: + # Check for updates to GitHub Actions monthly + interval: "monthly" diff --git a/.github/workflows/sandbox.yml b/.github/workflows/sandbox.yml index 07cb2b72..4c36998e 100644 --- a/.github/workflows/sandbox.yml +++ b/.github/workflows/sandbox.yml @@ -65,5 +65,8 @@ jobs: - name: Update dividends run: python scripts/update_dividends.py + - name: Update splits + run: python scripts/update_splits.py + - name: Upload repo to S3 run: python3 scripts/update_repo.py diff --git a/.github/workflows/splits.yml b/.github/workflows/splits.yml new file mode 100644 index 00000000..e9aa1e10 --- /dev/null +++ b/.github/workflows/splits.yml @@ -0,0 +1,47 @@ +# This workflow will automatically update data files +# For more information see: https://help.github.com/en/actions/reference/events-that-trigger-workflows#scheduled-events-schedule + +name: Splits + +on: + schedule: + - cron: "30 12 1 * *" + # 12:30 UTC = 7:30am EST / 8:30am EDT + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - name: Checkout repo + uses: actions/checkout@v2 + with: + ref: ${{ github.head_ref }} + + - name: Set up Python 3.8 + uses: actions/setup-python@v2 + with: + python-version: 3.8 + + - name: Cache pip dependencies + uses: actions/cache@v2 + with: + path: ~/.cache/pip + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + if [ -f requirements.txt ]; then pip install -r requirements.txt; fi + + - name: Update splits + env: + IEXCLOUD: ${{ secrets.IEXCLOUD }} + AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY 
}} + AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} + S3_BUCKET: ${{ secrets.S3_BUCKET }} + APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} + run: python scripts/update_splits.py diff --git a/README.md b/README.md index 920651e8..3594d05e 100644 --- a/README.md +++ b/README.md @@ -70,9 +70,9 @@ Using Robinhood 2FA, we can simply provide our MFA one-time password in the `.en - [x] ![Symbols](https://github.com/suchak1/scarlett/workflows/Symbols/badge.svg) - [ ] EOD OHLCV - [ ] Intraday OHLCV 5 min ticks -- [ ] Actions +- [x] Actions - [x] ![Dividends](https://github.com/suchak1/scarlett/workflows/Dividends/badge.svg) - - [ ] Splits + - [x] ![Splits](https://github.com/suchak1/scarlett/workflows/Splits/badge.svg) - [ ] Sentiment - [ ] News Sentiment - [ ] Social Sentiment diff --git a/scripts/update_splits.py b/scripts/update_splits.py new file mode 100644 index 00000000..0b137084 --- /dev/null +++ b/scripts/update_splits.py @@ -0,0 +1,24 @@ +import sys +sys.path.append('src') +from DataSource import IEXCloud, Polygon # noqa autopep8 + +iex = IEXCloud() +poly = Polygon() +symbols = iex.get_symbols() + +# Double redundancy + +for symbol in symbols: + # 1st pass + try: + iex.save_splits(symbol=symbol, timeframe='3m') + except Exception as e: + print(f'IEX Cloud split update failed for {symbol}.') + print(e) + + # 2nd pass + try: + poly.save_splits(symbol=symbol, timeframe='max') + except Exception as e: + print(f'Polygon.io split update failed for {symbol}.') + print(e) diff --git a/src/Constants.py b/src/Constants.py index 5b092def..3e7de7e6 100644 --- a/src/Constants.py +++ b/src/Constants.py @@ -32,6 +32,9 @@ DEC = 'Dec' # Declaration Date PAY = 'Pay' # Payment Date +# Splits +RATIO = 'Ratio' + class PathFinder: def make_path(self, path): @@ -54,12 +57,13 @@ def get_dividends_path(self, symbol, provider='iexcloud'): f'{symbol.upper()}.csv' ) - def get_splits_path(self, symbol): + def get_splits_path(self, symbol, provider='iexcloud'): # given a symbol # 
return the path to its stock splits return os.path.join( DATA_DIR, SPLT_DIR, + folders[provider], f'{symbol.upper()}.csv' ) diff --git a/src/DataSource.py b/src/DataSource.py index 146555e1..065b8708 100644 --- a/src/DataSource.py +++ b/src/DataSource.py @@ -30,6 +30,21 @@ def get_dividends(self, symbol, timeframe='max'): filtered = self.reader.data_in_timeframe(df, C.EX, timeframe) return filtered + def standardize(self, symbol, df, full_mapping, fx, columns, default): + mapping = {k: v for k, v in full_mapping.items() if k in df} + + df = df[list(mapping)].rename(columns=mapping) + filename = fx(symbol, self.provider) + time_col, val_col = columns + + if time_col in df and val_col in df: + df = self.reader.update_df( + filename, df, time_col).sort_values(by=[time_col]) + df[val_col] = df[val_col].apply( + lambda val: float(val) if val else default) + + return df + def standardize_dividends(self, symbol, df): full_mapping = dict( zip( @@ -37,18 +52,14 @@ def standardize_dividends(self, symbol, df): [C.EX, C.PAY, C.DEC, C.DIV] ) ) - mapping = {k: v for k, v in full_mapping.items() if k in df} - columns = list(mapping) - - df = df[columns].rename(columns=mapping) - filename = self.finder.get_dividends_path(symbol, self.provider) - - if C.EX in df and C.DIV in df: - df = self.reader.update_df( - filename, df, C.EX).sort_values(by=[C.EX]) - df[C.DIV] = df[C.DIV].apply(lambda amt: float(amt) if amt else 0) - - return df + return self.standardize( + symbol, + df, + full_mapping, + self.finder.get_dividends_path, + [C.EX, C.DIV], + 0 + ) def save_dividends(self, **kwargs): # given a symbol, save its dividend history @@ -57,18 +68,35 @@ def save_dividends(self, **kwargs): self.writer.update_csv( self.finder.get_dividends_path(symbol, self.provider), df) - # def get_splits(self, symbol, timeframe='max'): - # # given a symbol, return a cached dataframe - # df = self.reader.load_csv(self.finder.get_splits_path(symbol)) - # filtered = self.reader.data_in_timeframe(df, C.EX, 
timeframe) - # return filtered + def get_splits(self, symbol, timeframe='max'): + # given a symbol, return a cached dataframe + df = self.reader.load_csv( + self.finder.get_splits_path(symbol, self.provider)) + filtered = self.reader.data_in_timeframe(df, C.EX, timeframe) + return filtered - # def save_splits(self, **kwargs): - # # given a symbol, save its splits history - # symbol = kwargs['symbol'] - # df = self.get_splits(**kwargs) - # self.writer.update_csv(self.finder.get_splits_path(symbol), df) + def standardize_splits(self, symbol, df): + full_mapping = dict( + zip( + ['exDate', 'paymentDate', 'declaredDate', 'ratio'], + [C.EX, C.PAY, C.DEC, C.RATIO] + ) + ) + return self.standardize( + symbol, + df, + full_mapping, + self.finder.get_splits_path, + [C.EX, C.RATIO], + 1 + ) + def save_splits(self, **kwargs): + # given a symbol, save its splits history + symbol = kwargs['symbol'] + df = self.get_splits(**kwargs) + self.writer.update_csv( + self.finder.get_splits_path(symbol, self.provider), df) # make tiingo OR IEX CLOUD!! 
version of get dividends which # fetches existing dividend csv and adds a row if dividend @@ -121,15 +149,34 @@ def get_dividends(self, symbol, timeframe='3m'): return self.standardize_dividends(symbol, df) - # def get_splits(self, symbol): - # # given a symbol, return the stock splits - # ticker = yf.Ticker(symbol.replace('.', '-')) - # df = ticker.actions.reset_index().drop( - # 'Dividends', - # axis=1 - # ) - # df = df[df['Stock Splits'] != 0] - # return df + def get_splits(self, symbol, timeframe='3m'): + # given a symbol, return the stock splits + category = 'stock' + dataset = 'splits' + parts = [ + self.base, + self.version, + category, + symbol.lower(), + dataset, + timeframe + ] + endpoint = self.get_endpoint(parts) + response = requests.get(endpoint) + empty = pd.DataFrame() + + if response.ok: + data = response.json() + # self.writer.save_json(f'data/{symbol}.json', data) + else: + print(f'Invalid response from IEX for {symbol} splits.') + + if not response.ok or data == []: + return empty + + df = pd.DataFrame(data) + + return self.standardize_splits(symbol, df) class Polygon(MarketData): @@ -144,3 +191,11 @@ def get_dividends(self, symbol, timeframe='max'): raw = pd.DataFrame(response.results) df = self.standardize_dividends(symbol, raw) return self.reader.data_in_timeframe(df, C.EX, timeframe) + + def get_splits(self, symbol, timeframe='max'): + response = self.client.reference_stock_splits(symbol) + raw = pd.DataFrame(response.results) + df = self.standardize_splits(symbol, raw) + return self.reader.data_in_timeframe(df, C.EX, timeframe) + +# newShares = oldShares / ratio diff --git a/test/test_Constants.py b/test/test_Constants.py index 00966240..98b98a15 100644 --- a/test/test_Constants.py +++ b/test/test_Constants.py @@ -20,5 +20,7 @@ def test_get_dividends_path(self): 'AMD') == 'data/dividends/iexcloud/AMD.csv' def test_get_splits_path(self): - assert finder.get_splits_path('aapl') == 'data/splits/AAPL.csv' - assert finder.get_splits_path('AMD') 
== 'data/splits/AMD.csv' + assert finder.get_splits_path( + 'aapl') == 'data/splits/iexcloud/AAPL.csv' + assert finder.get_splits_path( + 'AMD') == 'data/splits/iexcloud/AMD.csv' diff --git a/test/test_DataSource.py b/test/test_DataSource.py index 5d0734de..ed71043e 100644 --- a/test/test_DataSource.py +++ b/test/test_DataSource.py @@ -21,6 +21,7 @@ iex.base = 'https://sandbox.iexapis.com' iex.base = 'https://sandbox.iexapis.com' exp_symbols = ['AAPL', 'FB', 'DIS'] +retries = 10 class TestMarketData: @@ -67,11 +68,10 @@ def test_save_dividends(self): if os.path.exists(div_path): os.rename(div_path, temp_path) - retries = 10 - delay = choice(range(5, 10)) for _ in range(retries): iex.save_dividends(symbol=symbol, timeframe='5y') if not md.reader.check_file_exists(div_path): + delay = choice(range(5, 10)) sleep(delay) else: break @@ -85,6 +85,52 @@ def test_save_dividends(self): if os.path.exists(temp_path): os.rename(temp_path, div_path) + def test_get_splits(self): + df = md.get_splits('NFLX') + assert {C.EX, C.DEC, C.RATIO}.issubset(df.columns) + assert len(df) > 0 + + def test_standardize_splits(self): + columns = ['exDate', 'paymentDate', 'declaredDate', 'ratio'] + new_cols = [C.EX, C.PAY, C.DEC, C.RATIO] + sel_idx = 2 + selected = columns[sel_idx:] + df = pd.DataFrame({column: [0] for column in columns}) + standardized = md.standardize_splits('NFLX', df) + for column in new_cols: + assert column in standardized + + df.drop(columns=selected, inplace=True) + standardized = md.standardize_splits('NFLX', df) + for curr_idx, column in enumerate(new_cols): + col_in_df = column in standardized + assert col_in_df if curr_idx < sel_idx else not col_in_df + + def test_save_splits(self): + symbol = 'NFLX' + splt_path = md.finder.get_splits_path(symbol) + temp_path = f'{splt_path}_TEMP' + + if os.path.exists(splt_path): + os.rename(splt_path, temp_path) + + for _ in range(retries): + iex.save_splits(symbol=symbol, timeframe='5y') + if not 
md.reader.check_file_exists(splt_path): + delay = choice(range(5, 10)) + sleep(delay) + else: + break + + assert md.reader.check_file_exists(splt_path) + assert md.reader.store.modified_delta(splt_path).total_seconds() < 60 + df = md.reader.load_csv(splt_path) + assert {C.EX, C.DEC, C.RATIO}.issubset(df.columns) + assert len(df) > 0 + + if os.path.exists(temp_path): + os.rename(temp_path, splt_path) + class TestIEXCloud: def test_init(self): @@ -108,11 +154,35 @@ def test_get_endpoint(self): assert 'token' in endpoint def test_get_dividends(self): - df = iex.get_dividends('AAPL', '5y') - assert type(df).__name__ == 'DataFrame' + df = [] + + for i in range(retries): + if not len(df): + df = iex.get_dividends('AAPL', '5y') + if not i: + delay = choice(range(5, 10)) + sleep(delay) + else: + break - if len(df) > 0: - assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) + assert len(df) > 0 + assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) + + def test_get_splits(self): + df1, df2 = [], [] + for i in range(retries): + if not (len(df1) or len(df2)): + df1 = iex.get_splits('AAPL', '5y') + df2 = iex.get_splits('NFLX', '5y') + if not i: + delay = choice(range(5, 10)) + sleep(delay) + else: + break + + assert len(df1) or len(df2) + assert {C.EX, C.DEC, C.RATIO}.issubset( + df1.columns) or {C.EX, C.DEC, C.RATIO}.issubset(df2.columns) class TestPolygon: @@ -123,6 +193,10 @@ def test_init(self): def test_get_dividends(self): df = poly.get_dividends('AAPL', '5y') - assert type(df).__name__ == 'DataFrame' assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) assert len(df) > 0 + + def test_get_splits(self): + df = poly.get_splits('AAPL') + assert {C.EX, C.DEC, C.RATIO}.issubset(df.columns) + assert len(df) > 0 diff --git a/test/test_FileOps.py b/test/test_FileOps.py index 061b7748..44440da5 100644 --- a/test/test_FileOps.py +++ b/test/test_FileOps.py @@ -1,6 +1,5 @@ import os import sys -import time from datetime import timedelta import pytest import pandas as pd @@ 
-8,11 +7,24 @@ from FileOps import FileReader, FileWriter # noqa autopep8 import Constants as C # noqa autopep8 -run_id = str(time.time()).replace('.', '_') +reader = FileReader() +writer = FileWriter() + +run_id = '' +if not os.environ.get('CI'): + reader.store.bucket_name = os.environ['S3_DEV_BUCKET'] + writer.store.bucket_name = os.environ['S3_DEV_BUCKET'] +else: + run_id = os.environ['RUN_ID'] + +symbols_path = reader.store.finder.get_symbols_path() json_path1 = 'test/test1.json' json_path2 = 'test/test2.json' +csv_path1 = f'test/test1_{run_id}.csv' +csv_path2 = f'test/test2_{run_id}.csv' + empty = {} data = [ { @@ -34,28 +46,15 @@ 'volume': 102265, 'date': '2015-01-15' } + data_ = data[:] data_.append(snippet) -csv_path1 = f'test/test1_{run_id}.csv' -csv_path2 = f'test/test2_{run_id}.csv' test_df = pd.DataFrame(data) big_df = pd.DataFrame(data_) small_df = pd.DataFrame([snippet]) empty_df = pd.DataFrame() -reader = FileReader() -writer = FileWriter() - -run_id = '' -if not os.environ.get('CI'): - reader.store.bucket_name = os.environ['S3_DEV_BUCKET'] - writer.store.bucket_name = os.environ['S3_DEV_BUCKET'] -else: - run_id = os.environ['RUN_ID'] - -symbols_path = reader.store.finder.get_symbols_path() - class TestFileWriter: def test_init(self): diff --git a/test/test_Storage.py b/test/test_Storage.py index 3486360b..d157ce1e 100644 --- a/test/test_Storage.py +++ b/test/test_Storage.py @@ -70,8 +70,8 @@ def test_download_file(self): assert os.path.exists(symbols_path) def test_rename_key(self): - src_path = f'{symbols_path}_{run_id}_SRC' - dst_path = f'{symbols_path}_{run_id}_DST' + src_path = f'{symbols_path}_{run_id}_SRC2' + dst_path = f'{symbols_path}_{run_id}_DST2' assert not store.key_exists(src_path) store.copy_object(symbols_path, src_path)