diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 00000000..30ab1917 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "daily" diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index f3c5480b..e3f3bcb1 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -55,6 +55,7 @@ jobs: RH_2FA: ${{ secrets.RH_2FA }} IEXCLOUD: ${{ secrets.IEXCLOUD_SANDBOX }} S3_BUCKET: ${{ secrets.S3_DEV_BUCKET }} + APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} run: coverage run -m pytest -vv - name: Generate test coverage report diff --git a/.github/workflows/dividends.yml b/.github/workflows/dividends.yml index d166ed38..da412906 100644 --- a/.github/workflows/dividends.yml +++ b/.github/workflows/dividends.yml @@ -43,4 +43,5 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }} S3_BUCKET: ${{ secrets.S3_BUCKET }} + APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} run: python scripts/update_dividends.py diff --git a/.github/workflows/sandbox.yml b/.github/workflows/sandbox.yml index 8b5745ad..92e5dff6 100644 --- a/.github/workflows/sandbox.yml +++ b/.github/workflows/sandbox.yml @@ -55,6 +55,7 @@ jobs: RH_2FA: ${{ secrets.RH_2FA }} IEXCLOUD: ${{ secrets.IEXCLOUD_SANDBOX }} S3_BUCKET: ${{ secrets.S3_DEV_BUCKET }} + APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} run: coverage run -m pytest -vv - name: Generate test coverage report @@ -72,6 +73,7 @@ jobs: env: IEXCLOUD: ${{ secrets.IEXCLOUD_SANDBOX }} S3_BUCKET: ${{ secrets.S3_DEV_BUCKET }} + APCA_API_KEY_ID: ${{ secrets.APCA_API_KEY_ID }} run: python scripts/update_dividends.py - name: Upload repo to S3 diff --git a/requirements.txt b/requirements.txt index b2148bf4..261c0dfe 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ python-dotenv == 0.13.0 -pandas == 1.0.5 +pandas == 1.1.0 robin-stocks == 1.3.0 -boto3 == 1.14.35 \ No newline at end of file +boto3 == 1.14.35 +polygon-api-client == 0.1.6 \ No newline at end of file diff --git a/scripts/update_dividends.py b/scripts/update_dividends.py index 84ab0bed..fb234680 100644 --- a/scripts/update_dividends.py +++ b/scripts/update_dividends.py @@ -1,18 +1,24 @@ import sys -from multiprocessing import Pool sys.path.append('src') -from DataSource import IEXCloud # noqa autopep8 - +from DataSource import IEXCloud, Polygon # noqa autopep8 iex = IEXCloud() +poly = Polygon() symbols = iex.get_symbols() +# Double redundancy -def multi_div(symbol): - # define fx to be pickled in multiprocessing - iex.save_dividends(symbol) - +for symbol in symbols: + # 1st pass + try: + iex.save_dividends(symbol=symbol, timeframe='3m') + except Exception as e: + print(f'IEX Cloud dividend update failed for {symbol}.') + print(e) -# save files as CSVs and uploads to S3 -with Pool() as p: - p.map(multi_div, symbols) + # 2nd pass + try: + poly.save_dividends(symbol=symbol, timeframe='max') + except Exception as e: + print(f'Polygon.io dividend update failed for {symbol}.') + print(e) diff --git a/src/Constants.py b/src/Constants.py index 626fef5b..5b092def 100644 --- a/src/Constants.py +++ b/src/Constants.py @@ -11,7 +11,14 @@ DEV_DIR = 'dev' DIV_DIR = 'dividends' SPLT_DIR = 'splits' -FULL_DIV_DIR = os.path.join(DATA_DIR, DIV_DIR) +IEX_DIR = 'iexcloud' +POLY_DIR = 'polygon' + +folders = { + 'iexcloud': IEX_DIR, + 'polygon': POLY_DIR +} +# PLYGN # Column Names # Symbols / Generic @@ -37,12 +44,13 @@ def get_symbols_path(self): 'symbols.csv' ) - def get_dividends_path(self, symbol): + def get_dividends_path(self, symbol, provider='iexcloud'): # given a symbol # return the path to its csv return os.path.join( DATA_DIR, DIV_DIR, + folders[provider], f'{symbol.upper()}.csv' ) diff --git a/src/DataSource.py b/src/DataSource.py index b56d6eec..c97825ac 100644 --- a/src/DataSource.py +++ b/src/DataSource.py @@ -1,6 +1,7 @@ import os import requests import pandas as pd +from polygon import RESTClient from dotenv import load_dotenv from FileOps import FileReader, FileWriter from Constants import PathFinder @@ -15,29 +16,60 @@ def __init__(self, broker=None): self.writer = FileWriter() self.reader = FileReader() self.finder = PathFinder() + self.provider = 'iexcloud' def get_symbols(self): # get cached list of symbols symbols_path = self.finder.get_symbols_path() return list(self.reader.load_csv(symbols_path)[C.SYMBOL]) - def get_dividends(self, symbol, timeframe=None): + def get_dividends(self, symbol, timeframe='max'): # given a symbol, return a cached dataframe - return self.reader.load_csv(self.finder.get_dividends_path(symbol)) + df = self.reader.load_csv( + self.finder.get_dividends_path(symbol, self.provider)) + filtered = self.reader.data_in_timeframe(df, C.EX, timeframe) + return filtered + + def standardize_dividends(self, symbol, df): + full_mapping = dict( + zip( + ['exDate', 'paymentDate', 'declaredDate', 'amount'], + [C.EX, C.PAY, C.DEC, C.DIV] + ) + ) + mapping = {k: v for k, v in full_mapping.items() if k in df} + columns = list(mapping) + + df = df[columns].rename(columns=mapping) + filename = self.finder.get_dividends_path(symbol, self.provider) + + if C.EX in df and C.DIV in df: + df = self.reader.update_df( + filename, df, C.EX).sort_values(by=[C.EX]) + df[C.DIV] = df[C.DIV].apply(lambda amt: float(amt) if amt else 0) - def save_dividends(self, symbol, timeframe=None): - # given a symbol, save its dividend history - if timeframe: - df = self.get_dividends(symbol, timeframe) - else: - df = self.get_dividends(symbol) - self.writer.update_csv(self.finder.get_dividends_path(symbol), df) + return df - # def save_splits(self, symbol): - # # given a symbol, save its stock split history - # df = self.get_splits(symbol) + def save_dividends(self, **kwargs): + # given a symbol, save its dividend history + symbol = kwargs['symbol'] + df = self.get_dividends(**kwargs) + self.writer.update_csv( + self.finder.get_dividends_path(symbol, self.provider), df) + + # def get_splits(self, symbol, timeframe='max'): + # # given a symbol, return a cached dataframe + # df = self.reader.load_csv(self.finder.get_splits_path(symbol)) + # filtered = self.reader.data_in_timeframe(df, C.EX, timeframe) + # return filtered + + # def save_splits(self, **kwargs): + # # given a symbol, save its splits history + # symbol = kwargs['symbol'] + # df = self.get_splits(**kwargs) # self.writer.update_csv(self.finder.get_splits_path(symbol), df) + # make tiingo OR IEX CLOUD!! version of get dividends which # fetches existing dividend csv and adds a row if dividend # today or fetches last 5 years, joins with existing and updates if new @@ -50,6 +82,7 @@ def __init__(self, broker=None): self.base = 'https://cloud.iexapis.com' self.version = 'stable' self.token = os.environ['IEXCLOUD'] + self.provider = 'iexcloud' def get_endpoint(self, parts): # given a url @@ -84,16 +117,9 @@ def get_dividends(self, symbol, timeframe='3m'): if not response.ok or data == []: return empty - columns = ['exDate', 'paymentDate', 'declaredDate', 'amount'] - mapping = dict(zip(columns, [C.EX, C.PAY, C.DEC, C.DIV])) - df = pd.DataFrame(data)[columns].rename(columns=mapping) - - filename = self.finder.get_dividends_path(symbol) + df = pd.DataFrame(data) - df = self.reader.update_df(filename, df, C.EX).sort_values(by=[C.EX]) - df[C.DIV] = df[C.DIV].apply(lambda amt: float(amt) if amt else 0) - - return df + return self.standardize_dividends(symbol, df) # def get_splits(self, symbol): # # given a symbol, return the stock splits @@ -104,3 +130,17 @@ def get_dividends(self, symbol, timeframe='3m'): # ) # df = df[df['Stock Splits'] != 0] # return df + + +class Polygon(MarketData): + def __init__(self, broker=None): + super().__init__(broker=broker) + load_dotenv() + self.client = RESTClient(os.environ['APCA_API_KEY_ID']) + self.provider = 'polygon' + + def get_dividends(self, symbol, timeframe='max'): + response = self.client.reference_stock_dividends(symbol) + raw = pd.DataFrame(response.results) + df = self.standardize_dividends(symbol, raw) + return self.reader.data_in_timeframe(df, C.EX, timeframe) diff --git a/src/FileOps.py b/src/FileOps.py index 185be947..9e250ca1 100644 --- a/src/FileOps.py +++ b/src/FileOps.py @@ -1,7 +1,7 @@ import os import json import time -from datetime import datetime +from datetime import date, datetime, timedelta import pandas as pd from Storage import Store # consider combining fileoperations into one class @@ -49,6 +49,8 @@ def check_update(self, filename, df): def update_df(self, filename, new, column): old = self.load_csv(filename) if not old.empty: + old[column] = pd.to_datetime(old[column]) + new[column] = pd.to_datetime(new[column]) old = old[~old[column].isin(new[column])] new = old.append(new, ignore_index=True) return new @@ -56,6 +58,39 @@ def update_df(self, filename, new, column): def check_file_exists(self, filename): return os.path.exists(filename) and self.store.key_exists(filename) + def convert_delta(self, timeframe): + if timeframe == 'max': + return timedelta(days=36500) + + periods = {'y': 365, 'm': 30, 'w': 7, 'd': 1} + period = 'y' + idx = -1 + + for curr_period in periods: + idx = timeframe.find(curr_period) + if idx != -1: + period = curr_period + break + + if idx == -1: + supported = ', '.join(list(periods)) + error_msg = f'Only certain suffixes ({supported}) are supported.' + raise ValueError(error_msg) + + num = int(timeframe[:idx]) + days = periods[period] * num + delta = timedelta(days=days) + + return delta + + def data_in_timeframe(self, df, col, timeframe='max'): + if col not in df: + return df + delta = self.convert_delta(timeframe) + df[col] = pd.to_datetime(df[col]) + filtered = df[df[col] > pd.to_datetime(date.today() - delta)] + return filtered + class FileWriter: # file write operations diff --git a/src/Storage.py b/src/Storage.py index d3c5cd19..f761a72c 100644 --- a/src/Storage.py +++ b/src/Storage.py @@ -42,9 +42,12 @@ def get_all_keys(self): def key_exists(self, key, download=False): try: - s3 = boto3.resource('s3') - bucket = s3.Bucket(self.bucket_name) - bucket.Object(key).load() + if download: + self.download_file(key) + else: + s3 = boto3.resource('s3') + bucket = s3.Bucket(self.bucket_name) + bucket.Object(key).load() except ClientError: return False else: diff --git a/test/test_Constants.py b/test/test_Constants.py index 068e9b18..00966240 100644 --- a/test/test_Constants.py +++ b/test/test_Constants.py @@ -14,8 +14,10 @@ def test_get_symbols_path(self): assert finder.get_symbols_path() == 'data/symbols.csv' def test_get_dividends_path(self): - assert finder.get_dividends_path('aapl') == 'data/dividends/AAPL.csv' - assert finder.get_dividends_path('AMD') == 'data/dividends/AMD.csv' + assert finder.get_dividends_path( + 'aapl') == 'data/dividends/iexcloud/AAPL.csv' + assert finder.get_dividends_path( + 'AMD') == 'data/dividends/iexcloud/AMD.csv' def test_get_splits_path(self): assert finder.get_splits_path('aapl') == 'data/splits/AAPL.csv' diff --git a/test/test_DataSource.py b/test/test_DataSource.py index fa124276..d69180b6 100644 --- a/test/test_DataSource.py +++ b/test/test_DataSource.py @@ -2,12 +2,14 @@ import sys from time import sleep from random import choice +import pandas as pd sys.path.append('src') -from DataSource import MarketData, IEXCloud # noqa autopep8 +from DataSource import MarketData, IEXCloud, Polygon # noqa autopep8 import Constants as C # noqa autopep8 md = MarketData() iex = IEXCloud() +poly = Polygon() if not os.environ.get('CI'): iex.token = os.environ['IEXCLOUD_SANDBOX'] iex.base = 'https://sandbox.iexapis.com' @@ -20,6 +22,7 @@ def test_init(self): assert hasattr(md, 'writer') assert hasattr(md, 'reader') assert hasattr(md, 'finder') + assert hasattr(md, 'provider') def test_get_symbols(self): symbols = set(md.get_symbols()) @@ -33,19 +36,43 @@ def test_get_dividends(self): assert len(df[df[C.EX] < '2015-12-25']) > 0 assert len(df[df[C.EX] > '2020-01-01']) > 0 + def test_standardize_dividends(self): + columns = ['exDate', 'paymentDate', 'declaredDate', 'amount'] + new_cols = [C.EX, C.PAY, C.DEC, C.DIV] + sel_idx = 2 + selected = columns[sel_idx:] + df = pd.DataFrame({column: [0] for column in columns}) + standardized = md.standardize_dividends('AAPL', df) + for column in new_cols: + assert column in standardized + + df.drop(columns=selected, inplace=True) + standardized = md.standardize_dividends('AAPL', df) + for curr_idx, column in enumerate(new_cols): + col_in_df = column in standardized + assert col_in_df if curr_idx < sel_idx else not col_in_df + def test_save_dividends(self): symbol = 'O' div_path = md.finder.get_dividends_path(symbol) test_path = f'{div_path}_TEST' - if not os.path.exists(div_path): - md.writer.store.download_file(div_path) - md.writer.rename_file(div_path, test_path) + + if os.path.exists(div_path): + os.remove(div_path) + elif os.path.exists(test_path): + os.remove(test_path) + + if md.writer.store.key_exists(div_path, download=True): + md.writer.rename_file(div_path, test_path) + else: + md.writer.store.download_file(test_path) + assert not md.reader.check_file_exists(div_path) retries = 10 delay = choice(range(5, 10)) for _ in range(retries): - iex.save_dividends(symbol, '5y') + iex.save_dividends(symbol=symbol, timeframe='5y') if not md.reader.check_file_exists(div_path): sleep(delay) else: @@ -62,6 +89,7 @@ def test_init(self): assert hasattr(iex, 'base') assert hasattr(iex, 'version') assert hasattr(iex, 'token') + assert hasattr(iex, 'provider') def test_get_endpoint(self): parts = [ @@ -82,3 +110,16 @@ def test_get_dividends(self): if len(df) > 0: assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) + + +class TestPolygon: + def test_init(self): + assert type(poly).__name__ == 'Polygon' + assert hasattr(poly, 'client') + assert hasattr(poly, 'provider') + + def test_get_dividends(self): + df = poly.get_dividends('AAPL', '5y') + assert type(df).__name__ == 'DataFrame' + assert {C.EX, C.PAY, C.DEC, C.DIV}.issubset(df.columns) + assert len(df) > 0 diff --git a/test/test_FileOps.py b/test/test_FileOps.py index 04468061..7e97ada2 100644 --- a/test/test_FileOps.py +++ b/test/test_FileOps.py @@ -1,5 +1,7 @@ import os import sys +from datetime import timedelta +import pytest import pandas as pd sys.path.append('src') from FileOps import FileReader, FileWriter # noqa autopep8 @@ -137,3 +139,19 @@ def test_update_df(self): def test_check_file_exists(self): assert not reader.check_file_exists('a') assert reader.check_file_exists(symbols_path) + + def test_convert_delta(self): + assert reader.convert_delta('1d') == timedelta(days=1) + assert reader.convert_delta('3d') == timedelta(days=3) + + assert reader.convert_delta('1w') == timedelta(days=7) + assert reader.convert_delta('3w') == timedelta(days=21) + + assert reader.convert_delta('1m') == timedelta(days=30) + assert reader.convert_delta('3m') == timedelta(days=90) + + assert reader.convert_delta('1y') == timedelta(days=365) + assert reader.convert_delta('3y') == timedelta(days=1095) + + with pytest.raises(ValueError): + reader.convert_delta('0')