From c8b07fc43f05e3ad2e562971cebdcd49d3aec4a2 Mon Sep 17 00:00:00 2001
From: justinpolygon <123573436+justinpolygon@users.noreply.github.com>
Date: Tue, 5 Sep 2023 15:56:28 -0700
Subject: [PATCH] Added bulk agg download and reader scripts

---
 examples/rest/bulk_aggs_downloader.py | 97 +++++++++++++++++++++++++++
 examples/rest/bulk_aggs_reader.py     | 57 ++++++++++++++++
 2 files changed, 154 insertions(+)
 create mode 100644 examples/rest/bulk_aggs_downloader.py
 create mode 100644 examples/rest/bulk_aggs_reader.py

diff --git a/examples/rest/bulk_aggs_downloader.py b/examples/rest/bulk_aggs_downloader.py
new file mode 100644
index 00000000..c5c6099e
--- /dev/null
+++ b/examples/rest/bulk_aggs_downloader.py
@@ -0,0 +1,97 @@
+import datetime
+import concurrent.futures
+import logging
+from polygon import RESTClient
+import signal
+import sys
+import pickle
+import lz4.frame  # type: ignore
+
+"""
+This script performs the following tasks:
+
+1. Downloads aggregated market data (referred to as 'aggs') for specific stock symbols using the Polygon API.
+2. Handles data for multiple dates and performs these operations in parallel to improve efficiency.
+3. Saves the downloaded data in a compressed format (LZ4) using Python's pickle serialization.
+4. Utilizes logging to track its progress and any potential errors.
+5. Designed to be interruptible: listens for a Ctrl+C keyboard interrupt and exits gracefully when detected.
+
+Usage:
+1. pip install lz4
+2. Set your Polygon API key in the environment variable 'POLYGON_API_KEY'.
+3. Specify the date range and stock symbols you are interested in within the script.
+4. Run the script.
+
+The script will create compressed '.pickle.lz4' files containing the aggs for each specified stock symbol and date.
+
+Note: This script is designed to be compatible with a data reader script, such as 'bulk_aggs_reader.py'.
+"""
+
+# Set up logging
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")
+
+
+def signal_handler(sig, frame):
+    print("You pressed Ctrl+C!")
+    sys.exit(0)
+
+
+signal.signal(signal.SIGINT, signal_handler)
+
+
+def get_aggs_for_symbol_and_date(symbol_date_pair):
+    """Retrieve aggs for a given symbol and date"""
+    symbol, date = symbol_date_pair
+    aggs = []
+    client = RESTClient(trace=True)  # Uses POLYGON_API_KEY environment variable
+
+    for a in client.list_aggs(
+        symbol,
+        1,
+        "minute",
+        date,
+        date,
+        limit=50000,
+    ):
+        aggs.append(a)
+
+    print(len(aggs))
+
+    filename = f"{symbol}-aggs-{date}.pickle.lz4"
+    with open(filename, "wb") as file:
+        try:
+            compressed_data = lz4.frame.compress(pickle.dumps(aggs))
+            file.write(compressed_data)
+        except TypeError as e:
+            print(f"Serialization Error: {e}")
+
+    logging.info(f"Downloaded aggs for {date} and saved to {filename}")
+
+
+def weekdays_between(start_date, end_date):
+    """Generate all weekdays between start_date and end_date"""
+    day = start_date
+    while day <= end_date:
+        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
+            yield day
+        day += datetime.timedelta(days=1)
+
+
+def main():
+    start_date = datetime.date(2023, 8, 1)
+    end_date = datetime.date(2023, 8, 31)
+
+    symbols = ["TSLA", "AAPL", "HCP", "GOOG"]  # The array of symbols you want
+
+    dates = list(weekdays_between(start_date, end_date))
+
+    # Generate a list of (symbol, date) pairs
+    symbol_date_pairs = [(symbol, date) for symbol in symbols for date in dates]
+
+    # Use ThreadPoolExecutor to download data in parallel
+    with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
+        executor.map(get_aggs_for_symbol_and_date, symbol_date_pairs)
+
+
+if __name__ == "__main__":
+    main()
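
Before launching the full ThreadPoolExecutor run across every symbol and weekday, it can help to confirm that a single download works end to end. The sketch below is illustrative and not part of the patch; it assumes the patch has been applied, that bulk_aggs_downloader.py is importable from the working directory, that POLYGON_API_KEY is exported, and that lz4 is installed (pip install lz4).

import datetime

from bulk_aggs_downloader import get_aggs_for_symbol_and_date

# Fetch one day of minute aggs for one symbol, without the thread pool.
# On success this writes AAPL-aggs-2023-08-01.pickle.lz4 to the current directory.
get_aggs_for_symbol_and_date(("AAPL", datetime.date(2023, 8, 1)))

If that file appears and unpickles cleanly, the parallel run in main() performs the same work for the full symbol/date grid.
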
diff --git a/examples/rest/bulk_aggs_reader.py b/examples/rest/bulk_aggs_reader.py
new file mode 100644
index 00000000..fcc91915
--- /dev/null
+++ b/examples/rest/bulk_aggs_reader.py
@@ -0,0 +1,57 @@
+import lz4.frame  # type: ignore
+import pickle
+import datetime
+
+"""
+This script performs the following tasks:
+
+1. Reads aggregated market data ('aggs') for a specific stock symbol for multiple dates.
+2. Data is read from compressed (LZ4) and pickled files, which should have been generated by a separate data downloading script.
+3. Displays the read data to the console.
+4. Handles exceptions gracefully: informs the user if a file for a specific date was not found or if any other error occurred.
+
+Usage:
+1. pip install lz4
+2. Ensure that the compressed '.pickle.lz4' files for the specified stock symbol and date range exist in the same directory as this script.
+3. Modify the date range and stock symbol in the script as per your requirements.
+4. Run the script.
+
+The script will read and display the market data for each specified date and stock symbol.
+
+Note: This script is designed to be compatible with files generated by a data downloading script, such as 'bulk_aggs_downloader.py'.
+"""
+
+
+def read_aggs_for_date(symbol, date):
+    """Reads aggs for a given symbol and date, then prints them."""
+
+    # Construct the filename using the same naming scheme as the downloader script
+    filename = f"{symbol}-aggs-{date}.pickle.lz4"
+
+    try:
+        with open(filename, "rb") as file:
+            compressed_data = file.read()
+            aggs = pickle.loads(lz4.frame.decompress(compressed_data))
+            print(aggs)
+            return aggs
+    except FileNotFoundError:
+        print(f"No file found for {date}")
+    except Exception as e:
+        print(f"An error occurred: {e}")
+
+
+def main():
+    start_date = datetime.date(2023, 8, 1)
+    end_date = datetime.date(2023, 8, 31)
+    symbol = "HCP"
+
+    # Loop through each weekday between the start and end dates and read the aggs
+    day = start_date
+    while day <= end_date:
+        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
+            read_aggs_for_date(symbol, day)
+        day += datetime.timedelta(days=1)
+
+
+if __name__ == "__main__":
+    main()
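
The reader prints raw Agg objects, which is fine for spot checks but awkward for analysis. The sketch below is one possible follow-on, not part of the patch: it loads a month of downloaded files for one symbol into a pandas DataFrame. It assumes pandas is installed and that bulk_aggs_reader.py is importable from the working directory, and it uses vars() so the client's Agg field names don't have to be hard-coded; load_aggs_as_dataframe is a hypothetical helper introduced only for this example. Note that read_aggs_for_date() also prints each day's aggs as it loads them.

import datetime

import pandas as pd  # assumption: pandas is available (pip install pandas)

from bulk_aggs_reader import read_aggs_for_date


def load_aggs_as_dataframe(symbol, start_date, end_date):
    """Illustrative helper (not part of the patch): collect a symbol's minute aggs into one DataFrame."""
    rows = []
    day = start_date
    while day <= end_date:
        if day.weekday() < 5:  # weekdays only, matching the downloader
            aggs = read_aggs_for_date(symbol, day)
            if aggs:  # None when the file was missing or unreadable
                rows.extend(vars(a) for a in aggs)  # avoids hard-coding Agg field names
        day += datetime.timedelta(days=1)
    df = pd.DataFrame(rows)
    if "timestamp" in df.columns:
        # Polygon agg timestamps are Unix epoch milliseconds
        df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
    return df


if __name__ == "__main__":
    df = load_aggs_as_dataframe("HCP", datetime.date(2023, 8, 1), datetime.date(2023, 8, 31))
    print(df.head())

From there, ordinary pandas operations (resampling minute bars to daily, joining symbols, and so on) apply to the downloaded data.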