Added bulk agg download and reader scripts #511

Merged (2 commits, Sep 6, 2023)
97 changes: 97 additions & 0 deletions examples/rest/bulk_aggs_downloader.py
@@ -0,0 +1,97 @@
import datetime
import concurrent.futures
import logging
from polygon import RESTClient
import signal
import sys
import pickle
import lz4.frame  # type: ignore

"""
This script performs the following tasks:

1. Downloads aggregated market data (referred to as 'aggs') for specific stock symbols using the Polygon API.
2. Handles data for multiple dates and performs these operations in parallel to improve efficiency.
3. Saves the downloaded data in a compressed format (LZ4) using Python's pickle serialization.
4. Utilizes logging to track its progress and any potential errors.
5. Designed to be interruptible: listens for a Ctrl+C keyboard interrupt and exits gracefully when detected.

Usage:
1. pip install lz4
2. Set your Polygon API key in the environment variable 'POLYGON_API_KEY'.
3. Specify the date range and stock symbols you are interested in within the script.
4. Run the script.

The script will create compressed '.pickle.lz4' files containing the aggs for each specified stock symbol and date.

Note: This script is designed to be compatible with a data reader script, such as 'bulk_aggs_reader.py'.
"""

# Set up logging
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(message)s")


def signal_handler(sig, frame):
    print("You pressed Ctrl+C!")
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)


def get_aggs_for_symbol_and_date(symbol_date_pair):
    """Retrieve aggs for a given symbol and date"""
    symbol, date = symbol_date_pair
    aggs = []
    client = RESTClient(trace=True)  # Uses POLYGON_API_KEY environment variable

    for a in client.list_aggs(
        symbol,
        1,
        "minute",
        date,
        date,
        limit=50000,
    ):
        aggs.append(a)

    logging.info(f"Downloaded {len(aggs)} aggs for {symbol} on {date}")

    filename = f"{symbol}-aggs-{date}.pickle.lz4"
    with open(filename, "wb") as file:
        try:
            compressed_data = lz4.frame.compress(pickle.dumps(aggs))
            file.write(compressed_data)
        except TypeError as e:
            print(f"Serialization Error: {e}")

    logging.info(f"Saved aggs for {symbol} on {date} to {filename}")


def weekdays_between(start_date, end_date):
    """Generate all weekdays between start_date and end_date"""
    day = start_date
    while day <= end_date:
        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
            yield day
        day += datetime.timedelta(days=1)


def main():
    start_date = datetime.date(2023, 8, 1)
    end_date = datetime.date(2023, 8, 31)

    symbols = ["TSLA", "AAPL", "HCP", "GOOG"]  # The list of symbols you want to download

    dates = list(weekdays_between(start_date, end_date))

    # Generate a list of (symbol, date) pairs
    symbol_date_pairs = [(symbol, date) for symbol in symbols for date in dates]

    # Use ThreadPoolExecutor to download data in parallel
    with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
        executor.map(get_aggs_for_symbol_and_date, symbol_date_pairs)


if __name__ == "__main__":
    main()
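
One caveat worth noting about the parallel step above (an editorial observation, not part of the PR): `executor.map` only re-raises a worker's exception when its result is consumed, and the script never consumes the returned iterator, so a failed download can pass silently. A minimal sketch of one way to surface such failures, assuming the `get_aggs_for_symbol_and_date` function and `symbol_date_pairs` list defined in the script; the `download_all` helper is hypothetical:

import concurrent.futures
import logging


def download_all(symbol_date_pairs, max_workers=50):
    # Submit each (symbol, date) pair and keep a handle back to it for reporting.
    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {
            executor.submit(get_aggs_for_symbol_and_date, pair): pair
            for pair in symbol_date_pairs
        }
        for future in concurrent.futures.as_completed(futures):
            symbol, date = futures[future]
            try:
                future.result()  # re-raises any exception raised in the worker thread
            except Exception as exc:
                logging.error(f"Failed to download {symbol} for {date}: {exc}")
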
57 changes: 57 additions & 0 deletions examples/rest/bulk_aggs_reader.py
@@ -0,0 +1,57 @@
import lz4.frame  # type: ignore
import pickle
import datetime

"""
This script performs the following tasks:

1. Reads aggregated market data ('aggs') for a specific stock symbol across multiple dates.
2. Data is read from compressed (LZ4) and pickled files, which should have been generated by a separate data downloading script.
3. Displays the read data to the console.
4. Handles exceptions gracefully: informs the user if a file for a specific date was not found or if any other error occurred.

Usage:
1. pip install lz4
2. Ensure that the compressed '.pickle.lz4' files for the specified stock symbol and date range exist in the same directory as this script.
3. Modify the date range and stock symbol in the script to suit your needs.
4. Run the script.

The script will read and display the market data for each specified date and stock symbol.

Note: This script is designed to be compatible with files generated by a data downloading script, such as 'bulk_aggs_downloader.py'.
"""


def read_aggs_for_date(symbol, date):
    """Reads aggs for a given symbol and date, then prints them."""

    # Construct the filename using the same naming scheme as the downloader script
    filename = f"{symbol}-aggs-{date}.pickle.lz4"

    try:
        with open(filename, "rb") as file:
            compressed_data = file.read()
            aggs = pickle.loads(lz4.frame.decompress(compressed_data))
            print(aggs)
            return aggs
    except FileNotFoundError:
        print(f"No file found for {date}")
    except Exception as e:
        print(f"An error occurred: {e}")


def main():
    start_date = datetime.date(2023, 8, 1)
    end_date = datetime.date(2023, 8, 31)
    symbol = "HCP"

    # Loop through each weekday between the start and end dates and read the aggs
    day = start_date
    while day <= end_date:
        if day.weekday() < 5:  # 0-4 denotes Monday to Friday
            read_aggs_for_date(symbol, day)
        day += datetime.timedelta(days=1)


if __name__ == "__main__":
    main()
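
As a quick sanity check on the storage format the two scripts share, the LZ4 + pickle round trip can be exercised on its own. This is a sketch, not part of the PR; the filename and the stand-in records are made up for illustration:

import pickle
import lz4.frame  # type: ignore  # pip install lz4

# Any picklable objects stand in for the Agg objects the downloader stores.
records = [{"symbol": "AAPL", "o": 195.0, "c": 196.1}]

# Write: pickle the objects, then LZ4-compress the resulting bytes.
blob = lz4.frame.compress(pickle.dumps(records))
with open("example.pickle.lz4", "wb") as f:
    f.write(blob)

# Read: decompress the bytes, then unpickle them back into objects.
with open("example.pickle.lz4", "rb") as f:
    restored = pickle.loads(lz4.frame.decompress(f.read()))

assert restored == records  # the round trip preserves the data exactly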