Skip to content

Commit

Permalink
Merge pull request #12 from jk1mm/dev
Browse files Browse the repository at this point in the history
Merge dev to release
  • Loading branch information
jk1mm authored Jan 14, 2021
2 parents 5c8423a + 04d764a commit ee47742
Show file tree
Hide file tree
Showing 12 changed files with 271 additions and 22 deletions.
10 changes: 7 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ work in the making.

---
## Module Listing
* [Data](https://github.com/jk1mm/stock_market#data)
* [Analysis](https://github.com/jk1mm/stock_market#analysis)
* [Data](https://github.com/jk1mm/stock-market#data)
* [Analysis](https://github.com/jk1mm/stock-market#analysis)



Expand All @@ -33,8 +33,12 @@ work in the making.

### Analysis

#### [Market Analysis](stock_market/analysis)
#### [Market analysis](stock_market/analysis)
- **IPO**: Analysis on recent and upcoming IPO stocks
1) General success metrics on recent IPO bubble
2) Optimal sell day analysis
3) Individual stock performance views
- **Index**: Analysis on a market index
1) Stock categorization summary by industry
2) Index performance for different periodic times
3) Today's top and bottom performing stocks
25 changes: 25 additions & 0 deletions docs/analysis/indexes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
## Index Analysis


The **IndexView** class within the analysis directory provides data views for
a specified index of interest. Its main features are shown in the code
snippet below.

```python
# Python

# Import module
from stock_market.analysis.index import IndexView

# Let's look at the SP500 index information
index_sp500 = IndexView(index = "SP500")

# Get the list of SP500 stocks with the industry
print(index_sp500.data)

# Get today's top and bottom stock performances from this index
top, bottom = index_sp500.summary_stocks_today
print(top)
print(bottom)

```
1 change: 1 addition & 0 deletions requirements/requirements-dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@ tox
tox-wheel
black
pre-commit

1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
omit =
stock_market/data/_ipo.py
stock_market/analysis/ipo.py
stock_market/analysis/index.py
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

setup(
name="stock_market",
version="1.1.1",
version="1.1.2",
description="Modules related to stock market model.",
author="Josh Kim",
author_email="joshkim47@gmail.com",
Expand Down
206 changes: 206 additions & 0 deletions stock_market/analysis/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
import importlib
import re
from typing import Optional, Tuple

import bs4
import pandas as pd
import requests

from stock_market.data.constants import (
SP500_URL,
PERFORMANCE_PERIODIC,
PERFORMANCE_TOP_STOCKS,
PERFORMERS_BOTTOM_STOCKS,
)

# Index names accepted by IndexView. The upper-cased user input is checked
# against this list and then looked up as an attribute of `stock_market.data`.
AVAILABLE_INDEX = ["SP500"]


class IndexView(object):
    """
    Analysis on market indexes.

    Parameters
    ----------
    index: str
        The market index of interest for analysis (matched case-insensitively).
        Currently supports the indexes in AVAILABLE_INDEX.

    Raises
    ------
    Warning
        If the requested index is not one of AVAILABLE_INDEX.
    """

    def __init__(self, index: str):

        # Check availability of index
        index_name = index.upper()
        if index_name not in AVAILABLE_INDEX:
            raise Warning(
                f"Please select from the available indexes: {AVAILABLE_INDEX}"
            )

        # Extract specified index data (looked up by name on stock_market.data)
        data = getattr(importlib.import_module("stock_market.data"), index_name)

        # Column name constants
        self._column_names = {
            "ticker_symbol": "Ticker",
            "ticker_full": "Name",
            "ticker_sector": "Sector",
        }

        # Self stores
        self.data = data
        self.sector_list = list(set(data[self._column_names["ticker_sector"]]))

        # Cache for values computed lazily by the properties below
        self._summary = dict()

    def _load_scrape(self) -> None:
        """
        Run the web scrape once and cache all three scraped metrics.

        Raises
        ------
        RuntimeError
            If the scraper returns None, i.e. it could not locate the metrics.
        """
        # NOTE: the scraper is SP500 specific; SP500 is the only available index.
        index_scrape = _sp500()
        if index_scrape is None:
            raise RuntimeError(
                "Unable to scrape index information; the source page layout "
                "may have changed."
            )

        self._summary["performance"] = index_scrape[PERFORMANCE_PERIODIC]
        self._summary["top_stocks"] = index_scrape[PERFORMANCE_TOP_STOCKS]
        self._summary["bottom_stocks"] = index_scrape[PERFORMERS_BOTTOM_STOCKS]

    @property
    def summary_sector_view(self) -> pd.DataFrame:
        """
        Summary of number of stocks by sector.

        Returns a DataFrame indexed by sector with a single ``sector_count``
        column. The underlying counts are computed once and cached.
        """
        if "sector_view" not in self._summary:
            ticker_symbol = self._column_names["ticker_symbol"]
            ticker_sector = self._column_names["ticker_sector"]

            # Number of stocks by sector, as {sector: count}
            counts = (
                self.data[[ticker_symbol, ticker_sector]]
                .groupby([ticker_sector])
                .count()
                .to_dict()[ticker_symbol]
            )
            self._summary["sector_view"] = {"sector_count": counts}

        # Populate sector count in pandas form
        return pd.DataFrame.from_dict(self._summary["sector_view"])

    @property
    def summary_performance(self) -> pd.DataFrame:
        """
        High level summary of index's periodic performance.

        Returns a DataFrame indexed by period label with a single
        ``periodic_performance`` column. Triggers the web scrape on first use.
        """
        if "performance" not in self._summary:
            # One scrape populates every scraped metric
            self._load_scrape()

        # TODO: Properly sort the periodic time periods
        return pd.DataFrame.from_dict(
            {"periodic_performance": self._summary["performance"]}
        )

    @property
    def summary_stocks_today(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Summary of today's top and bottom stock performances.

        Returns a ``(top, bottom)`` tuple. Triggers the web scrape on first use.
        """
        if ("top_stocks" not in self._summary) or (
            "bottom_stocks" not in self._summary
        ):
            # One scrape populates every scraped metric
            self._load_scrape()

        return self._summary["top_stocks"], self._summary["bottom_stocks"]


# Scraper for sp500
def _sp500() -> Optional[dict]:
    """
    Scraping SP500 information from MarketWatch. (link in constants folder in data directory)

    The page at SP500_URL is downloaded and parsed once, then searched for one
    ``div`` per metric (periodic performance, top stocks, bottom stocks).

    Returns
    -------
    dict or None
        A dict keyed by PERFORMANCE_PERIODIC, PERFORMANCE_TOP_STOCKS and
        PERFORMERS_BOTTOM_STOCKS. The first maps period label -> value (both
        strings); the other two hold the result of `_stock_performers_ws`.
        None is returned (after printing a notice) when any metric cannot be
        found on the page.
    """
    metrics = [
        PERFORMANCE_PERIODIC,
        PERFORMANCE_TOP_STOCKS,
        PERFORMERS_BOTTOM_STOCKS,
    ]

    # Every metric lives on the same page, so fetch and parse it only once
    soup = bs4.BeautifulSoup(requests.get(SP500_URL).content, "html.parser")

    # Search and store the tag holding each metric
    ws_dict = dict()
    for metric in metrics:
        # Regex search for the metric's container class
        regex = re.compile(f"element element--table ({metric})")
        ws_metric = soup.find("div", {"class": regex})

        # find() returns None when nothing matches, so test for None before
        # measuring the tag; an empty container is treated as missing too.
        if ws_metric is None or len(ws_metric) == 0:
            print(f"The web-scrape metric name seems to be changed for {metric}.")
            return None

        ws_dict[metric] = ws_metric

    # 1) Performance per periods
    # Cells alternate label, value; zip pairs them up and ignores a dangling
    # trailing cell instead of indexing past the end.
    cells = ws_dict[PERFORMANCE_PERIODIC].find_all("td")
    data_1 = {
        label.text.replace("\n", ""): value.text.replace("\n", "")
        for label, value in zip(cells[::2], cells[1::2])
    }

    # 2) Top performing stocks today
    data_2 = _stock_performers_ws(data=ws_dict[PERFORMANCE_TOP_STOCKS])

    # 3) Bottom performing stocks today
    data_3 = _stock_performers_ws(data=ws_dict[PERFORMERS_BOTTOM_STOCKS])

    # All data store
    return {
        PERFORMANCE_PERIODIC: data_1,
        PERFORMANCE_TOP_STOCKS: data_2,
        PERFORMERS_BOTTOM_STOCKS: data_3,
    }


# Helper function for _sp500()
def _stock_performers_ws(
    data: bs4.element.Tag,
) -> Optional[pd.DataFrame]:
    """
    Web scrapes the top and bottom performing stocks for an index in MarketWatch.

    Parameters
    ----------
    data: bs4.element.Tag
        Tag containing the table rows (``tr``) of a stock performance table.

    Returns
    -------
    pd.DataFrame or None
        One row per table row after the first, with the first row's cells used
        as column names; None when the tag holds no ``tr`` elements.
    """

    def _cells(row):
        # A row's text is newline separated; blank fragments are discarded
        return [piece for piece in row.text.split("\n") if piece]

    table_rows = data.find_all("tr")
    if not table_rows:
        return None

    # First row carries the column names, the remainder is stock data
    header_row, *stock_rows = table_rows

    return pd.DataFrame(
        [_cells(stock_row) for stock_row in stock_rows],
        columns=_cells(header_row),
    )
2 changes: 1 addition & 1 deletion stock_market/analysis/ipo.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def overall_summary(self) -> pd.DataFrame:
return self._overall_summary["stats"]
# TODO: Best OSD (using probability) by number of stocks and percent increase!!

def individual_summary(self, ticker: str):
def individual_summary(self, ticker: str) -> pd.DataFrame:
"""
Individual summary of recent IPOs.
Expand Down
7 changes: 3 additions & 4 deletions stock_market/data/_ipo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
import pandas as pd
import requests

# URL with IPO information (from MarketWatch)
IPO_URL = "https://www.marketwatch.com/tools/ipo-calendar"
from stock_market.data.constants import IPO_URL


class IPO(object):
Expand Down Expand Up @@ -52,8 +51,8 @@ def recent_ipo(self) -> pd.DataFrame:
del data["Symbol"]

# Removal of some characters in Price and Shares variable for type conversion
data["Price"] = data["Price"].str.replace("$", "")
data["Shares"] = data["Shares"].str.replace(",", "")
data["Price"] = data["Price"].str.replace("$", "", regex=True)
data["Shares"] = data["Shares"].str.replace(",", "", regex=True)

# Data type conversion
data = data.astype(
Expand Down
18 changes: 15 additions & 3 deletions stock_market/data/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,16 @@
from stock_market.data import SP500
# Data
# ----

# Stock categories (from S&P 500)
STOCK_CATEGORY = [industry.lower() for industry in list(set(SP500.Sector))]
# Ipo
IPO_URL = "https://www.marketwatch.com/tools/ipo-calendar"


# Analysis
# --------

# Index: SP500
SP500_URL = "https://www.marketwatch.com/investing/index/spx"
# WebScrape constants
PERFORMANCE_PERIODIC: str = "performance"
PERFORMANCE_TOP_STOCKS: str = "ByIndexGainers"
PERFORMERS_BOTTOM_STOCKS: str = "ByIndexDecliners"
Loading

0 comments on commit ee47742

Please sign in to comment.