Merge pull request #12 from jk1mm/dev

Merge dev to release
jk1mm · Jan 14, 2021 · ee47742 · ee47742
2 parents 5c8423a + 04d764a
commit ee47742
Show file tree

Hide file tree

Showing 12 changed files with 271 additions and 22 deletions.
diff --git a/README.md b/README.md
@@ -11,8 +11,8 @@ work in the making.
 
 ---
 ## Module Listing
- * [Data](https://github.com/jk1mm/stock_market#data)
- * [Analysis](https://github.com/jk1mm/stock_market#analysis)
+ * [Data](https://github.com/jk1mm/stock-market#data)
+ * [Analysis](https://github.com/jk1mm/stock-market#analysis)
 
 
 
@@ -33,8 +33,12 @@ work in the making.
 
 ### Analysis
 
-#### [Market Analysis](stock_market/analysis)
+#### [Market analysis](stock_market/analysis)
 - **IPO**: Analysis on recent and upcoming IPO stocks
     1) General success metrics on recent IPO bubble 
     2) Optimal sell day analysis
     3) Individual stock performance views
+- **Index**: Analysis on a market index
+    1) Stock categorization summary by industry
+    2) Index performance over different time periods
+    3) Today's top and bottom performing stocks
diff --git a/docs/analysis/indexes.md b/docs/analysis/indexes.md
@@ -0,0 +1,25 @@
+## Index Analysis
+
+
+The **IndexView** module within the analysis directory provides data views for
+a specified index of interest. Its main features are shown in the code
+snippet below.
+
+```python
+# Python
+
+# Import module
+from stock_market.analysis.index import IndexView
+
+# Let's look at the SP500 index information
+index_sp500 = IndexView(index = "SP500")
+
+# Get the list of SP500 stocks with the industry
+print(index_sp500.data)
+
+# Get today's top and bottom stock performances from this index
+top, bottom = index_sp500.summary_stocks_today
+print(top)
+print(bottom)
+
+```
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
@@ -4,3 +4,4 @@ tox
 tox-wheel
 black
 pre-commit
+
diff --git a/setup.cfg b/setup.cfg
@@ -2,3 +2,4 @@
 omit =
     stock_market/data/_ipo.py
     stock_market/analysis/ipo.py
+    stock_market/analysis/index.py
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name="stock_market",
-    version="1.1.1",
+    version="1.1.2",
     description="Modules related to stock market model.",
     author="Josh Kim",
     author_email="joshkim47@gmail.com",

diff --git a/stock_market/analysis/index.py b/stock_market/analysis/index.py
@@ -0,0 +1,206 @@
+import importlib
+import re
+from typing import Optional, Tuple
+
+import bs4
+import pandas as pd
+import requests
+
+from stock_market.data.constants import (
+    SP500_URL,
+    PERFORMANCE_PERIODIC,
+    PERFORMANCE_TOP_STOCKS,
+    PERFORMERS_BOTTOM_STOCKS,
+)
+
+AVAILABLE_INDEX = ["SP500"]
+
+
+class IndexView(object):
+    """
+    Analysis on market indexes.
+
+    Parameters
+    ----------
+    index: str
+        The market index of interest for analysis. Currently supports the indexes in AVAILABLE_INDEX.
+
+    """
+
+    def __init__(self, index: str):
+
+        # Check availability of index
+        index_name = index.upper()
+        if index_name not in AVAILABLE_INDEX:
+            raise Warning(
+                f"Please select from the available indexes: {AVAILABLE_INDEX}"
+            )
+
+        # Extract specified index data
+        data = getattr(importlib.import_module("stock_market.data"), index_name)
+
+        # Column name constants
+        self._column_names = {
+            "ticker_symbol": "Ticker",
+            "ticker_full": "Name",
+            "ticker_sector": "Sector",
+        }
+
+        # Self stores
+        self.data = data
+        self.sector_list = list(set(data[self._column_names["ticker_sector"]]))
+
+        # Value from property
+        self._summary = dict()
+
+    @property
+    def summary_sector_view(self) -> pd.DataFrame:
+        """
+        Summary of number of stocks by sector.
+
+        """
+        if "sector_view" not in self._summary:
+            # Setup for metric population
+            data = self.data
+            ticker_symbol = self._column_names["ticker_symbol"]
+            ticker_sector = self._column_names["ticker_sector"]
+
+            # Number of stocks by sector
+            sector_count = dict()
+            sector_count["sector_count"] = (
+                data[
+                    [
+                        ticker_symbol,
+                        ticker_sector,
+                    ]
+                ]
+                .groupby([ticker_sector])
+                .count()
+                .to_dict()[ticker_symbol]
+            )
+
+            self._summary["sector_view"] = sector_count
+
+        # Populate sector count in pandas form
+        sector_count = pd.DataFrame.from_dict(self._summary["sector_view"])
+
+        return sector_count
+
+    @property
+    def summary_performance(self) -> pd.DataFrame:
+        """
+        High level summary of index's periodic performance.
+
+        """
+        if "performance" not in self._summary:
+            # Run scrape function to extract all metrics in one go
+            index_scrape = _sp500()
+            self._summary["performance"] = index_scrape[PERFORMANCE_PERIODIC]
+            self._summary["top_stocks"] = index_scrape[PERFORMANCE_TOP_STOCKS]
+            self._summary["bottom_stocks"] = index_scrape[PERFORMERS_BOTTOM_STOCKS]
+
+        periodic_performance = pd.DataFrame.from_dict(
+            {"periodic_performance": self._summary["performance"]}
+        )
+
+        # TODO: Properly sort the periodic time periods
+
+        return periodic_performance
+
+    @property
+    def summary_stocks_today(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
+        """
+        Summary of today's top and bottom stock performances.
+
+        """
+        if ("top_stocks" not in self._summary) or (
+            "bottom_stocks" not in self._summary
+        ):
+            # Run scrape function to extract all metrics in one go
+            index_scrape = _sp500()
+            self._summary["performance"] = index_scrape[PERFORMANCE_PERIODIC]
+            self._summary["top_stocks"] = index_scrape[PERFORMANCE_TOP_STOCKS]
+            self._summary["bottom_stocks"] = index_scrape[PERFORMERS_BOTTOM_STOCKS]
+
+        return self._summary["top_stocks"], self._summary["bottom_stocks"]
+
+
+# Scraper for sp500
+def _sp500():
+    """
+    Scraping SP500 information from MarketWatch. (link in constants folder in data directory)
+
+    """
+    # Search and store the following information
+    ws_dict = dict()
+    for metric in [
+        PERFORMANCE_PERIODIC,
+        PERFORMANCE_TOP_STOCKS,
+        PERFORMERS_BOTTOM_STOCKS,
+    ]:
+        # Regex search for the above metrics
+        regex = re.compile(f"element element--table ({metric})")
+
+        # Web Scraped data
+        ws_metric = bs4.BeautifulSoup(
+            requests.get(SP500_URL).content, "html.parser"
+        ).find("div", {"class": regex})
+
+        # Check if data return requires a webscrape fix
+        if len(ws_metric) == 0:
+            print(f"The web-scrape metric name seems to be changed for {metric}.")
+            return None
+
+        ws_dict[metric] = ws_metric
+
+    # Extract data points for each metric
+    metric_data = {}
+
+    # 1) Performance per periods
+    data_1 = dict()
+    data = ws_dict[PERFORMANCE_PERIODIC].find_all("td")
+    for i in range(0, len(data), 2):
+        # Every even index represents the info and odd index represents the value
+        data_1[data[i].text.replace("\n", "")] = data[i + 1].text.replace("\n", "")
+
+    # 2) Top performing stocks today
+    data_2 = _stock_performers_ws(data=ws_dict[PERFORMANCE_TOP_STOCKS])
+
+    # 3) Bottom performing stocks today
+    data_3 = _stock_performers_ws(data=ws_dict[PERFORMERS_BOTTOM_STOCKS])
+
+    # All data store
+    metric_data[PERFORMANCE_PERIODIC] = data_1
+    metric_data[PERFORMANCE_TOP_STOCKS] = data_2
+    metric_data[PERFORMERS_BOTTOM_STOCKS] = data_3
+
+    return metric_data
+
+
+# Helper function for _sp500()
+def _stock_performers_ws(
+    data: bs4.element.Tag,
+) -> Optional[pd.DataFrame]:
+    """
+    Web scrapes the top and bottom performing stocks for an index in MarketWatch.
+
+    """
+    data_ws = data.find_all("tr")
+
+    if len(data_ws) == 0:
+        return None
+
+    # Setup stock data
+    stock_data = []
+
+    # First row is the column names
+    col_names = list(filter(None, data_ws[0].text.split("\n")))
+
+    # Extract all other row info
+    for row in range(1, len(data_ws)):
+        stock_data.append(list(filter(None, data_ws[row].text.split("\n"))))
+
+    # Form pandas dataframe
+    data_df = pd.DataFrame(stock_data, columns=col_names)
+
+    return data_df
diff --git a/stock_market/analysis/ipo.py b/stock_market/analysis/ipo.py
@@ -166,7 +166,7 @@ def overall_summary(self) -> pd.DataFrame:
         return self._overall_summary["stats"]
         # TODO: Best OSD (using probability) by number of stocks and percent increase!!
 
-    def individual_summary(self, ticker: str):
+    def individual_summary(self, ticker: str) -> pd.DataFrame:
         """
         Individual summary of recent IPOs.
 

diff --git a/stock_market/data/_ipo.py b/stock_market/data/_ipo.py
@@ -2,8 +2,7 @@
 import pandas as pd
 import requests
 
-# URL with IPO information (from MarketWatch)
-IPO_URL = "https://www.marketwatch.com/tools/ipo-calendar"
+from stock_market.data.constants import IPO_URL
 
 
 class IPO(object):
@@ -52,8 +51,8 @@ def recent_ipo(self) -> pd.DataFrame:
             del data["Symbol"]
 
             # Removal of some characters in Price and Shares variable for type conversion
-            data["Price"] = data["Price"].str.replace("$", "")
-            data["Shares"] = data["Shares"].str.replace(",", "")
+            data["Price"] = data["Price"].str.replace("$", "", regex=True)
+            data["Shares"] = data["Shares"].str.replace(",", "", regex=True)
 
             # Data type conversion
             data = data.astype(

diff --git a/stock_market/data/constants.py b/stock_market/data/constants.py
@@ -1,4 +1,16 @@
-from stock_market.data import SP500
+# Data
+# ----
 
-# Stock categories (from S&P 500)
-STOCK_CATEGORY = [industry.lower() for industry in list(set(SP500.Sector))]
+# IPO: MarketWatch IPO calendar page
+IPO_URL: str = "https://www.marketwatch.com/tools/ipo-calendar"
+
+
+# Analysis
+# --------
+
+# Index: SP500 (MarketWatch index page that the index analysis scrapes)
+SP500_URL: str = "https://www.marketwatch.com/investing/index/spx"
+# WebScrape constants: each value is interpolated into the CSS-class regex
+# ("element element--table (<value>)") used to locate one table on the page
+PERFORMANCE_PERIODIC: str = "performance"
+PERFORMANCE_TOP_STOCKS: str = "ByIndexGainers"
+PERFORMERS_BOTTOM_STOCKS: str = "ByIndexDecliners"