Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

V2.2.1 #30

Merged
merged 3 commits into from
Apr 7, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions attic/Dockerfile

This file was deleted.

79 changes: 0 additions & 79 deletions attic/dbshell.py

This file was deleted.

19 changes: 0 additions & 19 deletions attic/default.conf

This file was deleted.

Binary file removed attic/restful_example.png
Binary file not shown.
2 changes: 1 addition & 1 deletion company_dns/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM tiangolo/uwsgi-nginx-flask:python3.10
FROM tiangolo/uwsgi-nginx-flask:python3.11
COPY ./requirements.txt /var/www/requirements.txt
RUN pip3 install --no-cache-dir --upgrade -r /var/www/requirements.txt
COPY ./app /app
101 changes: 91 additions & 10 deletions company_dns/app/lib/edgar.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/local/bin/python3
#!/usr/bin/env python3
import sqlite3
import re
import sys
import requests
import argparse
import pprint
Expand All @@ -9,10 +10,11 @@
__author__ = "Michael Hay"
__copyright__ = "Copyright 2023, Mediumroast, Inc. All rights reserved."
__license__ = "Apache 2.0"
__version__ = "2.0.0"
__version__ = "2.1.0"
__maintainer__ = "Michael Hay"
__status__ = "Alpha"
__date__ = '2022-October-4'
__status__ = "Beta"
__date__ = '2023-April-1'
__contact__ = 'https://github.com/miha42-github/company_dns'

#### Globals ####
# Used for setting attributes consistently when unknown
Expand Down Expand Up @@ -41,6 +43,21 @@
GETOWNER = "/cgi-bin/own-disp?action=getowner&CIK="
EDGARSEARCH = "&action=getcompany"

# Upstream code modules and data sources this module relies on.
# The mapping is attached verbatim to wrapped responses under the
# 'dependencies' key.
DEPENDENCIES = {
    # Code modules
    'modules': {
        'sic': 'https://github.com/miha42-github/company_dns',
        'pyedgar': 'https://github.com/gaulinmp/pyedgar',
    },
    # Data sets and remote endpoints
    'data': {
        'sicData': 'https://github.com/miha42-github/sic4-list',
        'oshaSICQuery': 'https://www.osha.gov/data/sic-search',
        'pyedgarRoot': 'https://www.sec.gov/Archives/edgar/data',
        'edgarFacts': 'https://data.sec.gov/api/xbrl/companyfacts/',
        'edgarSubmissions': 'https://data.sec.gov/submissions/',
    },
}

class EdgarQueries:
"""A core set of methods designed to interact with both locally cached and remote SEC EDGAR data.

Expand All @@ -63,12 +80,14 @@ def __init__(
db_file='./company_dns.db',
agent='Mediumroast, Inc. help@mediumroast.io',
name='edgar',
flat_return=False,
description='A module and simple CLI too to search for company data in EDGAR.'):

# The SQLite database connection and cursor
self.e_conn = sqlite3.connect(db_file)
self.ec = self.e_conn.cursor()
self.db_file = db_file
self.flat_return = flat_return

# User agent to talk to EDGAR services
self.headers = {
Expand Down Expand Up @@ -142,6 +161,9 @@ def get_all_ciks(self):
Returns:
final_companies (dict): An object containing a list of all company CIK pairs and the total number of company objects processed
"""
my_function = sys._getframe(0).f_code.co_name
my_class = self.__class__.__name__

final_companies = {
'companies': {},
'totalCompanies': 0
Expand Down Expand Up @@ -173,11 +195,30 @@ def get_all_ciks(self):

# Count the results
final_companies['totalCompanies'] = len(tmp_companies)

if self.flat_return:
return final_companies
elif final_companies['totalCompanies'] == 0:
return {
'code': 404,
'message': 'No company CIK found for query [' + self.company_or_cik + '].',
'module': my_class + '-> ' + my_function,
'data': final_companies,
'dependencies': DEPENDENCIES
}
else:
return {
'code': 200,
'message': 'Company CIK data has been returned for query [' + self.company_or_cik + '].',
'module': my_class + '-> ' + my_function,
'data': final_companies,
'dependencies': DEPENDENCIES
}

# Return
return final_companies

# TODO: align the return structure so that it is consistent with merged

def get_all_details (self, firmographics=True, cik_query=False):
"""Using a supplied query string retrieve all matching company data including from the cache DB and EDGAR.

Expand All @@ -192,6 +233,8 @@ def get_all_details (self, firmographics=True, cik_query=False):
Returns:
final_companies (dict): An object containing a list of all companies' details from the cache and EDGAR
"""
my_function = sys._getframe(0).f_code.co_name
my_class = self.__class__.__name__

# Set up the final data structure
final_companies = {
Expand All @@ -201,19 +244,24 @@ def get_all_details (self, firmographics=True, cik_query=False):
tmp_companies = {}

# Define the type of SQL query to use
sql_query = "SELECT * FROM companies WHERE name LIKE '%" + \
sql_query = "SELECT DISTINCT * FROM companies WHERE name LIKE '%" + \
self.company_or_cik + \
"%' AND form LIKE '" + \
self.form_type + \
"%'" \
if not cik_query \
else "SELECT * FROM companies WHERE cik = " + \
else "SELECT DISTINCT * FROM companies WHERE cik = " + \
self.company_or_cik + \
" AND form LIKE '" + \
self.form_type + \
"%'"

# Set up for capturing only unique CIKs
found_ciks = set()
for row in self.ec.execute(sql_query):
# Check whether this CIK has already been seen; process it only if it is new, otherwise skip the row
if row[CIK] in found_ciks: continue
else: found_ciks.add(row[CIK])

# Directory Listing for the filing
filing_dir = str(row[CIK]) + '/' + row[ACCESSION].replace('-', '')
Expand All @@ -236,6 +284,7 @@ def get_all_details (self, firmographics=True, cik_query=False):

# Get all relevant company data either from EDGAR or just use what is in the cache DB
company_info = {'cik': cik_no, 'companyName': company_name}

if firmographics: company_info = self.get_firmographics(cik_no)

# Pull in the form type and define the specific form object
Expand All @@ -256,8 +305,28 @@ def get_all_details (self, firmographics=True, cik_query=False):
final_companies['companies'] = tmp_companies
final_companies['totalCompanies'] = len(tmp_companies)

# Return to the caller
return final_companies
# If we only want the data internally then return without the wrapper
if self.flat_return:
return final_companies
# Return not found error if there weren't any companies
elif final_companies['totalCompanies'] == 0:
return {
'code': 404,
'message': 'No company found for query [' + self.company_or_cik + '].',
'module': my_class + '-> ' + my_function,
'data': final_companies,
'dependencies': DEPENDENCIES
}
# Return a successful query
else:
return {
'code': 200,
'message': 'Company data has been returned for query [' + self.company_or_cik + '].',
'module': my_class + '-> ' + my_function,
'data': final_companies,
'dependencies': DEPENDENCIES
}


def _transform_raw_firmographics(self, final, raw):
"""An internal helper function for get_firmographics to select only what is needed and fill in blanks.
Expand Down Expand Up @@ -289,6 +358,9 @@ def get_firmographics(self, cik):

"""
# Pick which CIK to use; note that we prefer the CIK supplied as an argument
my_function = sys._getframe(0).f_code.co_name
my_class = self.__class__.__name__

my_cik = cik if cik else self.company_or_cik

# Define the CIK and the CIK file name
Expand Down Expand Up @@ -403,7 +475,16 @@ def get_firmographics(self, cik):


# Return the company details
return firmographics
if self.flat_return:
return firmographics
else:
return {
'code': 200,
'message': 'Company data has been returned for query [' + self.company_or_cik + '].',
'module': my_class + '-> ' + my_function,
'data': firmographics,
'dependencies': DEPENDENCIES
}

if __name__ == '__main__':
query = EdgarQueries(db_file='../company_dns.db')
Expand Down
Loading