Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dev: Bug fixes in Repo Info Worker & Metric implementations #320

Merged
merged 11 commits into from
Jul 6, 2019
239 changes: 234 additions & 5 deletions augur/datasources/augur_db/augur_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -1069,14 +1069,14 @@ def issues_open_age(self, repo_group_id, repo_id=None, period='day', begin_date=
:param repo_id: The repository's repo_id, defaults to None
:return: DataFrame of age of open issues.
"""

if not begin_date:
begin_date = '1970-1-1 00:00:01'
if not end_date:
end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

openAgeSQL = None

if not repo_id:
openAgeSQL = s.sql.text("""
SELECT repo.repo_id, repo_name, issue_id, date_trunc(:period, issues.created_at ) as date, EXTRACT(DAY FROM NOW() - issues.created_at) AS open_date
Expand Down Expand Up @@ -1161,9 +1161,9 @@ def issues_closed_resolution_duration(self, repo_group_id, repo_id=None, period=
GROUP BY repo.repo_id, repo.repo_name, gh_issue_number, issue_title, issues.created_at, issues.closed_at, DIFFDATE
ORDER BY gh_issue_number
""")

results = pd.read_sql(issueSQL, self.db,
params={'repo_id': repo_id,
params={'repo_id': repo_id,
'repo_group_id': repo_group_id,
'period': period, 'begin_date':begin_date,
'end_date':end_date})
Expand Down Expand Up @@ -1202,6 +1202,81 @@ def cii_best_practices_badge(self, repo_group_id, repo_id=None):
results = pd.read_sql(cii_best_practices_badge_SQL, self.db, params={'repo_id': repo_id})
return results

@annotate(tag='forks')
def forks(self, repo_group_id, repo_id=None):
    """
    Fetch the recorded fork counts over time.

    Each repo_info row contributes one data point: its
    data_collection_date (exposed as ``date``) and its fork_count
    (exposed as ``forks``).

    When repo_id is falsy, rows for every repo in the repo group are
    returned with columns repo_id, repo_name, date, forks, ordered by
    repo_id and then date. Otherwise only the given repo is queried and
    the columns are repo_name, date, forks, ordered by date.

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql for the fork count time series
    """
    if repo_id:
        # Single-repo history.
        history_SQL = s.sql.text("""
SELECT
repo_name,
repo_info.data_collection_date as date,
fork_count AS forks
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY date
""")
        bind_params = {'repo_id': repo_id}
    else:
        # History for every repo belonging to the repo group.
        history_SQL = s.sql.text("""
SELECT
repo_info.repo_id,
repo_name,
repo_info.data_collection_date as date,
fork_count AS forks
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
ORDER BY repo_info.repo_id, date
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(history_SQL, self.db, params=bind_params)

@annotate(tag='fork-count')
def fork_count(self, repo_group_id, repo_id=None):
    """
    Fetch the most recently recorded fork count.

    When repo_id is falsy, the query covers every repo in the repo
    group and keeps, per repo, the repo_info row for which no row with
    a higher repo_info_id exists (columns: repo_id, repo_name, forks).
    Otherwise it returns at most one row for the given repo, the one
    with the newest data_collection_date (columns: repo_name, forks).

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql holding the latest fork count
    """
    if repo_id:
        # Newest snapshot of one repo, chosen by collection date.
        latest_SQL = s.sql.text("""
SELECT repo_name, fork_count AS forks
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY repo_info.data_collection_date DESC
LIMIT 1
""")
        bind_params = {'repo_id': repo_id}
    else:
        # Self anti-join: keep rows of `a` that have no later row `b`
        # (by repo_info_id) for the same repo.
        latest_SQL = s.sql.text("""
SELECT a.repo_id, repo_name, a.fork_count AS forks
FROM repo_info a LEFT JOIN repo_info b
ON (a.repo_id = b.repo_id AND a.repo_info_id < b.repo_info_id), repo
WHERE b.repo_info_id IS NULL
AND a.repo_id = repo.repo_id
AND a.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(latest_SQL, self.db, params=bind_params)

@annotate(tag='languages')
def languages(self, repo_group_id, repo_id=None):
"""Returns the implementation languages
Expand Down Expand Up @@ -1330,6 +1405,160 @@ def issues_maintainer_response_duration(self, repo_group_id, repo_id=None, begin

return results

#####################################
### VALUE ###
#####################################

@annotate(tag='stars')
def stars(self, repo_group_id, repo_id=None):
    """
    Fetch the recorded star counts over time.

    Each repo_info row contributes one data point: its
    data_collection_date (exposed as ``date``) and its stars_count
    (exposed as ``stars``).

    When repo_id is falsy, rows for every repo in the repo group are
    returned with columns repo_id, repo_name, date, stars, ordered by
    repo_id and then date. Otherwise only the given repo is queried and
    the columns are repo_name, date, stars, ordered by date.

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql for the stars count time series
    """
    if repo_id:
        # Single-repo history.
        history_SQL = s.sql.text("""
SELECT
repo_name,
repo_info.data_collection_date as date,
stars_count AS stars
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY date
""")
        bind_params = {'repo_id': repo_id}
    else:
        # History for every repo belonging to the repo group.
        history_SQL = s.sql.text("""
SELECT
repo_info.repo_id,
repo_name,
repo_info.data_collection_date as date,
stars_count AS stars
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
ORDER BY repo_info.repo_id, date
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(history_SQL, self.db, params=bind_params)

@annotate(tag='stars-count')
def stars_count(self, repo_group_id, repo_id=None):
    """
    Fetch the most recently recorded star count.

    When repo_id is falsy, the query covers every repo in the repo
    group and keeps, per repo, the repo_info row for which no row with
    a higher repo_info_id exists (columns: repo_id, repo_name, stars).
    Otherwise it returns at most one row for the given repo, the one
    with the newest data_collection_date (columns: repo_name, stars).

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql holding the latest stars count
    """
    if repo_id:
        # Newest snapshot of one repo, chosen by collection date.
        latest_SQL = s.sql.text("""
SELECT repo_name, stars_count AS stars
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY repo_info.data_collection_date DESC
LIMIT 1
""")
        bind_params = {'repo_id': repo_id}
    else:
        # Self anti-join: keep rows of `a` that have no later row `b`
        # (by repo_info_id) for the same repo.
        latest_SQL = s.sql.text("""
SELECT a.repo_id, repo_name, a.stars_count AS stars
FROM repo_info a LEFT JOIN repo_info b
ON (a.repo_id = b.repo_id AND a.repo_info_id < b.repo_info_id), repo
WHERE b.repo_info_id IS NULL
AND a.repo_id = repo.repo_id
AND a.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(latest_SQL, self.db, params=bind_params)

@annotate(tag='watchers')
def watchers(self, repo_group_id, repo_id=None):
    """
    Fetch the recorded watcher counts over time.

    Each repo_info row contributes one data point: its
    data_collection_date (exposed as ``date``) and its watchers_count
    (exposed as ``watchers``).

    When repo_id is falsy, rows for every repo in the repo group are
    returned with columns repo_id, repo_name, date, watchers, ordered
    by repo_id and then date. Otherwise only the given repo is queried
    and the columns are repo_name, date, watchers, ordered by date.

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql for the watchers count time series
    """
    if repo_id:
        # Single-repo history.
        history_SQL = s.sql.text("""
SELECT
repo_name,
repo_info.data_collection_date as date,
watchers_count AS watchers
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY date
""")
        bind_params = {'repo_id': repo_id}
    else:
        # History for every repo belonging to the repo group.
        history_SQL = s.sql.text("""
SELECT
repo_info.repo_id,
repo_name,
repo_info.data_collection_date as date,
watchers_count AS watchers
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
ORDER BY repo_info.repo_id, date
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(history_SQL, self.db, params=bind_params)

@annotate(tag='watchers-count')
def watchers_count(self, repo_group_id, repo_id=None):
    """
    Fetch the most recently recorded watcher count.

    When repo_id is falsy, the query covers every repo in the repo
    group and keeps, per repo, the repo_info row for which no row with
    a higher repo_info_id exists (columns: repo_id, repo_name,
    watchers). Otherwise it returns at most one row for the given repo,
    the one with the newest data_collection_date (columns: repo_name,
    watchers).

    :param repo_group_id: The repository's repo_group_id
    :param repo_id: The repository's repo_id, defaults to None
    :return: Result of pd.read_sql holding the latest watchers count
    """
    if repo_id:
        # Newest snapshot of one repo, chosen by collection date.
        latest_SQL = s.sql.text("""
SELECT repo_name, watchers_count AS watchers
FROM repo_info JOIN repo ON repo_info.repo_id = repo.repo_id
WHERE repo_info.repo_id = :repo_id
ORDER BY repo_info.data_collection_date DESC
LIMIT 1
""")
        bind_params = {'repo_id': repo_id}
    else:
        # Self anti-join: keep rows of `a` that have no later row `b`
        # (by repo_info_id) for the same repo.
        latest_SQL = s.sql.text("""
SELECT a.repo_id, repo_name, a.watchers_count AS watchers
FROM repo_info a LEFT JOIN repo_info b
ON (a.repo_id = b.repo_id AND a.repo_info_id < b.repo_info_id), repo
WHERE b.repo_info_id IS NULL
AND a.repo_id = repo.repo_id
AND a.repo_id IN
(SELECT repo_id FROM repo
WHERE repo_group_id = :repo_group_id)
""")
        bind_params = {'repo_group_id': repo_group_id}

    return pd.read_sql(latest_SQL, self.db, params=bind_params)

#####################################
### EXPERIMENTAL ###
#####################################
Expand Down Expand Up @@ -1622,7 +1851,7 @@ def get_repo(self, owner, repo):
def get_repos_for_dosocs(self):
""" Returns a list of repos along with their repo_id & path """
get_repos_for_dosocs_SQL = s.sql.text("""
SELECT b.repo_id, CONCAT(a.value || b.repo_group_id || chr(47) || b.repo_path || b.repo_name)
SELECT b.repo_id, CONCAT(a.value || b.repo_group_id || chr(47) || b.repo_path || b.repo_name) AS path
FROM settings a, repo b
WHERE a.setting='repo_directory'
""")
Expand Down
Loading