Skip to content

Commit

Permalink
implement 'issue_duration' metric
Browse files: browse the repository at this point in the history
Signed-off-by: Parth Sharma <parth261297@gmail.com>
  • Loading branch information
parthsharma2 committed May 29, 2019
1 parent 2414c47 commit 2f6ace7
Showing 1 changed file with 30 additions and 12 deletions.
42 changes: 30 additions & 12 deletions augur/datasources/augur_db/augur_db.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
#SPDX-License-Identifier: MIT
"""
Data source that uses the Augur relational database of GitHub activity.
Data source that uses the Augur relational database of GitHub activity.
"""

import pandas as pd
Expand Down Expand Up @@ -29,7 +29,7 @@ def __init__(self, user, password, host, port, dbname, schema):
connect_args={'options': '-csearch_path={}'.format(schema)})

logger.debug('GHTorrent: Connecting to {} schema of {}:{}/{} as {}'.format(schema, host, port, dbname, user))

# try:
# self.userid('howderek')
# except Exception as e:
Expand All @@ -56,22 +56,22 @@ def code_changes(self, repo_url, period='day', begin_date=None, end_date=None):
end_date = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

code_changes_SQL = s.sql.text("""
SELECT date_trunc(:period, cmt_committer_date::DATE) as commit_date, COUNT(cmt_id)
SELECT date_trunc(:period, cmt_committer_date::DATE) as commit_date, COUNT(cmt_id)
FROM commits
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
AND cmt_committer_date BETWEEN :begin_date AND :end_date
GROUP BY commit_date
ORDER BY commit_date;
""")

results = pd.read_sql(code_changes_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period,
results = pd.read_sql(code_changes_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period,
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='code-changes-lines')
def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=None):
"""Returns a timeseries of code changes added and removed.
:param repo_url: The repository's URL
:param period: To set the periodicity to 'day', 'week', 'month', or 'year', defaults to 'day'
:param begin_date: Specifies the begin date, defaults to '1970-1-1 00:00:00'
Expand All @@ -86,7 +86,7 @@ def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=N
code_changes_lines_SQL = s.sql.text("""
SELECT date_trunc(:period, cmt_author_date::DATE) as commit_date, SUM(cmt_added) AS added, SUM(cmt_removed) as removed
FROM commits
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
AND cmt_author_date BETWEEN :begin_date AND :end_date
GROUP BY commit_date
ORDER BY commit_date;
Expand All @@ -95,7 +95,7 @@ def code_changes_lines(self, repo_url, period='day', begin_date=None, end_date=N
results = pd.read_sql(code_changes_lines_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period,
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='issues-new')
def issues_new(self, repo_url, period='day', begin_date=None, end_date=None):
"""Returns a timeseries of new issues opened.
Expand All @@ -114,7 +114,7 @@ def issues_new(self, repo_url, period='day', begin_date=None, end_date=None):
issues_new_SQL = s.sql.text("""
SELECT date_trunc(:period, created_at::DATE) as issue_date, COUNT(issue_id) as issues
FROM issues
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
AND created_at BETWEEN :begin_date AND :end_date
GROUP BY issue_date
ORDER BY issue_date;
Expand All @@ -123,7 +123,7 @@ def issues_new(self, repo_url, period='day', begin_date=None, end_date=None):
results = pd.read_sql(issues_new_SQL, self.db, params={'repourl': '%{}%'.format(repo_url), 'period': period,
'begin_date': begin_date, 'end_date': end_date})
return results


@annotate(tag='issues-closed')
def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None):
Expand Down Expand Up @@ -153,6 +153,24 @@ def issues_closed(self, repo_url, period='day', begin_date=None, end_date=None):
'begin_date': begin_date, 'end_date': end_date})
return results

@annotate(tag='issue-duration')
def issue_duration(self, repo_url):
    """Reports how long each closed issue of a repository stayed open.

    The repository is the first ``repo`` row whose ``repo_git`` contains
    ``repo_url`` as a substring (``LIKE '%<repo_url>%'``). Issues that have
    no ``closed_at`` value are left out of the result.

    :param repo_url: The repository's URL
    :return: DataFrame with one row per issue holding ``issue_id`` and
        ``duration`` (``closed_at - created_at``), ordered by ``issue_id``
    """
    # The statement text is kept flush-left so the SQL sent to the database
    # is exactly the text written here.
    query = s.sql.text("""
SELECT issue_id, (closed_at - created_at) AS duration
FROM issues
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
AND closed_at IS NOT NULL
ORDER BY issue_id;
""")

    # Wrap the URL in wildcards so the LIKE lookup matches it anywhere
    # inside the stored repo_git value.
    bind_params = {'repourl': f'%{repo_url}%'}

    return pd.read_sql(query, self.db, params=bind_params)

@annotate(tag='issue-backlog')
def issues_backlog(self, repo_url):
"""Returns number of issues currently open.
Expand All @@ -177,12 +195,12 @@ def issues_backlog(self, repo_url):
@annotate(tag='lines-changed-by-author')
def lines_changed_by_author(self, repo_url):
"""
Returns number of lines changed per author per day
Returns number of lines changed per author per day
:param repo_url: the repository's URL
"""
linesChangedByAuthorSQL = s.sql.text("""
SELECT cmt_author_email, cmt_author_date, cmt_author_affiliation as affiliation,
SELECT cmt_author_email, cmt_author_date, cmt_author_affiliation as affiliation,
SUM(cmt_added) as additions, SUM(cmt_removed) as deletions, SUM(cmt_whitespace) as whitespace
FROM commits
WHERE repo_id = (SELECT repo_id FROM repo WHERE repo_git LIKE :repourl LIMIT 1)
Expand Down

0 comments on commit 2f6ace7

Please sign in to comment.