Script to send workflow metrics to a database #23484
@@ -0,0 +1,175 @@
#!/usr/bin/env python3

import logging
import os

# dateutil.parser must be imported explicitly; a bare `import dateutil`
# does not expose the parser submodule used below.
import dateutil.parser
import psycopg2
from github import Github


def init_logger():
    LOGLEVEL = os.environ.get('LOGLEVEL', 'INFO').upper()
    logging.basicConfig(level=LOGLEVEL,
                        format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                        datefmt='%m-%d-%Y %H:%M:%S')

def create_db_tables(conn, cur):
    cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_runs_test(
        id SERIAL,
        run_id BIGINT PRIMARY KEY,
        html_url TEXT,
        name VARCHAR(255),
        run_started_at TIMESTAMP,
        triggering_actor_login VARCHAR(255),
        conclusion VARCHAR(25),
        run_number INT,
        event VARCHAR(50),
        run_attempt INT,
        repository_full_name VARCHAR(255),
        head_repository_full_name VARCHAR(255),
        head_branch VARCHAR(255),
        status VARCHAR(25),
        display_title TEXT,
        path TEXT
    );
    ''')
    cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_jobs_test(
        id SERIAL,
        job_id BIGINT PRIMARY KEY,
        parent_run_id BIGINT REFERENCES github_workflow_runs_test(run_id),
        html_url TEXT,
        name VARCHAR(255),
        created_at TIMESTAMP,
        started_at TIMESTAMP,
        completed_at TIMESTAMP,
        queued_duration_seconds INT,
        duration_seconds INT,
        runner_name VARCHAR(255),
        status VARCHAR(25),
        conclusion VARCHAR(25),
        head_branch VARCHAR(255)
    );
    ''')
    cur.execute('''CREATE TABLE IF NOT EXISTS github_workflow_steps_test(
        id SERIAL PRIMARY KEY,
        parent_job_id BIGINT REFERENCES github_workflow_jobs_test(job_id),
        name VARCHAR(255),
        conclusion VARCHAR(25),
        number INT,
        started_at TIMESTAMP,
        completed_at TIMESTAMP,
        duration_seconds INT
    );
    ''')
    conn.commit()

def main():
    init_logger()

    logger = logging.getLogger(__name__)

    github_token = os.environ.get('GITHUB_TOKEN')
    if not github_token:
        raise ValueError('GITHUB_TOKEN environment variable is not set!')

    run_id = os.environ.get('RUN_ID')
    if not run_id:
        raise ValueError('RUN_ID environment variable is not set!')

    repo_name = os.environ.get('GITHUB_REPOSITORY')
    if not repo_name:
        raise ValueError('GITHUB_REPOSITORY environment variable is not set!')

    # These connection settings should be specified in the runner's environment.
    db_username = os.environ.get('PGUSER')
    db_password = os.environ.get('PGPASSWORD')
    db_host = os.environ.get('PGHOST')
    db_database = os.environ.get('PGDATABASE')
    db_port = os.environ.get('PGPORT')
    conn = psycopg2.connect(host=db_host,
                            port=db_port,
                            user=db_username,
                            password=db_password,
                            database=db_database)

    # Create tables
    cur = conn.cursor()
    create_db_tables(conn, cur)

    # Get the data
    g = Github(github_token)
    repo = g.get_repo(repo_name)

    run = repo.get_workflow_run(int(run_id))

    # Parameterized query: values such as display_title may contain quotes,
    # which would break (or inject into) a string-formatted statement.
    workflow_data_query = '''INSERT INTO github_workflow_runs_test(
        run_id, html_url, name,
        run_started_at, triggering_actor_login, conclusion,
        run_number, event, run_attempt, repository_full_name,
        head_branch, display_title, path)
    VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
    '''
    workflow_data = (int(run_id), run.html_url, run.name, run.run_started_at,
                     run.raw_data['triggering_actor']['login'],
                     run.conclusion, run.run_number, run.event,
                     run.run_attempt, run.raw_data['repository']['full_name'],
                     run.head_branch, run.display_title, run.path)

    logger.debug('Workflow run query: %s', workflow_data_query)
    cur.execute(workflow_data_query, workflow_data)

    for job in run.jobs():
        job_id = job.id
        queued_duration_seconds = 0
        duration_seconds = 0

        # created_at is only available as a string in the raw REST payload.
        job_created_at_date = dateutil.parser.parse(job.raw_data['created_at'])

        queued_duration_timedelta = job.started_at - job_created_at_date
        queued_duration_seconds = round(queued_duration_timedelta.total_seconds())

        duration_timedelta = job.completed_at - job.started_at
        duration_seconds = round(duration_timedelta.total_seconds())

        job_data_query = '''
        INSERT INTO github_workflow_jobs_test(
            job_id, parent_run_id, html_url, name,
            created_at, started_at, completed_at,
            queued_duration_seconds, duration_seconds,
            runner_name, status, conclusion, head_branch)
        VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        '''
        job_data = (job_id, int(run_id), job.html_url, job.name,
                    job.raw_data['created_at'], job.started_at, job.completed_at,
                    queued_duration_seconds, duration_seconds,
                    job.raw_data['runner_name'], job.status, job.conclusion,
                    job.raw_data['head_branch'])
        logger.debug('Job query: %s', job_data_query)
        cur.execute(job_data_query, job_data)

Review comment (on the head_branch value): Do we need the head_branch?

Reply: nah, it's in the Job object GitHub API returns, let's keep it here, it'll allow us to avoid additional JOINs sometimes.
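
As context for the reply above: with head_branch denormalized onto the jobs table, branch-scoped job queries can skip the join back to the runs table. A minimal read-side sketch, illustrative only and not part of this PR; it assumes the cursor and schema defined above:

# Hypothetical query: filter jobs by branch without joining
# github_workflow_runs_test, since head_branch is stored per job.
cur.execute('''SELECT name, duration_seconds
               FROM github_workflow_jobs_test
               WHERE head_branch = %s
               ORDER BY duration_seconds DESC;''', ('master',))
slowest_jobs = cur.fetchall()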
        for step in job.steps:
            duration_seconds_timedelta = step.completed_at - step.started_at
            duration_seconds = round(duration_seconds_timedelta.total_seconds())

            step_data_query = '''
            INSERT INTO github_workflow_steps_test(
                parent_job_id, name, conclusion,
                number, started_at, completed_at,
                duration_seconds)
            VALUES(%s, %s, %s, %s, %s, %s, %s);
            '''
            step_data = (job_id, step.name, step.conclusion,
                         step.number, step.started_at, step.completed_at,
                         duration_seconds)
            logger.debug('Step query: %s', step_data_query)
            cur.execute(step_data_query, step_data)

    conn.commit()
    cur.close()
    conn.close()
    g.close()


if __name__ == "__main__":
    main()

Review comment (on the environment-variable configuration): maybe use … It could be done later though.

Reply: The first plan was to turn this script into a GitHub Action, that's why environment variables have been chosen. Right now it seems like we should do both, but on the next stage indeed.
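
If the elided suggestion above concerned command-line arguments, one way to support both flags and the existing environment variables is sketched below. This is hypothetical and not part of this PR; the flag names are assumptions:

# Hypothetical CLI layer: each flag falls back to the environment variable
# the script already reads, so the planned GitHub Action path keeps working.
import argparse
import os

parser = argparse.ArgumentParser(description='Send workflow metrics to a database')
parser.add_argument('--run-id', default=os.environ.get('RUN_ID'))
parser.add_argument('--repository', default=os.environ.get('GITHUB_REPOSITORY'))
args = parser.parse_args()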
@@ -0,0 +1,4 @@
PyGithub==2.2.0
github-action-utils==1.1.0
psycopg2-binary==2.9.9
python-dateutil==2.9.0.post0

Review comment (on the runs table schema): I believe that something like completed_at should also be present to calculate the total pipeline time. If there is no such thing in the pipeline data, I guess it could be calculated based on the pipeline started_time + the latest completed_at time of the jobs, yet it seems a bit messy.

Reply: That's a good suggestion! I can see how it can be done, but it'll require some changes in the logic, so let's do this on the next stage.
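
As a sketch of the fallback described in that comment (pipeline start time plus the latest job completed_at), the total could be derived from the tables this script already fills. The query below is illustrative only, not part of this PR:

# Hypothetical aggregation: approximate total pipeline time per run as the
# latest job completion minus the run's start time.
cur.execute('''SELECT r.run_id,
                      EXTRACT(EPOCH FROM MAX(j.completed_at) - r.run_started_at)
                          AS total_seconds
               FROM github_workflow_runs_test r
               JOIN github_workflow_jobs_test j ON j.parent_run_id = r.run_id
               GROUP BY r.run_id, r.run_started_at;''')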