Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Commit Message Table and Add Commit Messages to Facade Collection #2879

Merged
merged 6 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions augur/application/db/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
PullRequestTeam,
PullRequestRepo,
PullRequestReviewMessageRef,
CommitMessage,
RepoClone,
)

Expand Down
30 changes: 30 additions & 0 deletions augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,36 @@ class Commit(Base):
repo = relationship("Repo", back_populates="commits")
message_ref = relationship("CommitCommentRef", back_populates="cmt")

class CommitMessage(Base):
    """ORM mapping for the ``augur_data.commit_messages`` table.

    Each row holds the message text (``cmt_msg``) recorded for one commit
    hash (``cmt_hash``) within one repository (``repo_id``). The
    ``(repo_id, cmt_hash)`` pair is constrained to be unique.
    """

    __tablename__ = "commit_messages"
    __table_args__ = (
        UniqueConstraint("repo_id", "cmt_hash", name="commit-message-insert-unique"),
        {"schema": "augur_data", "comment": "This table holds commit messages"},
    )

    # Surrogate primary key; its server-side default draws from the
    # augur_data.commits_cmt_id_seq sequence.
    # NOTE(review): presumably that sequence also feeds the commits table's
    # ids -- confirm sharing it here is intended.
    cmt_msg_id = Column(
        BigInteger,
        server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"),
        primary_key=True,
    )

    # Repository the commit belongs to; deleting a referenced repo row is
    # restricted, and repo_id updates cascade.
    repo_id = Column(
        ForeignKey("augur_data.repo.repo_id", onupdate="CASCADE", ondelete="RESTRICT"),
        nullable=False,
    )

    # Message text and the commit hash it was read from (hash capped at 80 chars).
    cmt_msg = Column(String, nullable=False)
    cmt_hash = Column(String(80), nullable=False)

    # Provenance metadata; the collection timestamp defaults to the
    # database's CURRENT_TIMESTAMP.
    tool_source = Column(String)
    tool_version = Column(String)
    data_source = Column(String)
    data_collection_date = Column(
        TIMESTAMP(precision=0),
        server_default=text("CURRENT_TIMESTAMP"),
    )

class Issue(Base):
__tablename__ = "issues"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Add commit message table

Revision ID: 29
Revises: 28
Create Date: 2024-07-25 12:02:57.185867

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# Revision identifiers, used by Alembic: this migration is revision '29'
# and applies on top of revision '28'.
revision = '29'
down_revision = '28'
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``augur_data.commit_messages`` table.

    The table gets a ``cmt_msg_id`` primary key, a foreign key from
    ``repo_id`` to ``augur_data.repo.repo_id``, and a unique constraint on
    ``(repo_id, cmt_hash)``.
    """
    # Column definitions, in the order they appear in the created table.
    columns = (
        sa.Column(
            'cmt_msg_id',
            sa.BigInteger(),
            server_default=sa.text("nextval('augur_data.commits_cmt_id_seq'::regclass)"),
            nullable=False,
        ),
        sa.Column('repo_id', sa.BigInteger(), nullable=False),
        sa.Column('cmt_msg', sa.String(), nullable=False),
        sa.Column('cmt_hash', sa.String(length=80), nullable=False),
        sa.Column('tool_source', sa.String(), nullable=True),
        sa.Column('tool_version', sa.String(), nullable=True),
        sa.Column('data_source', sa.String(), nullable=True),
        sa.Column(
            'data_collection_date',
            postgresql.TIMESTAMP(precision=0),
            server_default=sa.text('CURRENT_TIMESTAMP'),
            nullable=True,
        ),
    )

    # Table-level constraints: repo foreign key, primary key, and the
    # one-row-per-(repo, commit hash) uniqueness rule.
    constraints = (
        sa.ForeignKeyConstraint(
            ['repo_id'],
            ['augur_data.repo.repo_id'],
            onupdate='CASCADE',
            ondelete='RESTRICT',
        ),
        sa.PrimaryKeyConstraint('cmt_msg_id'),
        sa.UniqueConstraint('repo_id', 'cmt_hash', name='commit-message-insert-unique'),
    )

    op.create_table(
        'commit_messages',
        *columns,
        *constraints,
        schema='augur_data',
        comment='This table holds commit messages',
    )


def downgrade():
    """Undo this revision by dropping ``augur_data.commit_messages``."""
    op.drop_table('commit_messages', schema='augur_data')
17 changes: 12 additions & 5 deletions augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

import logging
from celery import group, chain
import sqlalchemy as s

from augur.application.db.lib import execute_sql, fetchall_data_from_sql_text, get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits
from augur.application.db.lib import get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits, bulk_insert_dicts

from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import trim_commits
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path, get_parent_commits_set, get_existing_commits_set
Expand All @@ -23,7 +22,7 @@
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask


from augur.application.db.models import Repo, CollectionStatus
from augur.application.db.models import Repo, CollectionStatus, CommitMessage

from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics
from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics
Expand All @@ -35,7 +34,7 @@
#define an error callback for chains in facade collection so facade doesn't make the program crash
#if it does.
@celery.task
def facade_error_handler(request,exc,traceback):

Check warning on line 37 in augur/tasks/git/facade_tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 W0621: Redefining name 'traceback' from outer scope (line 13) (redefined-outer-name) Raw Output: augur/tasks/git/facade_tasks.py:37:37: W0621: Redefining name 'traceback' from outer scope (line 13) (redefined-outer-name)

logger = logging.getLogger(facade_error_handler.__name__)

Expand Down Expand Up @@ -194,7 +193,7 @@
facade_helper.log_activity('Debug',f"Commits missing from repo {repo_id}: {len(missing_commits)}")


if not len(missing_commits) or repo_id is None:

Check warning on line 196 in augur/tasks/git/facade_tasks.py

View workflow job for this annotation

GitHub Actions / runner / pylint

[pylint] reported by reviewdog 🐶 C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len) Raw Output: augur/tasks/git/facade_tasks.py:196:7: C1802: Do not use `len(SEQUENCE)` without comparison to determine if a sequence is empty (use-implicit-booleaness-not-len)
#session.log_activity('Info','Type of missing_commits: %s' % type(missing_commits))
return

Expand All @@ -205,6 +204,7 @@
repo_loc = (f"{absolute_path}/.git")

pendingCommitRecordsToInsert = []
pendingCommitMessageRecordsToInsert = []

for count, commitTuple in enumerate(queue):
quarterQueue = int(len(queue) / 4)
Expand All @@ -217,14 +217,21 @@
logger.info(f"Progress through current analysis queue is {(count / len(queue)) * 100}%")

#logger.info(f"Got to analysis!")
commitRecords = analyze_commit(logger, repo_id, repo_loc, commitTuple)
commitRecords, commit_msg = analyze_commit(logger, repo_id, repo_loc, commitTuple)
#logger.debug(commitRecord)
if len(commitRecords):
if commitRecords:
pendingCommitRecordsToInsert.extend(commitRecords)
if len(pendingCommitRecordsToInsert) >= 1000:
facade_bulk_insert_commits(logger,pendingCommitRecordsToInsert)
pendingCommitRecordsToInsert = []

if commit_msg:
pendingCommitMessageRecordsToInsert.append(commit_msg)

if len(pendingCommitMessageRecordsToInsert) >= 1000:
bulk_insert_dicts(logger,pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id","cmt_hash"])

bulk_insert_dicts(logger,pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id","cmt_hash"])
facade_bulk_insert_commits(logger,pendingCommitRecordsToInsert)

# Remove the working commit.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
# and checks for any parents of HEAD that aren't already accounted for in the
# repos. It also rebuilds analysis data, checks any changed affiliations and
# aliases, and caches data for display.
import datetime
import subprocess
from subprocess import check_output
import os
import sqlalchemy as s

Expand Down Expand Up @@ -177,6 +179,22 @@ def generate_commit_record(repos_id,commit,filename,
#db_local.commit()
execute_sql(store_working_commit)

commit_message = check_output(
f"git --git-dir {repo_loc} log --format=%B -n 1 {commit}".split()
).strip()


msg_record = {
'repo_id' : repo_id,
'cmt_msg' : commit_message,
'cmt_hash' : commit,
'tool_source' : 'Facade',
'tool_version' : '0.78?',
'data_source' : 'git',
'data_collection_date' : datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}


#session.log_activity('Debug',f"Stored working commit and analyzing : {commit}")

for line in git_log.stdout.read().decode("utf-8",errors="ignore").split(os.linesep):
Expand Down Expand Up @@ -314,4 +332,4 @@ def generate_commit_record(repos_id,commit,filename,
added,removed,whitespace))


return recordsToInsert
return recordsToInsert, msg_record
Loading