Skip to content

Commit

Permalink
Merge pull request #2879 from chaoss/add-commit-message-data
Browse files Browse the repository at this point in the history
Add Commit Message Table and Add Commit Messages to Facade Collection
  • Loading branch information
sgoggins authored Aug 5, 2024
2 parents 925e782 + 54987c5 commit 0abf235
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 6 deletions.
1 change: 1 addition & 0 deletions augur/application/db/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
PullRequestTeam,
PullRequestRepo,
PullRequestReviewMessageRef,
CommitMessage,
RepoClone,
)

Expand Down
30 changes: 30 additions & 0 deletions augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -1350,6 +1350,36 @@ class Commit(Base):
repo = relationship("Repo", back_populates="commits")
message_ref = relationship("CommitCommentRef", back_populates="cmt")

class CommitMessage(Base):
    """ORM model for the ``augur_data.commit_messages`` table.

    Each row holds the message text for one commit hash in one repository;
    the ``commit-message-insert-unique`` constraint keeps the
    ``(repo_id, cmt_hash)`` pair unique. The remaining columns record the
    tool/data source metadata and the collection timestamp.
    """

    __tablename__ = "commit_messages"
    __table_args__ = (
        UniqueConstraint(
            "repo_id",
            "cmt_hash",
            name="commit-message-insert-unique",
        ),
        {
            "schema": "augur_data",
            "comment": "This table holds commit messages",
        },
    )

    # Surrogate primary key.
    # NOTE(review): the default draws from the commits table's sequence
    # (commits_cmt_id_seq) rather than a dedicated one - confirm this is intended.
    cmt_msg_id = Column(
        BigInteger,
        server_default=text("nextval('augur_data.commits_cmt_id_seq'::regclass)"),
        primary_key=True,
    )

    # Owning repository; the column type is taken from the referenced key.
    repo_id = Column(
        ForeignKey(
            "augur_data.repo.repo_id",
            ondelete="RESTRICT",
            onupdate="CASCADE",
        ),
        nullable=False,
    )

    # Commit message text and the hash of the commit it belongs to.
    cmt_msg = Column(String, nullable=False)
    cmt_hash = Column(String(80), nullable=False)

    # Collection metadata.
    tool_source = Column(String)
    tool_version = Column(String)
    data_source = Column(String)
    data_collection_date = Column(
        TIMESTAMP(precision=0),
        server_default=text("CURRENT_TIMESTAMP"),
    )

class Issue(Base):
__tablename__ = "issues"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
"""Add commit message table
Revision ID: 29
Revises: 28
Create Date: 2024-07-25 12:02:57.185867
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
# This migration is revision 29 and applies on top of revision 28.
revision = '29'
down_revision = '28'
# No branch labels or cross-revision dependencies are declared for this migration.
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``augur_data.commit_messages`` table.

    The table gets a ``cmt_msg_id`` primary key, a foreign key from
    ``repo_id`` to ``augur_data.repo.repo_id`` and a unique constraint on
    ``(repo_id, cmt_hash)``.
    """
    # ### commands auto generated by Alembic - please adjust! ###
    columns = [
        sa.Column(
            'cmt_msg_id',
            sa.BigInteger(),
            server_default=sa.text("nextval('augur_data.commits_cmt_id_seq'::regclass)"),
            nullable=False,
        ),
        sa.Column('repo_id', sa.BigInteger(), nullable=False),
        sa.Column('cmt_msg', sa.String(), nullable=False),
        sa.Column('cmt_hash', sa.String(length=80), nullable=False),
        sa.Column('tool_source', sa.String(), nullable=True),
        sa.Column('tool_version', sa.String(), nullable=True),
        sa.Column('data_source', sa.String(), nullable=True),
        sa.Column(
            'data_collection_date',
            postgresql.TIMESTAMP(precision=0),
            server_default=sa.text('CURRENT_TIMESTAMP'),
            nullable=True,
        ),
    ]
    constraints = [
        sa.ForeignKeyConstraint(
            ['repo_id'],
            ['augur_data.repo.repo_id'],
            onupdate='CASCADE',
            ondelete='RESTRICT',
        ),
        sa.PrimaryKeyConstraint('cmt_msg_id'),
        sa.UniqueConstraint('repo_id', 'cmt_hash', name='commit-message-insert-unique'),
    ]
    op.create_table(
        'commit_messages',
        *columns,
        *constraints,
        schema='augur_data',
        comment='This table holds commit messages',
    )
    # ### end Alembic commands ###


def downgrade():
    """Drop the ``augur_data.commit_messages`` table."""
    # ### commands auto generated by Alembic - please adjust! ###
    op.drop_table(
        'commit_messages',
        schema='augur_data',
    )
    # ### end Alembic commands ###
17 changes: 12 additions & 5 deletions augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,8 @@

import logging
from celery import group, chain
import sqlalchemy as s

from augur.application.db.lib import execute_sql, fetchall_data_from_sql_text, get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits
from augur.application.db.lib import get_session, get_repo_by_repo_git, get_repo_by_repo_id, remove_working_commits_by_repo_id_and_hashes, get_working_commits_by_repo_id, facade_bulk_insert_commits, bulk_insert_dicts

from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import trim_commits
from augur.tasks.git.util.facade_worker.facade_worker.utilitymethods import get_absolute_repo_path, get_parent_commits_set, get_existing_commits_set
Expand All @@ -23,7 +22,7 @@
from augur.tasks.init.celery_app import AugurFacadeRepoCollectionTask


from augur.application.db.models import Repo, CollectionStatus
from augur.application.db.models import Repo, CollectionStatus, CommitMessage

from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics
from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics
Expand Down Expand Up @@ -205,6 +204,7 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None:
repo_loc = (f"{absolute_path}/.git")

pendingCommitRecordsToInsert = []
pendingCommitMessageRecordsToInsert = []

for count, commitTuple in enumerate(queue):
quarterQueue = int(len(queue) / 4)
Expand All @@ -217,14 +217,21 @@ def analyze_commits_in_parallel(repo_git, multithreaded: bool)-> None:
logger.info(f"Progress through current analysis queue is {(count / len(queue)) * 100}%")

#logger.info(f"Got to analysis!")
commitRecords = analyze_commit(logger, repo_id, repo_loc, commitTuple)
commitRecords, commit_msg = analyze_commit(logger, repo_id, repo_loc, commitTuple)
#logger.debug(commitRecord)
if len(commitRecords):
if commitRecords:
pendingCommitRecordsToInsert.extend(commitRecords)
if len(pendingCommitRecordsToInsert) >= 1000:
facade_bulk_insert_commits(logger,pendingCommitRecordsToInsert)
pendingCommitRecordsToInsert = []

if commit_msg:
    pendingCommitMessageRecordsToInsert.append(commit_msg)
    # Flush commit messages in batches of 1000, mirroring the commit-record
    # batching: start a fresh batch after each flush. Without the reset the
    # list keeps growing and every later iteration re-sends all of it.
    if len(pendingCommitMessageRecordsToInsert) >= 1000:
        bulk_insert_dicts(logger,pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id","cmt_hash"])
        pendingCommitMessageRecordsToInsert = []

bulk_insert_dicts(logger,pendingCommitMessageRecordsToInsert, CommitMessage, ["repo_id","cmt_hash"])
facade_bulk_insert_commits(logger,pendingCommitRecordsToInsert)

# Remove the working commit.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@
# and checks for any parents of HEAD that aren't already accounted for in the
# repos. It also rebuilds analysis data, checks any changed affiliations and
# aliases, and caches data for display.
import datetime
import subprocess
from subprocess import check_output
import os
import sqlalchemy as s

Expand Down Expand Up @@ -177,6 +179,22 @@ def generate_commit_record(repos_id,commit,filename,
#db_local.commit()
execute_sql(store_working_commit)

# Read the full message body (%B) of this single commit from the repo's git dir.
# The argument list is built explicitly so a repo path containing whitespace is
# not split into several arguments, and the bytes returned by check_output are
# decoded to text (same utf-8 / ignore-errors handling as the git log parsing
# in this function) before being stored.
commit_message = check_output(
    ["git", "--git-dir", str(repo_loc), "log", "--format=%B", "-n", "1", str(commit)]
).decode("utf-8", errors="ignore").strip()


# Row for the commit_messages table; returned to the caller alongside the
# per-file commit records.
msg_record = {
    'repo_id' : repo_id,
    'cmt_msg' : commit_message,
    'cmt_hash' : commit,
    'tool_source' : 'Facade',
    'tool_version' : '0.78?',
    'data_source' : 'git',
    'data_collection_date' : datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}


#session.log_activity('Debug',f"Stored working commit and analyzing : {commit}")

for line in git_log.stdout.read().decode("utf-8",errors="ignore").split(os.linesep):
Expand Down Expand Up @@ -314,4 +332,4 @@ def generate_commit_record(repos_id,commit,filename,
added,removed,whitespace))


return recordsToInsert
return recordsToInsert, msg_record

0 comments on commit 0abf235

Please sign in to comment.