Skip to content

Commit

Permalink
Merge branch 'dev' into main
Browse files Browse the repository at this point in the history
Signed-off-by: Sean P. Goggins <outdoors@acm.org>
  • Loading branch information
sgoggins authored Oct 18, 2024
2 parents b3dcfe6 + 73c8fb3 commit b6382bd
Show file tree
Hide file tree
Showing 11 changed files with 536 additions and 144 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ We follow the [First Timers Only](https://www.firsttimersonly.com/) philosophy o
## NEW RELEASE ALERT!
**If you want to jump right in, the updated docker, docker-compose and bare metal installation instructions are available [here](docs/new-install.md)**.


Augur is now releasing a dramatically improved new version to the ```main``` branch. It is also available [here](https://github.com/chaoss/augur/releases/tag/v0.76.2).


- The `main` branch is a stable version of our new architecture, which features:
- Dramatic improvement in the speed of large-scale data collection (100,000+ repos): all data for 100k+ repos is obtained within 2 weeks.
- A new job management architecture that uses Celery and Redis to manage queues, and enables users to run a Flower job monitoring dashboard.
Expand Down
111 changes: 47 additions & 64 deletions augur/api/view/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import re
from flask_login import current_user, login_required
from augur.application.db.models import Repo, RepoGroup, UserGroup, UserRepo
from augur.tasks.frontend import add_org_repo_list, parse_org_and_repo_name, parse_org_name
from augur.tasks.frontend import add_github_orgs_and_repos, parse_org_and_repo_name, parse_org_name, add_gitlab_repos
from .utils import *
from ..server import app
from augur.application.db.session import DatabaseSession
Expand All @@ -14,18 +14,6 @@ def cache(file=None):
return redirect(url_for('static', filename="cache"))
return redirect(url_for('static', filename="cache/" + toCacheFilename(file, False)))


def add_existing_repo_to_group(session, user_id, group_name, repo_id):
    """Add a repo that already exists in the database to one of a user's groups.

    Args:
        session: Database session handed through to the model helpers.
        user_id: ID of the user whose group is looked up.
        group_name: Name of the group the repo should be added to.
        repo_id: ID of the existing repo.

    Returns:
        bool: True when ``UserRepo.insert`` reports success; False when the
        group name cannot be converted to a group ID or the insert result is
        falsy.
    """
    logger.info("Adding existing repo to group")

    group_id = UserGroup.convert_group_name_to_id(session, user_id, group_name)
    if group_id is None:
        return False

    # The success path used to fall off the end and return None, which is
    # falsy and therefore indistinguishable from the failure paths for any
    # caller that tests the result's truthiness. Return an explicit bool.
    return bool(UserRepo.insert(session, repo_id, group_id))

def add_existing_org_to_group(session, user_id, group_name, rg_id):

Expand All @@ -48,6 +36,8 @@ def add_existing_org_to_group(session, user_id, group_name, rg_id):
@login_required
def av_add_user_repo():

print("Adding user repos")

urls = request.form.get('urls')
group = request.form.get("group_name")

Expand All @@ -68,58 +58,51 @@ def av_add_user_repo():

invalid_urls = []

with DatabaseSession(logger, current_app.engine) as session:
for url in urls:

# matches https://github.com/{org}/ or https://github.com/{org}
if (org_name := Repo.parse_github_org_url(url)):
rg_obj = RepoGroup.get_by_name(session, org_name)
if rg_obj:
# add the orgs repos to the group
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)

# matches https://github.com/{org}/{repo}/ or https://github.com/{org}/{repo}
elif Repo.parse_github_repo_url(url)[0]:
org_name, repo_name = Repo.parse_github_repo_url(url)
repo_git = f"https://github.com/{org_name}/{repo_name}"
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)

# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
elif (match := parse_org_and_repo_name(url)):
org, repo = match.groups()
repo_git = f"https://github.com/{org}/{repo}"
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)
orgs = []
repo_urls = []
gitlab_repo_urls = []
for url in urls:

# matches https://github.com/{org}/ or https://github.com/{org}
if (org_name := Repo.parse_github_org_url(url)):
orgs.append(org_name)

# matches https://github.com/{org}/{repo}/ or https://github.com/{org}/{repo}
elif Repo.parse_github_repo_url(url)[0]:
repo_urls.append(url)

# matches /{org}/{repo}/ or /{org}/{repo} or {org}/{repo}/ or {org}/{repo}
elif (match := parse_org_and_repo_name(url)):
org, repo = match.groups()
repo_git = f"https://github.com/{org}/{repo}"
repo_urls.append(repo_git)

# matches /{org}/ or /{org} or {org}/ or {org}
elif (match := parse_org_name(url)):
org_name = match.group(1)
orgs.append(org_name)

# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
elif Repo.parse_gitlab_repo_url(url)[0]:

org_name, repo_name = Repo.parse_gitlab_repo_url(url)
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"

# matches /{org}/ or /{org} or {org}/ or {org}
elif (match := parse_org_name(url)):
org_name = match.group(1)
rg_obj = RepoGroup.get_by_name(session, org_name)
logger.info(rg_obj)
if rg_obj:
# add the orgs repos to the group
add_existing_org_to_group(session, current_user.user_id, group, rg_obj.repo_group_id)

# matches https://gitlab.com/{org}/{repo}/ or http://gitlab.com/{org}/{repo}
elif Repo.parse_gitlab_repo_url(url)[0]:

org_name, repo_name = Repo.parse_gitlab_repo_url(url)
repo_git = f"https://gitlab.com/{org_name}/{repo_name}"

# TODO: gitlab ensure the whole repo git is inserted so it can be found here
repo_obj = Repo.get_by_repo_git(session, repo_git)
if repo_obj:
add_existing_repo_to_group(session, current_user.user_id, group, repo_obj.repo_id)

else:
invalid_urls.append(url)

if urls:
urls = [url.lower() for url in urls]
add_org_repo_list.si(current_user.user_id, group, urls).apply_async()
gitlab_repo_urls.append(repo_git)
else:
invalid_urls.append(url)



if orgs or repo_urls:
repo_urls = [url.lower() for url in repo_urls]
orgs = [url.lower() for url in orgs]
flash(f"Adding repos: {repo_urls}")
flash(f"Adding orgs: {orgs}")
add_github_orgs_and_repos.si(current_user.user_id, group, orgs, repo_urls).apply_async()

if gitlab_repo_urls:
add_gitlab_repos(current_user.user_id, group, gitlab_repo_urls)

flash("Adding repos and orgs in the background")

Expand Down
42 changes: 41 additions & 1 deletion augur/application/db/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from psycopg2.errors import DeadlockDetected
from typing import List, Any, Optional, Union

from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus
from augur.application.db.models import Config, Repo, Commit, WorkerOauth, Issue, PullRequest, PullRequestReview, ContributorsAlias,UnresolvedCommitEmail, Contributor, CollectionStatus, UserGroup, RepoGroup
from augur.tasks.util.collection_state import CollectionState
from augur.application.db import get_session, get_engine
from augur.application.db.util import execute_session_query
Expand Down Expand Up @@ -144,6 +144,25 @@ def get_repo_by_repo_id(repo_id):

return repo

def get_github_repo_by_src_id(src_id):
    """Look up a GitHub repo by the ID stored in ``Repo.repo_src_id``.

    The query matches on ``repo_src_id`` and additionally requires
    ``repo_git`` to contain ``https://github.com`` (case-insensitive).

    Args:
        src_id: Value to compare against ``Repo.repo_src_id``.

    Returns:
        Whatever ``execute_session_query(..., 'first')`` yields for the query
        (presumably the first matching Repo or None; confirm against the helper).
    """
    with get_session() as session:
        github_matches = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike('%https://github.com%'),
        )
        return execute_session_query(github_matches, 'first')

def get_gitlab_repo_by_src_id(src_id):
    """Look up a GitLab repo by the ID stored in ``Repo.repo_src_id``.

    The query matches on ``repo_src_id`` and additionally requires
    ``repo_git`` to contain ``https://gitlab.com`` (case-insensitive).

    Args:
        src_id: Value to compare against ``Repo.repo_src_id``.

    Returns:
        Whatever ``execute_session_query(..., 'first')`` yields for the query
        (presumably the first matching Repo or None; confirm against the helper).
    """
    with get_session() as session:
        gitlab_matches = session.query(Repo).filter(
            Repo.repo_src_id == src_id,
            Repo.repo_git.ilike('%https://gitlab.com%'),
        )
        return execute_session_query(gitlab_matches, 'first')


def remove_working_commits_by_repo_id_and_hashes(repo_id, commit_hashes):

remove_working_commits = s.sql.text("""DELETE FROM working_commits
Expand Down Expand Up @@ -553,3 +572,24 @@ def get_updated_issues(repo_id, since):
with get_session() as session:
return session.query(Issue).filter(Issue.repo_id == repo_id, Issue.updated_at >= since).order_by(Issue.gh_issue_number).all()



def get_group_by_name(user_id, group_name):
    """Return the user group matching both ``user_id`` and ``group_name``.

    Args:
        user_id: Value compared against ``UserGroup.user_id``.
        group_name: Value compared against ``UserGroup.name``.

    Returns:
        The single matching UserGroup, or None when no row matches.

    Raises:
        The SQLAlchemy multiple-results error is not caught here, so it
        propagates if more than one row matches.
    """
    with get_session() as session:
        candidates = session.query(UserGroup).filter(
            UserGroup.user_id == user_id,
            UserGroup.name == group_name,
        )
        # one_or_none() gives None for zero rows and still raises when several
        # rows match, mirroring one() wrapped in an except NoResultFound.
        return candidates.one_or_none()

def get_repo_group_by_name(name):
    """Return the first repo group whose ``rg_name`` equals ``name``.

    Args:
        name: Value compared against ``RepoGroup.rg_name``.

    Returns:
        The first matching RepoGroup, or None when the query has no rows.
    """
    with get_session() as session:
        name_matches = session.query(RepoGroup).filter(RepoGroup.rg_name == name)
        return name_matches.first()

11 changes: 7 additions & 4 deletions augur/application/db/models/augur_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -870,6 +870,7 @@ class Repo(Base):
data_collection_date = Column(
TIMESTAMP(precision=0), server_default=text("CURRENT_TIMESTAMP")
)
repo_src_id = Column(BigInteger)

repo_group = relationship("RepoGroup", back_populates="repo")
user_repo = relationship("UserRepo", back_populates="repo")
Expand Down Expand Up @@ -1064,7 +1065,7 @@ def parse_github_org_url(url):
return result.groups()[0]

@staticmethod
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source, repo_src_id):
"""Add a repo to the repo table.
Args:
Expand Down Expand Up @@ -1098,7 +1099,8 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
"repo_type": None,
"tool_source": tool_source,
"tool_version": "1.0",
"data_source": "Git"
"data_source": "Git",
"repo_src_id": repo_src_id
}

repo_unique = ["repo_git"]
Expand All @@ -1111,7 +1113,7 @@ def insert_gitlab_repo(session, url: str, repo_group_id: int, tool_source):
return result[0]["repo_id"]

@staticmethod
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type):
def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_type, repo_src_id):
"""Add a repo to the repo table.
Args:
Expand Down Expand Up @@ -1146,7 +1148,8 @@ def insert_github_repo(session, url: str, repo_group_id: int, tool_source, repo_
"repo_type": repo_type,
"tool_source": tool_source,
"tool_version": "1.0",
"data_source": "Git"
"data_source": "Git",
"repo_src_id": repo_src_id
}

repo_unique = ["repo_git"]
Expand Down
25 changes: 25 additions & 0 deletions augur/application/schema/alembic/versions/30_add_repo_src_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
"""Add repo src id
Revision ID: 30
Revises: 29
Create Date: 2024-08-30
"""
from alembic import op
import sqlalchemy as sa

# revision identifiers, used by Alembic.
# This migration is revision 30 and applies on top of revision 29.
revision = '30'
down_revision = '29'
# No branch labels or dependencies on other revisions are declared.
branch_labels = None
depends_on = None


def upgrade():
    """Add a nullable ``repo_src_id`` column to ``augur_data.repo`` and make it unique."""
    src_id_column = sa.Column('repo_src_id', sa.BigInteger(), nullable=True)
    op.add_column('repo', src_id_column, schema='augur_data')

    # NOTE(review): the constraint covers repo_src_id alone. If IDs issued by
    # different forges (GitHub vs. GitLab) can coincide, inserts would
    # conflict; confirm this is intended.
    op.create_unique_constraint(
        'repo_src_id_unique',
        'repo',
        ['repo_src_id'],
        schema='augur_data',
    )


def downgrade():
    """Drop the ``repo_src_id_unique`` constraint, then the ``repo_src_id`` column."""
    table, schema = 'repo', 'augur_data'

    # The constraint goes first so the column it covers can then be removed.
    op.drop_constraint('repo_src_id_unique', table, schema=schema, type_='unique')
    op.drop_column(table, 'repo_src_id', schema=schema)
Loading

0 comments on commit b6382bd

Please sign in to comment.