Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Major PR fumble... Release! #2499

Merged
merged 30 commits into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
73094d3
Merge pull request #2451 from chaoss/dev
sgoggins Jul 4, 2023
6e232d2
excluding local script
sgoggins Jul 13, 2023
03e5cf8
Merge pull request #2464 from chaoss/dev
sgoggins Aug 13, 2023
78efe66
Add scc install for value worker to makefiles
IsaacMilarky Aug 21, 2023
91ab0a6
start implementing new value worker functionality
IsaacMilarky Aug 21, 2023
6d473c3
more progress with value task
IsaacMilarky Aug 21, 2023
38ee211
parse output of scc into json
IsaacMilarky Aug 21, 2023
6fe820c
add method to parse json from subprocess call because we now do like …
IsaacMilarky Aug 21, 2023
980a8a9
Finish first draft of implementation before testing
IsaacMilarky Aug 21, 2023
fe222ae
There is no "undo". The old matview is wrong so we wouldn't back rev …
sgoggins Aug 23, 2023
64fa2fe
database upgrade
sgoggins Aug 23, 2023
800ad34
Correct formatting tested.
sgoggins Aug 23, 2023
d799105
typo fix
sgoggins Aug 23, 2023
27f5f0d
test
sgoggins Aug 23, 2023
d77fb68
dialed back secondary since they are key eaters.
sgoggins Aug 23, 2023
ac39a3d
Merge pull request #2496 from chaoss/dev
sgoggins Aug 25, 2023
97fc239
index add
sgoggins Aug 25, 2023
aedc3bc
add.md
sgoggins Aug 25, 2023
c74933c
add scc value task to facade tasks
IsaacMilarky Aug 25, 2023
497aa9d
missing celery task declare
IsaacMilarky Aug 25, 2023
a2fc994
make sure scc actually gives us file data
IsaacMilarky Aug 25, 2023
e483a2c
make it so that the unique constraint on repo_labor isn't initially d…
IsaacMilarky Aug 25, 2023
df0a20a
fixed materialized view
sgoggins Aug 25, 2023
0424308
Merge remote-tracking branch 'origin/dev-index-matview-patch-spg-1' i…
sgoggins Aug 25, 2023
fd88add
alter name of alembic revision
IsaacMilarky Aug 25, 2023
b277462
add repo_url field to repo_labor insert
IsaacMilarky Aug 25, 2023
0c4ff66
fix merge conflict in a comment
IsaacMilarky Aug 25, 2023
fb47594
Merge pull request #2497 from chaoss/value-worker-migrate-to-task
sgoggins Aug 25, 2023
826c63c
simultaneous work resulted in duplicate "22" versions of the db.
sgoggins Aug 25, 2023
b995da4
Merge pull request #2490 from chaoss/dev-index-matview-patch-spg-1
sgoggins Aug 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions add.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
dfadffd
6 changes: 3 additions & 3 deletions augur/application/cli/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,21 +170,21 @@ def determine_worker_processes(ratio,maximum):
sleep_time += 6

#60% of estimate, Maximum value of 45
core_num_processes = determine_worker_processes(.6, 45)
core_num_processes = determine_worker_processes(.6, 80)
logger.info(f"Starting core worker processes with concurrency={core_num_processes}")
core_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={core_num_processes} -n core:{uuid.uuid4().hex}@%h"
process_list.append(subprocess.Popen(core_worker.split(" ")))
sleep_time += 6

#20% of estimate, Maximum value of 25
secondary_num_processes = determine_worker_processes(.2, 25)
secondary_num_processes = determine_worker_processes(.2, 26)
logger.info(f"Starting secondary worker processes with concurrency={secondary_num_processes}")
secondary_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={secondary_num_processes} -n secondary:{uuid.uuid4().hex}@%h -Q secondary"
process_list.append(subprocess.Popen(secondary_worker.split(" ")))
sleep_time += 6

#15% of estimate, Maximum value of 20
facade_num_processes = determine_worker_processes(.2, 20)
facade_num_processes = determine_worker_processes(.2, 40)
logger.info(f"Starting facade worker processes with concurrency={facade_num_processes}")
facade_worker = f"celery -A augur.tasks.init.celery_app.celery_app worker -l info --concurrency={facade_num_processes} -n facade:{uuid.uuid4().hex}@%h -Q facade"

Expand Down
188 changes: 188 additions & 0 deletions augur/application/schema/alembic/versions/22_mat_view_cntrbid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""Fix Keys and materialized view

Revision ID: 22
Revises: 21
Create Date: 2023-08-23 18:17:22.651191

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision = '22'
down_revision = '21'
branch_labels = None
depends_on = None


def upgrade():
    """Apply revision 22: rebuild keys and the contributor-actions matview."""
    add_fix_keys_22(upgrade=True)

def downgrade():
    """Revert revision 22.

    Deliberately a no-op: add_fix_keys_22 only acts when its flag is True,
    because the prior materialized view was wrong and is not restored.
    """
    add_fix_keys_22(False)

def add_fix_keys_22(upgrade=True):
    """Drop stale commit FKs / contributor unique constraints and rebuild the
    ``augur_data.explorer_contributor_actions`` materialized view.

    :param upgrade: apply the migration when True. When False this is a
        deliberate no-op -- the previous materialized view definition was
        wrong, so there is nothing safe to restore on downgrade.
    """
    if upgrade:

        # Drop foreign keys on commits and the GitHub/GitLab login unique
        # constraints on contributors (idempotent via IF EXISTS).
        conn = op.get_bind()
        conn.execute(text("""
alter TABLE
augur_data.commits DROP CONSTRAINT if exists fk_commits_contributors_3,
DROP CONSTRAINT if exists fk_commits_contributors_4;
alter TABLE augur_data.contributors
DROP CONSTRAINT if exists "GH-UNIQUE-C",
DROP CONSTRAINT if exists
"GL-cntrb-LOGIN-UNIQUE";"""))

        # Remove the old (incorrect) view before recreating it.
        conn = op.get_bind()
        conn.execute(text("""
drop materialized view if exists augur_data.explorer_contributor_actions; """))

        # Rebuild the view: a UNION of commit / issue / pull-request /
        # review / comment events per contributor, with a DENSE_RANK over
        # created_at within each (cntrb_id, repo_id) partition.
        conn = op.get_bind()
        conn.execute(text("""
create materialized view augur_data.explorer_contributor_actions as
SELECT
A.ID AS cntrb_id,
A.created_at,
A.repo_id,
A.ACTION,
repo.repo_name,
A.LOGIN,
DENSE_RANK() OVER(PARTITION BY A.ID, A.repo_id ORDER BY A.created_at) AS RANK
FROM (
select
commits.cmt_ght_author_id AS ID,
commits.cmt_author_timestamp AS created_at,
commits.repo_id,
'commit' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
( augur_data.commits LEFT JOIN augur_data.contributors ON ( ( ( contributors.cntrb_id ) :: TEXT = ( commits.cmt_ght_author_id ) :: TEXT ) ) )
GROUP BY
commits.cmt_commit_hash,
commits.cmt_ght_author_id,
commits.repo_id,
commits.cmt_author_timestamp,
'commit' :: TEXT,
contributors.cntrb_login
UNION all
SELECT
issues.reporter_id AS ID,
issues.created_at,
issues.repo_id,
'issue_opened' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
( augur_data.issues LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = issues.reporter_id ) ) )
WHERE
( issues.pull_request IS NULL )
UNION ALL
SELECT
pull_request_events.cntrb_id AS ID,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_closed' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) )
WHERE
pull_requests.pull_request_id = pull_request_events.pull_request_id
AND pull_requests.pr_merged_at IS NULL
AND ( ( pull_request_events.ACTION ) :: TEXT = 'closed' :: TEXT )
UNION ALL
SELECT
pull_request_events.cntrb_id AS ID,
pull_request_events.created_at,
pull_requests.repo_id,
'pull_request_merged' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
( augur_data.pull_request_events LEFT JOIN augur_data.contributors ON ( ( contributors.cntrb_id = pull_request_events.cntrb_id ) ) )
WHERE
pull_requests.pull_request_id = pull_request_events.pull_request_id
AND ( ( pull_request_events.ACTION ) :: TEXT = 'merged' :: TEXT )
UNION ALL
SELECT
issue_events.cntrb_id AS ID,
issue_events.created_at,
issues.repo_id,
'issue_closed' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.issues,
augur_data.issue_events
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = issue_events.cntrb_id
WHERE
issues.issue_id = issue_events.issue_id
AND issues.pull_request IS NULL
AND ( ( issue_events.ACTION ) :: TEXT = 'closed' :: TEXT )
UNION ALL
SELECT
pull_request_reviews.cntrb_id AS ID,
pull_request_reviews.pr_review_submitted_at AS created_at,
pull_requests.repo_id,
( 'pull_request_review_' :: TEXT || ( pull_request_reviews.pr_review_state ) :: TEXT ) AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
augur_data.pull_request_reviews
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = pull_request_reviews.cntrb_id
WHERE
pull_requests.pull_request_id = pull_request_reviews.pull_request_id
UNION ALL
SELECT
pull_requests.pr_augur_contributor_id AS ID,
pull_requests.pr_created_at AS created_at,
pull_requests.repo_id,
'pull_request_open' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests
LEFT JOIN augur_data.contributors ON pull_requests.pr_augur_contributor_id = contributors.cntrb_id
UNION ALL
SELECT
message.cntrb_id AS ID,
message.msg_timestamp AS created_at,
pull_requests.repo_id,
'pull_request_comment' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.pull_requests,
augur_data.pull_request_message_ref,
augur_data.message
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id
WHERE
pull_request_message_ref.pull_request_id = pull_requests.pull_request_id
AND pull_request_message_ref.msg_id = message.msg_id
UNION ALL
SELECT
issues.reporter_id AS ID,
message.msg_timestamp AS created_at,
issues.repo_id,
'issue_comment' :: TEXT AS ACTION,
contributors.cntrb_login AS LOGIN
FROM
augur_data.issues,
augur_data.issue_message_ref,
augur_data.message
LEFT JOIN augur_data.contributors ON contributors.cntrb_id = message.cntrb_id
WHERE
issue_message_ref.msg_id = message.msg_id
AND issues.issue_id = issue_message_ref.issue_id
AND issues.closed_at != message.msg_timestamp
) A,
augur_data.repo
WHERE
A.repo_id = repo.repo_id
ORDER BY
A.created_at DESC"""))

45 changes: 45 additions & 0 deletions augur/application/schema/alembic/versions/23_add_index_ghlogin.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
"""add index

Revision ID: 23
Revises: 22
Create Date: 2023-08-23 18:17:22.651191

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy import text

# revision identifiers, used by Alembic.
revision = '23'
down_revision = '22'
branch_labels = None
depends_on = None


def upgrade():
    """Apply revision 23: create the gh_login index on contributors."""
    gh_loginindex(upgrade=True)

def downgrade():
    """Revert revision 23: drop the gh_login index on contributors."""
    gh_loginindex(False)

def gh_loginindex(upgrade=True):
    """Create or drop the btree index on augur_data.contributors.gh_login.

    :param upgrade: True creates the index, False drops it. Both directions
        are idempotent via IF (NOT) EXISTS.
    """
    conn = op.get_bind()

    if upgrade:

        conn.execute(text("""
        CREATE INDEX if not exists "gh_login" ON "augur_data"."contributors" USING btree (
        "gh_login" ASC NULLS FIRST);"""))

    else:

        # Bug fix: DROP INDEX takes only the (schema-qualified) index name.
        # The original statement appended "ON ... USING btree (...)", which
        # is invalid PostgreSQL and made the downgrade fail.
        conn.execute(text("""
        DROP INDEX if exists "augur_data"."gh_login";"""))
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""Alter repo labor unique

Revision ID: 24
Revises: 23
Create Date: 2023-08-25 18:17:22.651191

"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql
from sqlalchemy.sql import text
import re

# revision identifiers, used by Alembic.
revision = '24'
down_revision = '23'
branch_labels = None
depends_on = None


def upgrade():
    """Recreate the "rl-unique" constraint on augur_data.repo_labor without
    DEFERRABLE INITIALLY DEFERRED, so uniqueness is enforced at statement
    time instead of at transaction commit.
    """
    # ### commands auto generated by Alembic - please adjust! ###

    conn = op.get_bind()

    # Remove constraint being initially deferred. DROP ... IF EXISTS makes
    # the migration idempotent.
    # (Also removed here: a leftover merge-conflict string literal and an
    # f-prefix on a string with no placeholders.)
    conn.execute(text("""
    ALTER TABLE "augur_data"."repo_labor"
    DROP CONSTRAINT IF EXISTS "rl-unique",
    ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name");
    """))

    # ### end Alembic commands ###


def downgrade():
    """Restore the "rl-unique" constraint on augur_data.repo_labor in its
    DEFERRABLE INITIALLY DEFERRED form (checked at transaction commit).
    """
    # ### commands auto generated by Alembic - please adjust! ###
    conn = op.get_bind()

    # Make unique initially deferred again. (f-prefix removed: the string
    # contains no placeholders.)
    conn.execute(text("""
    ALTER TABLE "augur_data"."repo_labor"
    DROP CONSTRAINT IF EXISTS "rl-unique",
    ADD CONSTRAINT "rl-unique" UNIQUE ("repo_id", "rl_analysis_date", "file_path", "file_name") DEFERRABLE INITIALLY DEFERRED;
    """))

    # ### end Alembic commands ###
15 changes: 4 additions & 11 deletions augur/tasks/git/dependency_tasks/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from augur.tasks.github.util.github_api_key_handler import GithubApiKeyHandler
from augur.application.db.util import execute_session_query
from augur.tasks.git.dependency_tasks.dependency_util import dependency_calculator as dep_calc
from augur.tasks.util.worker_util import parse_json_from_subprocess_call

def generate_deps_data(session, repo_id, path):
"""Runs deps modules on repo and stores data in database
"""Run dependency logic on repo and stores data in database
:param repo_id: Repository ID
:param path: Absolute path of the Repostiory
"""
Expand Down Expand Up @@ -80,16 +81,8 @@ def generate_scorecard(session,repo_id,path):
key_handler = GithubApiKeyHandler(session)
os.environ['GITHUB_AUTH_TOKEN'] = key_handler.get_random_key()

p= subprocess.run(['./scorecard', command, '--format=json'], cwd= path_to_scorecard ,capture_output=True, text=True, timeout=None)
session.logger.info('subprocess completed successfully... ')
output = p.stdout

try:
required_output = json.loads(output)
except json.decoder.JSONDecodeError as e:
session.logger.error(f"Could not parse required output! \n output: {output} \n Error: {e}")
return

required_output = parse_json_from_subprocess_call(session.logger,['./scorecard', command, '--format=json'],cwd=path_to_scorecard)

session.logger.info('adding to database...')
session.logger.debug(f"output: {required_output}")

Expand Down
4 changes: 3 additions & 1 deletion augur/tasks/git/facade_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@

from augur.tasks.git.dependency_tasks.tasks import process_dependency_metrics
from augur.tasks.git.dependency_libyear_tasks.tasks import process_libyear_dependency_metrics
from augur.tasks.git.scc_value_tasks.tasks import process_scc_value_metrics

from augur.tasks.github.util.github_paginator import GithubPaginator, hit_api
from augur.tasks.github.util.gh_graphql_entities import PullRequest
Expand Down Expand Up @@ -526,7 +527,8 @@ def facade_phase(repo_git):
group(
chain(*facade_core_collection),
process_dependency_metrics.si(repo_git),
process_libyear_dependency_metrics.si(repo_git)
process_libyear_dependency_metrics.si(repo_git),
process_scc_value_metrics.si(repo_git)
)
)

Expand Down
Empty file.
Loading