Skip to content

Commit

Permalink
🎉 Source Github: PullRequestCommentReactions - re-implemented using G…
Browse files Browse the repository at this point in the history
…raphQL (#14795)

Signed-off-by: Sergey Chvalyuk <grubberr@gmail.com>
  • Loading branch information
grubberr authored Aug 2, 2022
1 parent 09d6901 commit ae58fa5
Show file tree
Hide file tree
Showing 10 changed files with 1,138 additions and 31 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@
- name: GitHub
sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e
dockerRepository: airbyte/source-github
dockerImageTag: 0.2.43
dockerImageTag: 0.2.44
documentationUrl: https://docs.airbyte.io/integrations/sources/github
icon: github.svg
sourceType: api
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2595,7 +2595,7 @@
supportsNormalization: false
supportsDBT: false
supported_destination_sync_modes: []
- dockerImage: "airbyte/source-github:0.2.43"
- dockerImage: "airbyte/source-github:0.2.44"
spec:
documentationUrl: "https://docs.airbyte.com/integrations/sources/github"
connectionSpecification:
Expand Down
2 changes: 1 addition & 1 deletion airbyte-integrations/connectors/source-github/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ RUN pip install .
ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py"
ENTRYPOINT ["python", "/airbyte/integration_code/main.py"]

LABEL io.airbyte.version=0.2.43
LABEL io.airbyte.version=0.2.44
LABEL io.airbyte.name=airbyte/source-github
Original file line number Diff line number Diff line change
Expand Up @@ -89,9 +89,7 @@
},
"pull_request_comment_reactions": {
"airbytehq/integration-test": {
"699253726": {
"created_at": "2121-12-31T23:59:59Z"
}
"created_at": "2121-12-31T23:59:59Z"
}
},
"pull_request_stats": {
Expand Down
196 changes: 180 additions & 16 deletions airbyte-integrations/connectors/source-github/source_github/graphql.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,30 @@
# Copyright (c) 2022 Airbyte, Inc., all rights reserved.
#

import heapq
import itertools
from typing import Optional

import sgqlc.operation
from sgqlc.operation import Selector

from . import github_schema

_schema = github_schema
_schema_root = _schema.github_schema


def select_user_fields(user):
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)


def get_query_pull_requests(owner, name, first, after, direction):
kwargs = {"first": first, "order_by": {"field": "UPDATED_AT", "direction": direction}}
if after:
Expand Down Expand Up @@ -41,14 +56,7 @@ def get_query_pull_requests(owner, name, first, after, direction):
reviews.total_count()
reviews.nodes.comments.__fields__(total_count=True)
user = pull_requests.nodes.merged_by(__alias__="merged_by").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True)
return str(op)

Expand Down Expand Up @@ -87,12 +95,168 @@ def get_query_reviews(owner, name, first, after, number=None):
)
reviews.nodes.commit.oid()
user = reviews.nodes.author(__alias__="user").__as__(_schema_root.User)
user.__fields__(
id="node_id",
database_id="id",
login=True,
avatar_url="avatar_url",
url="html_url",
is_site_admin="site_admin",
)
select_user_fields(user)
return str(op)


class QueryReactions:

# AVERAGE_REVIEWS - optimal number of reviews to fetch inside every pull request.
# If we try to fetch too many (up to 100) we will spend too many scores of query cost.
# https://docs.github.com/en/graphql/overview/resource-limitations#calculating-a-rate-limit-score-before-running-the-call
# If we query too low we would need to make additional sub-queries to fetch the rest of the reviews inside specific pull request.
AVERAGE_REVIEWS = 5
AVERAGE_COMMENTS = 2
AVERAGE_REACTIONS = 2

def get_query_root_repository(self, owner: str, name: str, first: int, after: Optional[str] = None):
"""
Get GraphQL query which allows fetching reactions starting from the repository:
query {
repository {
pull_requests(first: page_size) {
reviews(first: AVERAGE_REVIEWS) {
comments(first: AVERAGE_COMMENTS) {
reactions(first: AVERAGE_REACTIONS) {
}
}
}
}
}
}
"""
op = self._get_operation()
repository = op.repository(owner=owner, name=name)
repository.name()
repository.owner.login()

kwargs = {"first": first}
if after:
kwargs["after"] = after
pull_requests = repository.pull_requests(**kwargs)
pull_requests.page_info.__fields__(has_next_page=True, end_cursor=True)
pull_requests.total_count()
pull_requests.nodes.id(__alias__="node_id")

reviews = self._select_reviews(pull_requests.nodes, first=self.AVERAGE_REVIEWS)
comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS)
self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
return str(op)

def get_query_root_pull_request(self, node_id: str, first: int, after: str):
"""
Get GraphQL query which allows fetching reactions starting from the pull_request:
query {
pull_request {
reviews(first: AVERAGE_REVIEWS) {
comments(first: AVERAGE_COMMENTS) {
reactions(first: AVERAGE_REACTIONS) {
}
}
}
}
}
"""
op = self._get_operation()
pull_request = op.node(id=node_id).__as__(_schema_root.PullRequest)
pull_request.id(__alias__="node_id")
pull_request.repository.name()
pull_request.repository.owner.login()

reviews = self._select_reviews(pull_request, first, after)
comments = self._select_comments(reviews.nodes, first=self.AVERAGE_COMMENTS)
self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
return str(op)

def get_query_root_review(self, node_id: str, first: int, after: str):
"""
Get GraphQL query which allows fetching reactions starting from the review:
query {
review {
comments(first: AVERAGE_COMMENTS) {
reactions(first: AVERAGE_REACTIONS) {
}
}
}
}
"""
op = self._get_operation()
review = op.node(id=node_id).__as__(_schema_root.PullRequestReview)
review.id(__alias__="node_id")
review.repository.name()
review.repository.owner.login()

comments = self._select_comments(review, first, after)
self._select_reactions(comments.nodes, first=self.AVERAGE_REACTIONS)
return str(op)

def get_query_root_comment(self, node_id: str, first: int, after: str):
"""
Get GraphQL query which allows fetching reactions starting from the comment:
query {
comment {
reactions(first: AVERAGE_REACTIONS) {
}
}
}
"""
op = self._get_operation()
comment = op.node(id=node_id).__as__(_schema_root.PullRequestReviewComment)
comment.id(__alias__="node_id")
comment.database_id(__alias__="id")
comment.repository.name()
comment.repository.owner.login()
self._select_reactions(comment, first, after)
return str(op)

def _select_reactions(self, comment: Selector, first: int, after: Optional[str] = None):
kwargs = {"first": first}
if after:
kwargs["after"] = after
reactions = comment.reactions(**kwargs)
reactions.page_info.__fields__(has_next_page=True, end_cursor=True)
reactions.total_count()
reactions.nodes.__fields__(id="node_id", database_id="id", content=True, created_at="created_at")
select_user_fields(reactions.nodes.user())
return reactions

def _select_comments(self, review: Selector, first: int, after: Optional[str] = None):
kwargs = {"first": first}
if after:
kwargs["after"] = after
comments = review.comments(**kwargs)
comments.page_info.__fields__(has_next_page=True, end_cursor=True)
comments.total_count()
comments.nodes.id(__alias__="node_id")
comments.nodes.database_id(__alias__="id")
return comments

def _select_reviews(self, pull_request: Selector, first: int, after: Optional[str] = None):
kwargs = {"first": first}
if after:
kwargs["after"] = after
reviews = pull_request.reviews(**kwargs)
reviews.page_info.__fields__(has_next_page=True, end_cursor=True)
reviews.total_count()
reviews.nodes.id(__alias__="node_id")
reviews.nodes.database_id(__alias__="id")
return reviews

def _get_operation(self):
return sgqlc.operation.Operation(_schema_root.query_type)


class CursorStorage:
def __init__(self, typenames):
self.typename_to_prio = {o: prio for prio, o in enumerate(reversed(typenames))}
self.count = itertools.count()
self.storage = []

def add_cursor(self, typename, cursor, total_count, parent_id=None):
priority = self.typename_to_prio[typename]
heapq.heappush(self.storage, (priority, next(self.count), (typename, cursor, total_count, parent_id)))

def get_cursor(self):
if self.storage:
_, _, c = heapq.heappop(self.storage)
return {"typename": c[0], "cursor": c[1], "total_count": c[2], "parent_id": c[3]}
Original file line number Diff line number Diff line change
@@ -1,4 +1,28 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"$ref": "reaction.json"
"type": "object",
"properties": {
"id": {
"type": ["null", "integer"]
},
"node_id": {
"type": ["null", "string"]
},
"content": {
"type": ["null", "string"]
},
"created_at": {
"type": "string",
"format": "date-time"
},
"user": {
"$ref": "user_graphql.json"
},
"repository": {
"type": "string"
},
"comment_id": {
"type": "integer"
}
}
}
Loading

0 comments on commit ae58fa5

Please sign in to comment.