diff --git a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml index f3b3f5c007ee..45114bfa9e87 100644 --- a/airbyte-config/init/src/main/resources/seed/source_definitions.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_definitions.yaml @@ -354,7 +354,7 @@ - name: GitHub sourceDefinitionId: ef69ef6e-aa7f-4af1-a01d-ef775033524e dockerRepository: airbyte/source-github - dockerImageTag: 0.3.2 + dockerImageTag: 0.3.3 documentationUrl: https://docs.airbyte.io/integrations/sources/github icon: github.svg sourceType: api diff --git a/airbyte-config/init/src/main/resources/seed/source_specs.yaml b/airbyte-config/init/src/main/resources/seed/source_specs.yaml index d3bfd165d902..227064bc1db3 100644 --- a/airbyte-config/init/src/main/resources/seed/source_specs.yaml +++ b/airbyte-config/init/src/main/resources/seed/source_specs.yaml @@ -3503,7 +3503,7 @@ supportsNormalization: false supportsDBT: false supported_destination_sync_modes: [] -- dockerImage: "airbyte/source-github:0.3.2" +- dockerImage: "airbyte/source-github:0.3.3" spec: documentationUrl: "https://docs.airbyte.com/integrations/sources/github" connectionSpecification: diff --git a/airbyte-integrations/connectors/source-github/Dockerfile b/airbyte-integrations/connectors/source-github/Dockerfile index ef0dab90d30a..112569d34660 100644 --- a/airbyte-integrations/connectors/source-github/Dockerfile +++ b/airbyte-integrations/connectors/source-github/Dockerfile @@ -12,5 +12,5 @@ RUN pip install . ENV AIRBYTE_ENTRYPOINT "python /airbyte/integration_code/main.py" ENTRYPOINT ["python", "/airbyte/integration_code/main.py"] -LABEL io.airbyte.version=0.3.2 +LABEL io.airbyte.version=0.3.3 LABEL io.airbyte.name=airbyte/source-github diff --git a/airbyte-integrations/connectors/source-github/source_github/source.py b/airbyte-integrations/connectors/source-github/source_github/source.py index fba2e0318944..db18092f14c7 100644 --- a/airbyte-integrations/connectors/source-github/source_github/source.py +++ b/airbyte-integrations/connectors/source-github/source_github/source.py @@ -225,7 +225,7 @@ def streams(self, config: Mapping[str, Any]) -> List[Stream]: team_members_stream, Users(**organization_args), Workflows(**repository_args_with_start_date), - WorkflowRuns(**repository_args_with_start_date), + workflow_runs_stream, WorkflowJobs(parent=workflow_runs_stream, **repository_args_with_start_date), TeamMemberships(parent=team_members_stream, **repository_args), ] diff --git a/airbyte-integrations/connectors/source-github/source_github/streams.py b/airbyte-integrations/connectors/source-github/source_github/streams.py index 17c42df169ec..7c495444a067 100644 --- a/airbyte-integrations/connectors/source-github/source_github/streams.py +++ b/airbyte-integrations/connectors/source-github/source_github/streams.py @@ -1353,18 +1353,26 @@ def __init__(self, parent: WorkflowRuns, **kwargs): def path(self, stream_slice: Mapping[str, Any] = None, **kwargs) -> str: return f"repos/{stream_slice['repository']}/actions/runs/{stream_slice['run_id']}/jobs" - def stream_slices( - self, sync_mode: SyncMode, cursor_field: List[str] = None, stream_state: Mapping[str, Any] = None - ) -> Iterable[Optional[Mapping[str, Any]]]: - parent_stream_slices = self.parent.stream_slices( - sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_state=stream_state - ) + def read_records( + self, + sync_mode: SyncMode, + cursor_field: List[str] = None, + stream_slice: Mapping[str, Any] = None, + stream_state: Mapping[str, Any] = None, + ) -> Iterable[Mapping[str, Any]]: + parent_stream_state = None + if stream_state is not None: + parent_stream_state = {repository: {self.parent.cursor_field: v[self.cursor_field]} for repository, v in stream_state.items()} + parent_stream_slices = self.parent.stream_slices(sync_mode=sync_mode, cursor_field=cursor_field, stream_state=parent_stream_state) for stream_slice in parent_stream_slices: parent_records = self.parent.read_records( - sync_mode=SyncMode.full_refresh, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state + sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=parent_stream_state ) for record in parent_records: - yield {"repository": record["repository"]["full_name"], "run_id": record["id"]} + stream_slice["run_id"] = record["id"] + yield from super().read_records( + sync_mode=sync_mode, cursor_field=cursor_field, stream_slice=stream_slice, stream_state=stream_state + ) def parse_response( self, @@ -1373,14 +1381,16 @@ def parse_response( stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None, ) -> Iterable[Mapping]: - for record in response.json().get("jobs"): # GitHub puts records in an array. - yield self.transform(record=record, stream_slice=stream_slice) + for record in response.json()["jobs"]: + if record.get(self.cursor_field): + yield self.transform(record=record, stream_slice=stream_slice) - def transform(self, record: MutableMapping[str, Any], stream_slice: Mapping[str, Any]) -> MutableMapping[str, Any]: - record = super().transform(record=record, stream_slice=stream_slice) - record["run_id"] = stream_slice["run_id"] - record["repository"] = stream_slice["repository"] - return record + def request_params( + self, stream_state: Mapping[str, Any], stream_slice: Mapping[str, Any] = None, next_page_token: Mapping[str, Any] = None + ) -> MutableMapping[str, Any]: + params = super().request_params(stream_state=stream_state, stream_slice=stream_slice, next_page_token=next_page_token) + params["filter"] = "all" + return params class TeamMembers(GithubStream): diff --git a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py index b4d5cafd1a54..c72497fc0acc 100644 --- a/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py +++ b/airbyte-integrations/connectors/source-github/unit_tests/test_stream.py @@ -1077,426 +1077,105 @@ def test_stream_workflow_runs_read_incremental(monkeypatch): @responses.activate -def test_stream_workflow_jobs_read_incremental(): +def test_stream_workflow_jobs_read(): repository_args = { "repositories": ["org/repo"], "page_size_for_large_streams": 100, } - repository_args_with_start_date = {**repository_args, "start_date": "2022-09-01T00:00:00Z"} + repository_args_with_start_date = {**repository_args, "start_date": "2022-09-02T09:05:00Z"} workflow_runs_stream = WorkflowRuns(**repository_args_with_start_date) stream = WorkflowJobs(workflow_runs_stream, **repository_args_with_start_date) - data = [ + workflow_runs = [ { "id": 1, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 1, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], + "created_at": "2022-09-02T09:00:00Z", + "updated_at": "2022-09-02T09:10:02Z", + "repository": {"full_name": "org/repo"}, }, { "id": 2, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 1, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - }, - { - "id": 3, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - }, - { - "id": 4, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], + "created_at": "2022-09-02T09:06:00Z", + "updated_at": "2022-09-02T09:08:00Z", + "repository": {"full_name": "org/repo"}, }, ] - responses.add( - "GET", - "https://api.github.com/repos/org/repo/actions/runs", - json={ - "total_count": 2, - "workflow_runs": [ - { - "id": 1, - "created_at": "2022-09-02T09:10:02Z", - "updated_at": "2022-09-02T09:10:02Z", - "repository": {"full_name": "org/repo"}, - }, - { - "id": 2, - "created_at": "2022-09-02T09:10:04Z", - "updated_at": "2022-09-02T09:10:04Z", - "repository": {"full_name": "org/repo"}, - }, - ], - }, - ) + workflow_jobs_1 = [ + {"id": 1, "completed_at": "2022-09-02T09:02:00Z", "run_id": 1}, + {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1}, + {"id": 5, "completed_at": None, "run_id": 1}, + ] - responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": data[0:2]}) + workflow_jobs_2 = [ + {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2}, + {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2}, + ] responses.add( "GET", - "https://api.github.com/repos/org/repo/actions/runs/2/jobs", - json={"jobs": data[2:4]}, + "https://api.github.com/repos/org/repo/actions/runs", + json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs}, ) + responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1}) + responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2}) state = {} records = read_incremental(stream, state) - - assert state == {"org/repo": {"completed_at": "2022-09-02T10:11:02Z"}} + assert state == {"org/repo": {"completed_at": "2022-09-02T09:10:00Z"}} assert records == [ - { - "id": 1, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 1, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, - { - "id": 2, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 1, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, - { - "id": 3, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, - { - "id": 4, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, + {"completed_at": "2022-09-02T09:10:00Z", "id": 4, "repository": "org/repo", "run_id": 1}, + {"completed_at": "2022-09-02T09:07:00Z", "id": 2, "repository": "org/repo", "run_id": 2}, + {"completed_at": "2022-09-02T09:08:00Z", "id": 3, "repository": "org/repo", "run_id": 2}, ] assert len(responses.calls) == 3 - data.insert( - 0, + workflow_jobs_1[2]["completed_at"] = "2022-09-02T09:12:00Z" + workflow_runs[0]["updated_at"] = "2022-09-02T09:12:01Z" + workflow_runs.append( { - "id": 5, - "completed_at": "2022-09-03T01:00:00Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - }, - ) - - data[2]["completed_at"] = "2022-09-04T01:00:00Z" # data with ID 2 - - responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": data[0:1]}) - - responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": data[1:2]}) - - responses.add( - "GET", - "https://api.github.com/repos/org/repo/actions/runs/2/jobs", - json={"jobs": data[2:3]}, + "id": 3, + "created_at": "2022-09-02T09:14:00Z", + "updated_at": "2022-09-02T09:15:00Z", + "repository": {"full_name": "org/repo"}, + } ) + workflow_jobs_3 = [ + {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3}, + {"id": 7, "completed_at": None, "run_id": 3}, + ] responses.add( "GET", - "https://api.github.com/repos/org/repo/actions/runs/2/jobs", - json={"jobs": data[3:4]}, + "https://api.github.com/repos/org/repo/actions/runs", + json={"total_count": len(workflow_runs), "workflow_runs": workflow_runs}, ) + responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/1/jobs", json={"jobs": workflow_jobs_1}) + responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/2/jobs", json={"jobs": workflow_jobs_2}) + responses.add("GET", "https://api.github.com/repos/org/repo/actions/runs/3/jobs", json={"jobs": workflow_jobs_3}) responses.calls.reset() records = read_incremental(stream, state) - assert state == {"org/repo": {"completed_at": "2022-09-04T01:00:00Z"}} + assert state == {"org/repo": {"completed_at": "2022-09-02T09:15:00Z"}} assert records == [ - { - "id": 5, - "completed_at": "2022-09-03T01:00:00Z", - "run_id": 1, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, - { - "id": 2, - "completed_at": "2022-09-04T01:00:00Z", - "run_id": 2, - "steps": [ - {"name": "Set up job", "status": "completed", "conclusion": "success", "number": 1}, - {"name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", "status": "completed", "conclusion": "success", "number": 2}, - ], - "repository": "org/repo", - }, + {"completed_at": "2022-09-02T09:12:00Z", "id": 5, "repository": "org/repo", "run_id": 1}, + {"completed_at": "2022-09-02T09:15:00Z", "id": 6, "repository": "org/repo", "run_id": 3}, ] - -@responses.activate -def test_stream_workflow_jobs_full_refresh(): - - repository_args = { - "repositories": ["org/repo"], - "page_size_for_large_streams": 100, - } - repository_args_with_start_date = {**repository_args, "start_date": "2022-09-01T00:00:00Z"} - - workflow_runs_stream = WorkflowRuns(**repository_args_with_start_date) - stream = WorkflowJobs(workflow_runs_stream, **repository_args) - - responses.add( - "GET", - "https://api.github.com/repos/org/repo/actions/runs", - json={ - "total_count": 2, - "workflow_runs": [ - { - "id": 1, - "created_at": "2022-09-02T09:10:02Z", - "updated_at": "2022-09-02T09:10:02Z", - "repository": {"full_name": "org/repo"}, - }, - { - "id": 2, - "created_at": "2022-09-02T09:10:04Z", - "updated_at": "2022-09-02T09:10:04Z", - "repository": {"full_name": "org/repo"}, - }, - ], - }, - ) - - responses.add( - "GET", - "https://api.github.com/repos/org/repo/actions/runs/1/jobs", - json={ - "jobs": [ - { - "id": 1, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 1, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T09:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T09:02:00.000-00:00", - }, - ], - }, - { - "id": 2, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 1, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T10:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T10:02:00.000-00:00", - }, - ], - }, - ] - }, - ) - - responses.add( - "GET", - "https://api.github.com/repos/org/repo/actions/runs/2/jobs", - json={ - "jobs": [ - { - "id": 3, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 2, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T09:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T09:02:00.000-00:00", - }, - ], - }, - { - "id": 4, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 2, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T10:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T10:02:00.000-00:00", - }, - ], - }, - ] - }, - ) - records = list(read_full_refresh(stream)) - assert records == [ - { - "id": 1, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 1, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T09:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T09:02:00.000-00:00", - }, - ], - "repository": "org/repo", - }, - { - "id": 2, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 1, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T10:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T10:02:00.000-00:00", - }, - ], - "repository": "org/repo", - }, - { - "id": 3, - "completed_at": "2022-09-02T09:11:02Z", - "run_id": 2, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T09:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T09:02:00.000-00:00", - }, - ], - "repository": "org/repo", - }, - { - "id": 4, - "completed_at": "2022-09-02T10:11:02Z", - "run_id": 2, - "steps": [ - { - "name": "Set up job", - "status": "completed", - "conclusion": "success", - "number": 1, - "completed_at": "2022-09-02T10:01:00.000-00:00", - }, - { - "name": "Pull ghcr.io/rtcamp/action-slack-notify:v2.2.0", - "status": "completed", - "conclusion": "success", - "number": 2, - "completed_at": "2022-09-02T10:02:00.000-00:00", - }, - ], - "repository": "org/repo", - }, + {"id": 4, "completed_at": "2022-09-02T09:10:00Z", "run_id": 1, "repository": "org/repo"}, + {"id": 5, "completed_at": "2022-09-02T09:12:00Z", "run_id": 1, "repository": "org/repo"}, + {"id": 2, "completed_at": "2022-09-02T09:07:00Z", "run_id": 2, "repository": "org/repo"}, + {"id": 3, "completed_at": "2022-09-02T09:08:00Z", "run_id": 2, "repository": "org/repo"}, + {"id": 6, "completed_at": "2022-09-02T09:15:00Z", "run_id": 3, "repository": "org/repo"}, ] - assert len(responses.calls) == 3 - @responses.activate def test_stream_pull_request_comment_reactions_read(): diff --git a/docs/integrations/sources/github.md b/docs/integrations/sources/github.md index 7d5a8e0a1181..cddeefb74315 100644 --- a/docs/integrations/sources/github.md +++ b/docs/integrations/sources/github.md @@ -147,6 +147,7 @@ The GitHub connector should not run into GitHub API limitations under normal usa | Version | Date | Pull Request | Subject | | :------ | :--------- | :---------------------------------------------------------------------------------------------------------------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| 0.3.3 | 2022-09-28 | [17287](https://github.com/airbytehq/airbyte/pull/17287) | Fix problem with "null" `cursor_field` for WorkflowJobs stream | | 0.3.2 | 2022-09-28 | [17304](https://github.com/airbytehq/airbyte/pull/17304) | Migrate to per-stream state. | | 0.3.1 | 2022-09-21 | [16947](https://github.com/airbytehq/airbyte/pull/16947) | Improve error logging when handling HTTP 500 error | | 0.3.0 | 2022-09-09 | [16534](https://github.com/airbytehq/airbyte/pull/16534) | Add new stream `WorkflowJobs` |