Skip to content

Commit

Permalink
feat!: migrate to use microgen (#34)
Browse files Browse the repository at this point in the history
* feat!: migrate to use microgen

* update sample

* update sample

* update sample

* Update UPGRADING.md

Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com>

Co-authored-by: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com>
  • Loading branch information
2 people authored and ivanmkc committed Nov 4, 2022
1 parent 7858a28 commit cee3077
Show file tree
Hide file tree
Showing 17 changed files with 393 additions and 362 deletions.
53 changes: 25 additions & 28 deletions datalabeling/snippets/create_annotation_spec_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,61 +26,58 @@ def create_annotation_spec_set(project_id):
Google Cloud project.
"""
from google.cloud import datalabeling_v1beta1 as datalabeling

client = datalabeling.DataLabelingServiceClient()
# [END datalabeling_create_annotation_spec_set_beta]
# If DATALABELING_ENDPOINT is set, use it as the API endpoint so that tests of
# this snippet do not trigger any action by a real human.
if 'DATALABELING_ENDPOINT' in os.environ:
opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
if "DATALABELING_ENDPOINT" in os.environ:
opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT"))
client = datalabeling.DataLabelingServiceClient(client_options=opts)
# [START datalabeling_create_annotation_spec_set_beta]

project_path = client.project_path(project_id)
project_path = f"projects/{project_id}"

annotation_spec_1 = datalabeling.types.AnnotationSpec(
display_name='label_1',
description='label_description_1'
annotation_spec_1 = datalabeling.AnnotationSpec(
display_name="label_1", description="label_description_1"
)

annotation_spec_2 = datalabeling.types.AnnotationSpec(
display_name='label_2',
description='label_description_2'
annotation_spec_2 = datalabeling.AnnotationSpec(
display_name="label_2", description="label_description_2"
)

annotation_spec_set = datalabeling.types.AnnotationSpecSet(
display_name='YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME',
description='YOUR_DESCRIPTION',
annotation_specs=[annotation_spec_1, annotation_spec_2]
annotation_spec_set = datalabeling.AnnotationSpecSet(
display_name="YOUR_ANNOTATION_SPEC_SET_DISPLAY_NAME",
description="YOUR_DESCRIPTION",
annotation_specs=[annotation_spec_1, annotation_spec_2],
)

response = client.create_annotation_spec_set(
project_path, annotation_spec_set)
request={"parent": project_path, "annotation_spec_set": annotation_spec_set}
)

# The format of the resource name:
# projects/{project_id}/annotationSpecSets/{annotation_spec_set_id}
print('The annotation_spec_set resource name: {}'.format(response.name))
print('Display name: {}'.format(response.display_name))
print('Description: {}'.format(response.description))
print('Annotation specs:')
print("The annotation_spec_set resource name: {}".format(response.name))
print("Display name: {}".format(response.display_name))
print("Description: {}".format(response.description))
print("Annotation specs:")
for annotation_spec in response.annotation_specs:
print('\tDisplay name: {}'.format(annotation_spec.display_name))
print('\tDescription: {}\n'.format(annotation_spec.description))
print("\tDisplay name: {}".format(annotation_spec.display_name))
print("\tDescription: {}\n".format(annotation_spec.description))

return response


# [END datalabeling_create_annotation_spec_set_beta]


if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)

parser.add_argument(
'--project-id',
help='Project ID. Required.',
required=True
)
parser.add_argument("--project-id", help="Project ID. Required.", required=True)

args = parser.parse_args()

Expand Down
10 changes: 5 additions & 5 deletions datalabeling/snippets/create_annotation_spec_set_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,10 @@
import testing_lib


PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT')
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")


@pytest.fixture(scope='module')
@pytest.fixture(scope="module")
def cleaner():
resource_names = []

Expand All @@ -38,9 +38,9 @@ def cleaner():


def test_create_annotation_spec_set(cleaner, capsys):

@backoff.on_exception(
backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE)
backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE
)
def run_sample():
return create_annotation_spec_set.create_annotation_spec_set(PROJECT_ID)

Expand All @@ -50,4 +50,4 @@ def run_sample():
cleaner.append(response.name)

out, _ = capsys.readouterr()
assert 'The annotation_spec_set resource name:' in out
assert "The annotation_spec_set resource name:" in out
73 changes: 33 additions & 40 deletions datalabeling/snippets/create_instruction.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,77 +27,70 @@ def create_instruction(project_id, data_type, instruction_gcs_uri):
Google Cloud Storage.
"""
from google.cloud import datalabeling_v1beta1 as datalabeling

client = datalabeling.DataLabelingServiceClient()
# [END datalabeling_create_instruction_beta]
# If DATALABELING_ENDPOINT is set, use it as the API endpoint so that tests of
# this snippet do not trigger any action by a real human.
if 'DATALABELING_ENDPOINT' in os.environ:
opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
if "DATALABELING_ENDPOINT" in os.environ:
opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT"))
client = datalabeling.DataLabelingServiceClient(client_options=opts)
# [START datalabeling_create_instruction_beta]

project_path = client.project_path(project_id)
project_path = f"projects/{project_id}"

pdf_instruction = datalabeling.types.PdfInstruction(
gcs_file_uri=instruction_gcs_uri)
pdf_instruction = datalabeling.PdfInstruction(gcs_file_uri=instruction_gcs_uri)

instruction = datalabeling.types.Instruction(
display_name='YOUR_INSTRUCTION_DISPLAY_NAME',
description='YOUR_DESCRIPTION',
instruction = datalabeling.Instruction(
display_name="YOUR_INSTRUCTION_DISPLAY_NAME",
description="YOUR_DESCRIPTION",
data_type=data_type,
pdf_instruction=pdf_instruction
pdf_instruction=pdf_instruction,
)

operation = client.create_instruction(project_path, instruction)
operation = client.create_instruction(
request={"parent": project_path, "instruction": instruction}
)

result = operation.result()

# The format of the resource name:
# projects/{project_id}/instructions/{instruction_id}
print('The instruction resource name: {}'.format(result.name))
print('Display name: {}'.format(result.display_name))
print('Description: {}'.format(result.description))
print('Create time:')
print('\tseconds: {}'.format(result.create_time.seconds))
print('\tnanos: {}'.format(result.create_time.nanos))
print('Data type: {}'.format(
datalabeling.enums.DataType(result.data_type).name))
print('Pdf instruction:')
print('\tGcs file uri: {}\n'.format(
result.pdf_instruction.gcs_file_uri))
print("The instruction resource name: {}".format(result.name))
print("Display name: {}".format(result.display_name))
print("Description: {}".format(result.description))
print("Create time:")
print("\tseconds: {}".format(result.create_time.timestamp_pb().seconds))
print("\tnanos: {}".format(result.create_time.timestamp_pb().nanos))
print("Data type: {}".format(datalabeling.DataType(result.data_type).name))
print("Pdf instruction:")
print("\tGcs file uri: {}\n".format(result.pdf_instruction.gcs_file_uri))

return result


# [END datalabeling_create_instruction_beta]


if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)

parser.add_argument(
'--project-id',
help='Project ID. Required.',
required=True
)
parser.add_argument("--project-id", help="Project ID. Required.", required=True)

parser.add_argument(
'--data-type',
help='Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.',
required=True
"--data-type",
help="Data type. Only support IMAGE, VIDEO, TEXT and AUDIO. Required.",
required=True,
)

parser.add_argument(
'--instruction-gcs-uri',
help='The URI of Google Cloud Storage of the instruction. Required.',
required=True
"--instruction-gcs-uri",
help="The URI of Google Cloud Storage of the instruction. Required.",
required=True,
)

args = parser.parse_args()

create_instruction(
args.project_id,
args.data_type,
args.instruction_gcs_uri
)
create_instruction(args.project_id, args.data_type, args.instruction_gcs_uri)
17 changes: 9 additions & 8 deletions datalabeling/snippets/create_instruction_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,18 +18,18 @@

import backoff
from google.api_core.exceptions import ServerError
from google.cloud import datalabeling
import pytest

import create_instruction
import testing_lib


PROJECT_ID = os.getenv('GOOGLE_CLOUD_PROJECT')
INSTRUCTION_GCS_URI = ('gs://cloud-samples-data/datalabeling'
'/instruction/test.pdf')
PROJECT_ID = os.getenv("GOOGLE_CLOUD_PROJECT")
INSTRUCTION_GCS_URI = "gs://cloud-samples-data/datalabeling" "/instruction/test.pdf"


@pytest.fixture(scope='module')
@pytest.fixture(scope="module")
def cleaner():
resource_names = []

Expand All @@ -40,15 +40,16 @@ def cleaner():


def test_create_instruction(cleaner, capsys):

@backoff.on_exception(
backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE)
backoff.expo, ServerError, max_time=testing_lib.RETRY_DEADLINE
)
def run_sample():
return create_instruction.create_instruction(
PROJECT_ID, 'IMAGE', INSTRUCTION_GCS_URI)
PROJECT_ID, datalabeling.DataType.IMAGE, INSTRUCTION_GCS_URI
)

instruction = run_sample()
cleaner.append(instruction.name)

out, _ = capsys.readouterr()
assert 'The instruction resource name: ' in out
assert "The instruction resource name: " in out
62 changes: 33 additions & 29 deletions datalabeling/snippets/export_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,67 +21,71 @@


# [START datalabeling_export_data_beta]
def export_data(dataset_resource_name, annotated_dataset_resource_name,
export_gcs_uri):
def export_data(dataset_resource_name, annotated_dataset_resource_name, export_gcs_uri):
"""Exports a dataset from the given Google Cloud project."""
from google.cloud import datalabeling_v1beta1 as datalabeling

client = datalabeling.DataLabelingServiceClient()
# [END datalabeling_export_data_beta]
# If DATALABELING_ENDPOINT is set, use it as the API endpoint so that tests of
# this snippet do not trigger any action by a real human.
if 'DATALABELING_ENDPOINT' in os.environ:
opts = ClientOptions(api_endpoint=os.getenv('DATALABELING_ENDPOINT'))
if "DATALABELING_ENDPOINT" in os.environ:
opts = ClientOptions(api_endpoint=os.getenv("DATALABELING_ENDPOINT"))
client = datalabeling.DataLabelingServiceClient(client_options=opts)
# [START datalabeling_export_data_beta]

gcs_destination = datalabeling.types.GcsDestination(
output_uri=export_gcs_uri, mime_type='text/csv')
gcs_destination = datalabeling.GcsDestination(
output_uri=export_gcs_uri, mime_type="text/csv"
)

output_config = datalabeling.types.OutputConfig(
gcs_destination=gcs_destination)
output_config = datalabeling.OutputConfig(gcs_destination=gcs_destination)

response = client.export_data(
dataset_resource_name,
annotated_dataset_resource_name,
output_config
request={
"name": dataset_resource_name,
"annotated_dataset": annotated_dataset_resource_name,
"output_config": output_config,
}
)

print('Dataset ID: {}\n'.format(response.result().dataset))
print('Output config:')
print('\tGcs destination:')
print('\t\tOutput URI: {}\n'.format(
response.result().output_config.gcs_destination.output_uri))
print("Dataset ID: {}\n".format(response.result().dataset))
print("Output config:")
print("\tGcs destination:")
print(
"\t\tOutput URI: {}\n".format(
response.result().output_config.gcs_destination.output_uri
)
)


# [END datalabeling_export_data_beta]


if __name__ == '__main__':
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description=__doc__,
formatter_class=argparse.RawDescriptionHelpFormatter
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
)

parser.add_argument(
'--dataset-resource-name',
help='Dataset resource name. Required.',
required=True
"--dataset-resource-name",
help="Dataset resource name. Required.",
required=True,
)

parser.add_argument(
'--annotated-dataset-resource-name',
help='Annotated Dataset resource name. Required.',
required=True
"--annotated-dataset-resource-name",
help="Annotated Dataset resource name. Required.",
required=True,
)

parser.add_argument(
'--export-gcs-uri',
help='The export GCS URI. Required.',
required=True
"--export-gcs-uri", help="The export GCS URI. Required.", required=True
)

args = parser.parse_args()

export_data(
args.dataset_resource_name,
args.annotated_dataset_resource_name,
args.export_gcs_uri
args.export_gcs_uri,
)
Loading

0 comments on commit cee3077

Please sign in to comment.