Skip to content

Commit

Permalink
feat: move workflow_steps to ocr_workflow object
Browse files Browse the repository at this point in the history
  • Loading branch information
mweidling committed Nov 24, 2022
1 parent c9d313f commit a814c89
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 17 deletions.
2 changes: 1 addition & 1 deletion ocrd_eval.sample.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
[{"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf1-data345-eval1.json", "label": "OCR workflow 1 on workspace 345", "metadata": {"ocr_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/1.nf", "label": "OCR Workflow 1"}, "eval_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf", "label": "Evaluation Workflow 1"}, "gt_workspace": {"@id": "https://gt.ocr-d.de/workspace/789", "label": "GT workspace 789 (19th century fraktur)"}, "ocr_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip", "label": "OCR result workspace 3000"}, "eval_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip", "label": "Evaluation Workspace 345"}, "workflow_steps": {"0": "Processor A", "1": "Processor B"}, "workflow_model": "Fraktur_GT4HistOCR", "document_metadata": {"fonts": ["antiqua", "fraktur"], "publication_century": "1800-1900", "publication_decade": "1850-1860", "publication_year": 1855, "number_of_pages": 100, "layout": "simple"}}, "evaluation": {"document_wide": {"wall_time": 1234, "cer": 0.57, "cer_min_max": [0.2, 0.57]}, "by_page": [{"page_id": "PHYS_0001", "cer": 0.8, "processing_time": 2.1}]}}, {"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf2-data345-eval1.json", "label": "OCR Workflow 2 on Data 345", "metadata": {"ocr_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/2.nf", "label": "OCR Workflow 2"}, "eval_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf", "label": "Evaluation Workflow 1"}, "gt_workspace": {"@id": "https://gt.ocr-d.de/workspace/789", "label": "GT workspace 789 (19th century fraktur)"}, "ocr_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip", "label": "OCR result workspace 3000"}, "eval_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip", "label": "Evaluation Workspace 345"}, "workflow_steps": {"0": "Processor A", "1": "Processor B"}, "workflow_model": "Fraktur_GT4HistOCR", "document_metadata": {"fonts": ["antiqua", "fraktur"], "publication_century": "1800-1900", "publication_decade": "1850-1860", "publication_year": 1855, "number_of_pages": 100, "layout": "simple"}}, "evaluation": {"document_wide": {"wall_time": 4567, "cer": 0.9, "cer_min_max": [0.2, 0.99]}, "by_page": [{"page_id": "PHYS_0001", "cer": 0.9, "processing_time": 2.1}]}}]
[{"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf1-data345-eval1.json", "label": "OCR workflow 1 on workspace 345", "metadata": {"ocr_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/1.nf", "label": "OCR Workflow 1", "steps": {"0": "Processor A", "1": "Processor B"}}, "eval_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf", "label": "Evaluation Workflow 1"}, "gt_workspace": {"@id": "https://gt.ocr-d.de/workspace/789", "label": "GT workspace 789 (19th century fraktur)"}, "ocr_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip", "label": "OCR result workspace 3000"}, "eval_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip", "label": "Evaluation Workspace 345"}, "workflow_model": "Fraktur_GT4HistOCR", "document_metadata": {"fonts": ["antiqua", "fraktur"], "publication_century": "1800-1900", "publication_decade": "1850-1860", "publication_year": 1855, "number_of_pages": 100, "layout": "simple"}}, "evaluation": {"document_wide": {"wall_time": 1234, "cer": 0.57, "cer_min_max": [0.2, 0.57]}, "by_page": [{"page_id": "PHYS_0001", "cer": 0.8, "processing_time": 2.1}]}}, {"@id": "https://github.com/OCR-D/quiver/tree/data/evaluations/wf2-data345-eval1.json", "label": "OCR Workflow 2 on Data 345", "metadata": {"ocr_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/2.nf", "label": "OCR Workflow 2", "steps": {"0": "Processor A", "1": "Processor B"}}, "eval_workflow": {"@id": "https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf", "label": "Evaluation Workflow 1"}, "gt_workspace": {"@id": "https://gt.ocr-d.de/workspace/789", "label": "GT workspace 789 (19th century fraktur)"}, "ocr_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/3000.ocrd.zip", "label": "OCR result workspace 3000"}, "eval_workspace": {"@id": "https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip", "label": "Evaluation Workspace 345"}, "workflow_model": "Fraktur_GT4HistOCR", "document_metadata": {"fonts": ["antiqua", "fraktur"], "publication_century": "1800-1900", "publication_decade": "1850-1860", "publication_year": 1855, "number_of_pages": 100, "layout": "simple"}}, "evaluation": {"document_wide": {"wall_time": 4567, "cer": 0.9, "cer_min_max": [0.2, 0.99]}, "by_page": [{"page_id": "PHYS_0001", "cer": 0.9, "processing_time": 2.1}]}}]
12 changes: 6 additions & 6 deletions ocrd_eval.sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@
ocr_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/1.nf
label: OCR Workflow 1
steps:
'0': Processor A
'1': Processor B
eval_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf
label: Evaluation Workflow 1
Expand All @@ -16,9 +19,6 @@
eval_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip
label: Evaluation Workspace 345
workflow_steps:
'0': Processor A
'1': Processor B
workflow_model: Fraktur_GT4HistOCR
document_metadata:
fonts:
Expand Down Expand Up @@ -47,6 +47,9 @@
ocr_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/2.nf
label: OCR Workflow 2
steps:
'0': Processor A
'1': Processor B
eval_workflow:
'@id': https://github.com/OCR-D/quiver/tree/data/workflows/eval1.nf
label: Evaluation Workflow 1
Expand All @@ -59,9 +62,6 @@
eval_workspace:
'@id': https://github.com/OCR-D/quiver/tree/data/workspaces/345.ocrd.zip
label: Evaluation Workspace 345
workflow_steps:
'0': Processor A
'1': Processor B
workflow_model: Fraktur_GT4HistOCR
document_metadata:
fonts:
Expand Down
2 changes: 1 addition & 1 deletion ocrd_eval.schema.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json", "title": "A list of evaluations for OCR-D", "description": "- All references to URL are JSON-LD-like objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI\n", "type": "array", "items": {"required": ["@id", "label", "metadata", "evaluation"], "unevaluatedProperties": false, "allOf": [{"$ref": "#/$defs/LabeledUrl"}, {"properties": {"metadata": {"$ref": "#/$defs/EvaluationMetadata"}, "evaluation": {"$ref": "#/$defs/EvaluationReport"}}}]}, "$defs": {"LabeledUrl": {"type": "object", "required": ["@id"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}}}, "EvaluationMetadata": {"type": "object", "title": "Metadata about one evaluation", "additionalProperties": false, "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n", "required": ["ocr_workflow", "ocr_workspace", "eval_workflow", "eval_workspace", "gt_workspace", "document_metadata"], "properties": {"ocr_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the ocr_workspace"}, "ocr_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the OCR"}, "eval_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the eval_workspace"}, "eval_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the evaluation results"}, "gt_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the GT"}, "workflow_steps": {"type": "object", "description": "Human readable description of the individual steps in the workflow (for UI)", "patternProperties": {"^[0-9]+$": {"type": "string", "description": "Description of this workflow step"}}}, "workflow_model": {"type": "string", "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"}, "eval_tool": {"type": "string", "description": "Human readable name and version of evaluation tool used (for UI)"}, "document_metadata": {"type": "object", "title": "Bibliographical and typographical metadata about the work to be evaluated", "properties": {"publication_year": {"type": "number", "description": "Year the document was originally published"}, "publication_century": {"type": "string", "description": "Century the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "publication_decade": {"type": "string", "description": "Decade the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "number_of_pages": {"type": "number", "description": "Number of pages in this work (i.e. the number of images in the gt_workspace)"}, "layout": {"type": "string", "enum": ["simple", "complex"]}, "fonts": {"type": "array", "items": {"type": "string", "enum": ["antiqua", "fraktur", "ancient_greek", "hebrew"]}}}}, "provenance": {"type": "object", "description": "Information on which tools in which version were used in determining metrics", "properties": {"parameters": {"type": "object", "description": "Parameters passed to the evaluation processor"}}}}}, "EvaluationReport": {"type": "object", "additionalProperties": false, "description": "The metrics measured for this document", "properties": {"document_wide": {"type": "object", "description": "Document-wide metrics", "properties": {"$ref": "#$defs/EvaluationMetrics"}}, "by_page": {"type": "array", "description": "Metrics page-by-page", "items": {"type": "object", "allOf": [{"properties": {"page_id": {"type": "string", "description": "PAGE ID"}}}, {"properties": {"$ref": "#$defs/EvaluationMetrics"}}]}}}}, "EvaluationMetrics": {"cer": {"description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}, "cer_mean": {"description": "Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "cer_median": {"description": "Median of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "cer_range": {"type": "array", "minItems": 2, "maxItems": 2, "items": {"type": "number", "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}, "cer_standard_deviation": {"description": "Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wer": {"description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}, "wall_time": {"description": "Actual time needed for processing workflow"}, "cpu_time": {"description": "Cumulative CPU time used for processing workflow"}, "pages_per_minute": {"description": "Number of pages processed per minute"}}}}
{"$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json", "title": "A list of evaluations for OCR-D", "description": "- All references to URL are JSON-LD-like objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI\n", "type": "array", "items": {"required": ["@id", "label", "metadata", "evaluation"], "unevaluatedProperties": false, "allOf": [{"$ref": "#/$defs/LabeledUrl"}, {"properties": {"metadata": {"$ref": "#/$defs/EvaluationMetadata"}, "evaluation": {"$ref": "#/$defs/EvaluationReport"}}}]}, "$defs": {"LabeledUrl": {"type": "object", "required": ["@id"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}}}, "EvaluationMetadata": {"type": "object", "title": "Metadata about one evaluation", "additionalProperties": false, "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n", "required": ["ocr_workflow", "ocr_workspace", "eval_workflow", "eval_workspace", "gt_workspace", "document_metadata"], "properties": {"ocr_workflow": {"type": "object", "required": ["@id", "steps"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}, "steps": {"type": "object", "description": "Human readable description of the individual steps in the workflow (for UI)", "patternProperties": {"^[0-9]+$": {"type": "string", "description": "Description of this workflow step"}}}}, "description": "The OCR-D workflow that produced the ocr_workspace"}, "ocr_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the OCR"}, "eval_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the eval_workspace"}, "eval_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the evaluation results"}, "gt_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the GT"}, "workflow_steps": {"type": "object", "description": "Human readable description of the individual steps in the workflow (for UI)", "patternProperties": {"^[0-9]+$": {"type": "string", "description": "Description of this workflow step"}}}, "workflow_model": {"type": "string", "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"}, "eval_tool": {"type": "string", "description": "Human readable name and version of evaluation tool used (for UI)"}, "document_metadata": {"type": "object", "title": "Bibliographical and typographical metadata about the work to be evaluated", "properties": {"publication_year": {"type": "number", "description": "Year the document was originally published"}, "publication_century": {"type": "string", "description": "Century the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "publication_decade": {"type": "string", "description": "Decade the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "number_of_pages": {"type": "number", "description": "Number of pages in this work (i.e. the number of images in the gt_workspace)"}, "layout": {"type": "string", "enum": ["simple", "complex"]}, "fonts": {"type": "array", "items": {"type": "string", "enum": ["antiqua", "fraktur", "ancient_greek", "hebrew"]}}}}, "provenance": {"type": "object", "description": "Information on which tools in which version were used in determining metrics", "properties": {"parameters": {"type": "object", "description": "Parameters passed to the evaluation processor"}}}}}, "EvaluationReport": {"type": "object", "additionalProperties": false, "description": "The metrics measured for this document", "properties": {"document_wide": {"type": "object", "description": "Document-wide metrics", "properties": {"$ref": "#$defs/EvaluationMetrics"}}, "by_page": {"type": "array", "description": "Metrics page-by-page", "items": {"type": "object", "allOf": [{"properties": {"page_id": {"type": "string", "description": "PAGE ID"}}}, {"properties": {"$ref": "#$defs/EvaluationMetrics"}}]}}}}, "EvaluationMetrics": {"cer": {"description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}, "cer_range": {"type": "array", "minItems": 2, "maxItems": 2, "items": {"type": "number", "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}, "wer": {"description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}, "wall_time": {"description": "Actual time needed for processing workflow"}, "cpu_time": {"description": "Cumulative CPU time used for processing workflow"}, "pages_per_minute": {"description": "Number of pages processed per minute"}}}}
26 changes: 17 additions & 9 deletions ocrd_eval.schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,23 @@ $defs:
properties:

ocr_workflow:
allOf: [{ '$ref': '#/$defs/LabeledUrl' }]
type: object
required: ['@id', 'steps']
properties:
'@id':
type: string
format: uri
description: URL of the thing
label:
type: string
description: Description of the thing for UI purposes
steps:
type: object
description: Human readable description of the individual steps in the workflow (for UI)
patternProperties:
'^[0-9]+$':
type: string
description: Description of this workflow step
description: The OCR-D workflow that produced the ocr_workspace

ocr_workspace:
Expand All @@ -77,14 +93,6 @@ $defs:
allOf: [{ '$ref': '#/$defs/LabeledUrl' }]
description: The workspace containing the GT

workflow_steps:
type: object
description: Human readable description of the individual steps in the workflow (for UI)
patternProperties:
'^[0-9]+$':
type: string
description: Description of this workflow step

workflow_model:
type: string
description: Human readable name of the main model used for recognition in the OCR workflow (for UI)
Expand Down

0 comments on commit a814c89

Please sign in to comment.