-
Notifications
You must be signed in to change notification settings - Fork 5
/
ocrd_eval.schema.json
1 lines (1 loc) · 5.97 KB
/
ocrd_eval.schema.json
1
{
  "$schema": "https://json-schema.org/draft/2019-09/schema",
  "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json",
  "title": "A List of Evaluations for OCR-D",
  "description": "- All references to URL are JSON-LD-like objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI.\n",
  "type": "array",
  "items": {
    "required": [
      "@id",
      "label",
      "metadata",
      "evaluation_results"
    ],
    "unevaluatedProperties": false,
    "allOf": [
      {
        "$ref": "#/$defs/LabeledUrl"
      },
      {
        "properties": {
          "metadata": {
            "$ref": "#/$defs/EvaluationMetadata"
          },
          "evaluation_results": {
            "$ref": "#/$defs/EvaluationReport"
          }
        }
      }
    ]
  },
  "$defs": {
    "LabeledUrl": {
      "type": "object",
      "required": [
        "@id"
      ],
      "properties": {
        "@id": {
          "type": "string",
          "format": "uri",
          "description": "URL of the thing"
        },
        "label": {
          "type": "string",
          "description": "Description of the thing for UI purposes"
        }
      }
    },
    "EvaluationMetadata": {
      "type": "object",
      "title": "Metadata about one evaluation",
      "additionalProperties": false,
      "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n",
      "required": [
        "ocr_workflow",
        "ocr_workspace",
        "eval_workflow",
        "eval_workspace",
        "gt_workspace",
        "document_metadata"
      ],
      "properties": {
        "ocr_workflow": {
          "allOf": [
            {
              "$ref": "#/$defs/LabeledUrl"
            }
          ],
          "description": "The OCR-D workflow that produced the ocr_workspace"
        },
        "ocr_workspace": {
          "allOf": [
            {
              "$ref": "#/$defs/LabeledUrl"
            }
          ],
          "description": "The workspace containing the OCR"
        },
        "eval_workflow": {
          "allOf": [
            {
              "$ref": "#/$defs/LabeledUrl"
            }
          ],
          "description": "The OCR-D workflow that produced the eval_workspace"
        },
        "eval_workspace": {
          "allOf": [
            {
              "$ref": "#/$defs/LabeledUrl"
            }
          ],
          "description": "The workspace containing the evaluation results"
        },
        "gt_workspace": {
          "allOf": [
            {
              "$ref": "#/$defs/LabeledUrl"
            }
          ],
          "description": "The workspace containing the GT"
        },
        "workflow_steps": {
          "type": "array",
          "description": "Human readable description of the individual steps and their parameters in the workflow (for UI)",
          "minItems": 1,
          "items": {
            "type": "object",
            "properties": {
              "id": {
                "type": "string",
                "description": "The name of the processor used for this workflow step",
                "pattern": "^ocrd-[a-z\\-]+"
              },
              "params": {
                "type": "object",
                "description": "A map of parameters and their values applied to the processor used for this workflow step"
              }
            },
            "required": [
              "id",
              "params"
            ]
          }
        },
        "workflow_model": {
          "type": "string",
          "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"
        },
        "eval_tool": {
          "type": "string",
          "description": "Human readable name and version of evaluation tool used (for UI)"
        },
        "document_metadata": {
          "type": "object",
          "title": "Bibliographical and typographical metadata about the work to be evaluated",
          "properties": {
            "publication_year": {
              "type": "number",
              "description": "Year the document was originally published"
            },
            "publication_century": {
              "type": "string",
              "description": "Century the document was originally published",
              "pattern": "[12][0-9]{3}-[12][0-9]{3}"
            },
            "publication_decade": {
              "type": "string",
              "description": "Decade the document was originally published",
              "pattern": "[12][0-9]{2}0-[12][0-9]{2}0"
            },
            "number_of_pages": {
              "type": "number",
              "description": "Number of pages in this work (i.e. the number of images in the gt_workspace)"
            },
            "layout": {
              "type": "string",
              "enum": [
                "simple",
                "complex"
              ]
            },
            "fonts": {
              "type": "array",
              "items": {
                "type": "string",
                "enum": [
                  "antiqua",
                  "textura",
                  "gotico-antiqua",
                  "rotunda",
                  "italic",
                  "bastarda",
                  "greek",
                  "schwabacher",
                  "hebrew",
                  "fraktur"
                ]
              }
            }
          }
        },
        "provenance": {
          "type": "object",
          "description": "Information on which tools in which version were used in determining metrics",
          "properties": {
            "parameters": {
              "type": "object",
              "description": "Parameters passed to the evaluation processor"
            }
          }
        }
      }
    },
    "EvaluationReport": {
      "type": "object",
      "additionalProperties": false,
      "description": "The metrics measured for this document",
      "properties": {
        "document_wide": {
          "type": "object",
          "description": "Document-wide metrics",
          "allOf": [
            {
              "$ref": "#/$defs/DocumentEvaluationMetrics"
            },
            {
              "$ref": "#/$defs/CommonEvaluationMetrics"
            }
          ],
          "unevaluatedProperties": false
        },
        "by_page": {
          "type": "array",
          "description": "Metrics page-by-page",
          "items": {
            "type": "object",
            "allOf": [
              {
                "$ref": "#/$defs/CommonEvaluationMetrics"
              },
              {
                "$ref": "#/$defs/PageId"
              }
            ],
            "unevaluatedProperties": false
          }
        }
      }
    },
    "PageId": {
      "type": "object",
      "properties": {
        "page_id": {
          "type": "string",
          "description": "PAGE ID"
        }
      }
    },
    "CommonEvaluationMetrics": {
      "type": "object",
      "properties": {
        "cer_mean": {
          "type": "number",
          "description": "Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page)"
        },
        "wer": {
          "type": "number",
          "description": "WER (word error rate) calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"
        }
      }
    },
    "DocumentEvaluationMetrics": {
      "type": "object",
      "properties": {
        "cer_median": {
          "type": "number",
          "description": "Median of the page-wise CER (in document_wide) or regions on a page (in by_page)"
        },
        "cer_range": {
          "type": "array",
          "minItems": 2,
          "maxItems": 2,
          "items": {
            "type": "number",
            "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"
          }
        },
        "cer_standard_deviation": {
          "type": "number",
          "description": "Standard deviation of the page-wise CER (in document_wide) or regions on a page (in by_page)"
        },
        "wall_time": {
          "type": "number",
          "description": "Actual time needed for processing workflow"
        },
        "cpu_time": {
          "type": "number",
          "description": "Cumulative CPU time used for processing workflow"
        },
        "pages_per_minute": {
          "type": "number",
          "description": "Number of pages processed per minute"
        }
      }
    }
  }
}