From 11965f5c9c48a8b46281b10c3d61f8187eed8cb8 Mon Sep 17 00:00:00 2001
From: Konstantin Baierer
Date: Mon, 19 Dec 2022 13:54:10 +0100
Subject: [PATCH 01/14] remove markdown doc (see #225)

---
 ocrd_eval.md | 384 ---------------------------------------------------
 1 file changed, 384 deletions(-)
 delete mode 100644 ocrd_eval.md

diff --git a/ocrd_eval.md b/ocrd_eval.md
deleted file mode 100644
index 49478ca..0000000
--- a/ocrd_eval.md
+++ /dev/null
@@ -1,384 +0,0 @@
-# Quality Assurance in OCR-D
-
-## Rationale
-
-Estimating the quality of OCR requires workflows run on representative data,
-evaluation metrics and evaluation tools that need to work together in a
-well-defined manner to allow users to make informed decisions about which OCR
-solution works best for their use case.
-
-## Evaluation metrics
-
-The evaluation of the success (accuracy) of OCR is a complex task for which multiple methods and metrics are available. It aims to capture quality in different aspects, such as the recognition of text, but also the detection of layout, for which different methods and metrics are needed.
-
-Furthermore, the time and resources required for OCR processing also have to be captured. Here we describe the metrics that were selected for use in OCR-D, how exactly they are applied, and what motivated their selection.
-
-### Scope of these Definitions
-
-At this stage (Q3 2022) these definitions serve as a basis of common understanding for the metrics used in the benchmarking presented in OCR-D QUIVER. Further implications for evaluation tools do not yet apply.
-
-### Text Evaluation
-
-The most important measure to assess the quality of OCR is the accuracy of the recognized text. The majority of metrics for this are based on the Levenshtein distance, a measure of the difference between two strings. In OCR, one of these strings is generally the Ground Truth text and the other the recognized text produced by an OCR engine.
-
-#### Levenshtein Distance
-
-The Levenshtein distance between two strings `a` and `b` is the minimum number of edit operations needed to turn `a` into `b`. The edit operations depend on the specific variant of the algorithm, but for OCR the relevant operations are deletion, insertion and substitution.
-
-The Levenshtein distance forms the basis for the calculation of [CER/WER](https://pad.gwdg.de/#CERWER).
-
-General example:
-
-The Levenshtein distance between "Monday" and "Tuesday" is 4, because 4 edit operations are necessary to turn "Monday" into "Tuesday":
-
-* **M**onday --> **T**onday (substitution)
-* T**o**nday --> T**u**nday (substitution)
-* Tu**n**day --> Tu**e**day (substitution)
-* Tueday --> Tue**s**day (insertion)
-
-OCR example:
-
-Given a Ground Truth that reads `ſind` and the recognized text `fmd`.
-
-The Levenshtein distance between these texts is 3, because 3 edit operations are necessary to turn `fmd` into `ſind`:
-
-* `fmd` --> `ſmd` (substitution)
-* `ſmd` --> `ſimd` (insertion)
-* `ſimd` --> `ſind` (substitution)
-
-
-#### CER and WER
-
-##### Characters
-
-A text consists of a sequence of characters that have a certain meaning. A character is a glyph that represents a word, a letter in a word, or a symbol.
-
-Examples:
-
-* the character `a` in the text `babst` represents the German letter `a`
-* the character `&` represents the Latin abbreviation `etc.`
-* the character `☿` represents the astronomical symbol for the planet Mercury
-
-##### Character Error Rate (CER)
-
-The character error rate (CER) describes how many faulty characters the output of an OCR engine contains compared to the Ground Truth text, in relation to the text length.
-
-Errors fall into one of the following three categories:
-
-* **deletion**: a character that is present in the text has been deleted from the output. Example:
-![](https://pad.gwdg.de/uploads/304cf855-3436-42b7-86af-87c16106f1ad.jpg)
-This reads `Sonnenfinſterniſſe:`. The output contains `Sonnenfinſterniſſe`, deleting `:`.
-
-* **substitution**: a character is replaced by another character in the output. Example:
-![](https://pad.gwdg.de/uploads/d7fa6f23-7c79-4fb2-ad94-7e98084c69d6.jpg)
-
-This heading reads `Die Finſterniſſe des 1801ſten Jahrs`. The output contains `180iſten`, replacing `1` with `i`.
-
-* **insertion**: a new character is introduced in the output. Example:
-![](https://pad.gwdg.de/uploads/e6b6432e-d79c-4568-9aef-15a026c05b39.jpg)
-This reads `diese Strahlen, und`. The output contains `Strahlen ,`, inserting a space before the comma.
-
-
-CER can be calculated in several ways, depending on whether a normalized CER is used or not.
-
-Given $i$ as the number of insertions, $d$ the number of deletions, $s$ the number of substitutions and $n$ the total number of characters in a text, the CER can be obtained by
-
-$CER = \frac{i + s + d}{n}$
-
-If the CER is calculated this way, it represents the percentage of characters incorrectly recognized by the OCR engine. Note that the error rate can easily exceed 100% when the output contains a lot of insertions.
-
-The *normalized* CER tries to mitigate this effect by considering the number of correct characters, $c$:
-
-$CER_n = \frac{i + s + d}{i + s + d + c}$
-
-In OCR-D's benchmarking we calculate the *non-normalized* CER, where values above 1 should be read as an error rate of more than 100%.
-
-
-###### CER Granularity
-
-In OCR-D we distinguish between the CER per **page** and the **overall** CER of a text. The reasoning behind this is that the material OCR-D mainly aims at (historical prints) is very heterogeneous: some pages might have a very simple layout while others can be highly complex and difficult to process. Providing only an overall CER would obscure these differences between pages.
-
-At this point we only provide a CER per page; an overall CER might be calculated as a weighted aggregate at a later stage.
-
-##### Word Error Rate (WER)
-
-The word error rate (WER) is closely connected to the CER. While the CER focuses on differences between characters, the WER represents the percentage of words incorrectly recognized in a text.
-
-CER and WER share the same categories of errors, and the WER is calculated similarly:
-
-$WER = \frac{i_w + s_w + d_w}{n_w}$
-
-where $i_w$ is the number of inserted words, $s_w$ the number of substituted words, $d_w$ the number of deleted words and $n_w$ the total number of words.
-
-More specific variants of the WER consider only the "significant" words, omitting e.g. stopwords from the calculation.
-
-
-###### WER Granularity
-
-In OCR-D we distinguish between the WER per **page** and the **overall** WER of a text. The reasoning follows that of the CER granularity.
-
-At this point we only provide a WER per page; an overall WER might be calculated at a later stage.
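-
-To make these definitions concrete, here is a minimal, illustrative sketch of how CER and WER can be derived from the Levenshtein distance. The function names are ours; in practice, OCR-D relies on established evaluation tools such as dinglehopper rather than this simplified code.
-
-```python
-from typing import Sequence
-
-def levenshtein(a: Sequence, b: Sequence) -> int:
-    """Minimum number of insertions, deletions and substitutions turning a into b."""
-    prev = list(range(len(b) + 1))
-    for i, x in enumerate(a, 1):
-        curr = [i]
-        for j, y in enumerate(b, 1):
-            curr.append(min(prev[j] + 1,              # deletion
-                            curr[j - 1] + 1,          # insertion
-                            prev[j - 1] + (x != y)))  # substitution
-        prev = curr
-    return prev[-1]
-
-def cer(gt: str, ocr: str) -> float:
-    """Non-normalized CER: edit operations relative to the Ground Truth length."""
-    return levenshtein(gt, ocr) / len(gt)
-
-def wer(gt: str, ocr: str) -> float:
-    """WER: the same computation on sequences of words."""
-    return levenshtein(gt.split(), ocr.split()) / len(gt.split())
-
-print(levenshtein("fmd", "ſind"))  # 3, as in the example above
-print(cer("ſind", "fmd"))          # 0.75
-```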
-
-
-#### Bag of Words
-
-In the "Bag of Words" model a text is represented as the multiset of its words, regardless of word order or grammar; only the words themselves and their number of occurrences are considered.
-
-Example:
-
-![](https://pad.gwdg.de/uploads/4d33b422-6c77-436c-a3e6-bf27e67dc203.jpg)
-
-
-> Eine Mondfinsternis ist die Himmelsbegebenheit welche sich zur Zeit des Vollmondes ereignet, wenn die Erde zwischen der Sonne und dem Monde steht, so daß die Strahlen der Sonne von der Erde aufgehalten werden, und daß man so den Schatten der Erde in dem Monde siehet. In diesem Jahre sind zwey Monfinsternisse, davon ist ebenfalls nur Eine bey uns sichtbar, und zwar am 30sten März des Morgens nach 4 Uhr, und währt bis nach 6 Uhr.
-
-To get the Bag of Words of this paragraph, a mapping of each word to its number of occurrences is created:
-
-$BoW$ =
-```json=
-{
-  "Eine": 2, "Mondfinsternis": 1, "ist": 2, "die": 2, "Himmelsbegebenheit": 1,
-  "welche": 1, "sich": 1, "zur": 1, "Zeit": 1, "des": 2, "Vollmondes": 1,
-  "ereignet,": 1, "wenn": 1, "Erde": 3, "zwischen": 1, "der": 4, "Sonne": 2,
-  "und": 4, "dem": 2, "Monde": 2, "steht,": 1, "so": 2, "daß": 2,
-  "Strahlen": 1, "von": 1, "aufgehalten": 1, "werden,": 1, "man": 1, "den": 1,
-  "Schatten": 1, "in": 1, "siehet.": 1, "In": 1, "diesem": 1, "Jahre": 1,
-  "sind": 1, "zwey": 1, "Monfinsternisse,": 1, "davon": 1, "ebenfalls": 1, "nur": 1,
-  "bey": 1, "uns": 1, "sichtbar,": 1, "zwar": 1, "am": 1, "30sten": 1,
-  "März": 1, "Morgens": 1, "nach": 2, "4": 1, "Uhr,": 1, "währt": 1,
-  "bis": 1, "6": 1, "Uhr.": 1
-}
-```
-
-
-### Layout Evaluation
-
-For documents with a complex structure, looking at the accuracy of the recognized text alone is often insufficient to determine the quality of OCR. An example can help to illustrate this: in a document containing two columns, all characters and words may be recognized correctly, but when layout analysis detects the two columns as just one, the OCR result will contain the text of the first lines of the first and second column, followed by the second lines of the first and second column, and so forth. This renders the sequence of words and paragraphs of the Ground Truth text wrongly and defeats almost all downstream processes.
-
-While the comprehensive evaluation of OCR with consideration of layout analysis is still a research topic, several established metrics can be used to capture different aspects of it.
-
-#### Reading Order
-
-Reading order describes the order in which segments on a page are intended to be read. While the reading order might be easy to determine in single-column monographs where only a few page segments exist, identifying the reading order in more complex layouts (e.g. newspapers or multi-column layouts) can be much more challenging.
-
-Example of a simple page layout with reading order:
-
-![](https://pad.gwdg.de/uploads/bc5258cb-bf91-479e-8a91-abf5ff8bbbfa.jpg)
-(http://resolver.sub.uni-goettingen.de/purl?PPN1726778096)
-
-
-Example of a complex page layout with reading order:
-
-![](https://pad.gwdg.de/uploads/100f14c4-19b0-4810-b3e5-74c674575424.jpg)
-(http://resolver.sub.uni-goettingen.de/purl?PPN1726778096)
-
-
-
-#### IoU (Intersection over Union)
-
-Intersection over Union (IoU) describes the degree of overlap between two regions of a (document) image, each defined either by a bounding box or by a polygon.
-Example:
-
-![](https://pad.gwdg.de/uploads/62945a01-a7a7-48f3-86c2-6bb8f97d67fe.jpg)
-
-(where green represents the Ground Truth and red the detected bounding box)
-
-Given a region A with area $area_1$, a region B with area $area_2$, and their overlap (or intersection) $area_o$, the IoU can then be expressed as
-
-$IoU = \frac{area_o}{area_1+area_2-area_o}$
-
-where $area_1+area_2-area_o$ expresses the union of the two regions ($area_1+area_2$) while not counting the overlapping area twice.
-
-The IoU ranges between 0 (no overlap at all) and 1 (the two regions overlap perfectly). Users executing object detection can choose a [threshold](#Threshold) that defines which degree of overlap must be reached for a prediction to count as correct. If e.g. a threshold of 0.6 is chosen, all predictions with an IoU of 0.6 or higher are considered correct.
-
-In OCR-D we use the IoU to measure how well segments on a page are recognized during the segmentation step. One region represents the area identified in the Ground Truth, while the other represents the area detected by an OCR-D processor.
-
-### Resource Utilization
-
-Last but not least, it is important to collect information about the resource utilization of each processing step, so that informed decisions can be made when e.g. having to trade off result quality against throughput speed.
-
-#### CPU Time
-
-CPU time is the time the CPU spends actually executing a process's instructions. It does not include idle time.
-
-#### Wall Time
-
-Wall time (or elapsed time) is the real time that passes between the start and the end of a process, including idle time.
-
-#### I/O
-
-I/O (input/output) is the number of bytes read and written by a process.
-
-#### Memory Usage
-
-Memory usage is the number of bytes the process allocates in memory (RAM).
-
-#### Disk Usage
-
-Disk usage is the number of bytes the process allocates on disk.
-
-### Unicode normalization
-
-In Unicode there can be multiple ways to express characters that consist of multiple components, such as a base letter and an accent. It is essential that both Ground Truth and OCR results are normalized *in the same way* before evaluation.
-
-For example, the letter `ä` can be expressed directly as `ä` (`U+00E4` in Unicode) or as a combination of `a` and `◌̈` (`U+0061 + U+0308`). Both encodings are semantically equivalent but technically different.
-
-Unicode has the notion of *normalization forms* to provide canonically normalized text. The most common forms are *NFC* (Normalization Form Canonical Composed) and *NFD* (Normalization Form Canonical Decomposed). When a Unicode string is in NFC, all decomposed codepoint sequences are replaced with their composed equivalents (e.g. `U+0061 + U+0308` becomes `U+00E4`). In NFD, all composed codepoints are replaced with their decomposed equivalents (e.g. `U+00E4` becomes `U+0061 + U+0308`).
-
-
-
-In accordance with the concept of [GT levels in OCR-D](https://ocr-d.de/en/gt-guidelines/trans/trLevels.html), it is preferable for strings to be normalized as NFC.
-
-The Unicode normalization algorithms rely on data from the Unicode database on equivalence classes and other script- and language-related metadata. For graphemes from the Private Use Area (PUA), such as MUFI codepoints, this information is not readily available, which can lead to inconsistent normalization. Therefore, it is essential that evaluation tools normalize PUA codepoints in addition to applying canonical Unicode normalization.
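-
-As an illustration (our example, not part of the specification), Python's standard `unicodedata` module applies these normalization forms; the PUA handling described above would still require a separate, project-specific mapping:
-
-```python
-import unicodedata
-
-decomposed = "a\u0308"  # 'a' followed by a combining diaeresis (U+0061 + U+0308)
-composed = "\u00e4"     # 'ä' as a single codepoint (U+00E4)
-
-print(decomposed == composed)                                # False: technically different
-print(unicodedata.normalize("NFC", decomposed) == composed)  # True: equal after NFC
-print([hex(ord(c)) for c in unicodedata.normalize("NFD", composed)])
-# ['0x61', '0x308']
-
-def normalize_for_eval(text: str) -> str:
-    """Normalize GT and OCR output identically (NFC, as preferred in OCR-D)."""
-    return unicodedata.normalize("NFC", text)
-```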
-
-
-
-### Metrics Not in Use Yet
-
-:::info
-The following metrics are not part of the MVP (minimum viable product) and will (if ever) be implemented at a later stage.
-:::
-
-#### GPU metrics
-
-##### GPU time
-
-GPU time is the time a GPU (graphics card) spends processing instructions.
-
-##### GPU avg memory
-
-GPU avg memory refers to the average amount of GPU memory (in GiB) that was used during processing.
-
-#### Text Evaluation
-
-##### Flexible Character Accuracy Measure
-
-The flexible character accuracy measure has been introduced to mitigate a major flaw of the CER: the CER heavily depends on the reading order an OCR engine detects. When content blocks are e.g. mixed up or merged during the text recognition step, the CER can be very high even though every single character has been recognized correctly.
-
-The flexible character accuracy measure circumvents this effect by splitting the recognized text and the Ground Truth into smaller chunks and measuring their partial edit distances. After all partial edit distances have been obtained, they are summed up to obtain the overall character accuracy.
-
-The algorithm can be summarized as follows:
-
-> 1. Split the two input texts into text lines
-> 2. Sort the ground truth text lines by length (in descending order)
-> 3. For the first ground truth line, find the best matching OCR result line segment (by minimising a penalty that is partly based on string edit distance)
-> 4. If full match (full length of line)
->    a. Mark as done and remove line from list
->    b. Else subdivide and add to respective list of text lines; resort
-> 5. If any more lines available repeat step 3
-> 6. Count non-matched lines / strings as insertions or deletions (depending on origin: ground truth or result)
-> 7. Sum up all partial edit distances and calculate overall character accuracy
-
-(C. Clausner, S. Pletschacher and A. Antonacopoulos / Pattern Recognition Letters 131 (2020) 390–397, p. 392)
-
-#### Layout Evaluation
-
-##### mAP (mean Average Precision)
-
-###### Precision and Recall
-
-**Precision** describes how accurately a model can identify objects within an image. The higher the precision of a model, the more confidently we can assume that a prediction (e.g. the model having identified a bicycle in an image) is correct. A precision of 1 indicates that each identified object in an image has been correctly identified (true positives) and no false positives have been detected. As the precision decreases, the result contains more and more false positives.
-
-**Recall**, on the other hand, measures how well a model performs in finding all instances of an object in an image (true positives), regardless of false positives. If a model tries to identify bicycles in an image, a recall of 1 indicates that all bicycles have been found by the model (while not considering other objects that have been falsely labelled as a bicycle).
-
-###### Prediction Score
-
-When a model tries to identify objects in an image, it predicts that a certain area of the image represents said object with a certain confidence, the prediction score. The prediction score varies between 0 and 1 and expresses how certain the model is of having correctly identified an object. Suppose a model tries to identify ornaments on a page and returns an area with a prediction score of 0.6: the model is "60% sure" that this area is an ornament. Whether this area is then counted as a positive depends on the chosen threshold, as the sketch below illustrates.
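-
-A minimal sketch of this filtering step (the data and names here are invented purely for illustration):
-
-```python
-# Hypothetical detections as (region id, prediction score) pairs.
-detections = [("region-1", 0.93), ("region-2", 0.60), ("region-3", 0.41)]
-
-THRESHOLD = 0.5  # freely chosen, see the next section
-
-# Keep only the detections whose score reaches the threshold.
-positives = [d for d in detections if d[1] >= THRESHOLD]
-print(positives)  # [('region-1', 0.93), ('region-2', 0.6)]
-```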
-
-###### Thresholds
-
-A threshold is a freely chosen number between 0 and 1. It divides the output of a model into two groups: outputs that have a prediction score or IoU greater than or equal to the threshold represent an object; outputs with a prediction score or IoU below the threshold are discarded as not representing the object.
-
-Example:
-Given a threshold of 0.6 and a model that tries to detect bicycles in an image, suppose the model returns two areas that might be bicycles, one with a prediction score of 0.4 and one with 0.9. Since the threshold equals 0.6, the first area is discarded and not regarded as a bicycle, while the second one is kept and counted as recognized.
-
-###### Precision-Recall-Curve
-
-Precision and recall are connected to each other since both depend on the true positives detected. A precision-recall curve is a means to balance these two values while maximizing them.
-
-Consider a dataset of 100 images in total, 50 of which depict a bicycle, and a model trying to identify bicycles in these images. The model is run 7 times on this dataset while gradually increasing the threshold from 0.1 to 0.7.
-
-
-| run | threshold | true positives | false positives | false negatives | precision | recall |
-|-----|-----------|----------------|-----------------|-----------------|-----------|--------|
-| 1 | 0.1 | 50 | 25 | 0 | 0.66 | 1 |
-| 2 | 0.2 | 45 | 20 | 5 | 0.69 | 0.9 |
-| 3 | 0.3 | 40 | 15 | 10 | 0.73 | 0.8 |
-| 4 | 0.4 | 35 | 5 | 15 | 0.88 | 0.7 |
-| 5 | 0.5 | 30 | 3 | 20 | 0.91 | 0.6 |
-| 6 | 0.6 | 20 | 0 | 30 | 1 | 0.4 |
-| 7 | 0.7 | 10 | 0 | 40 | 1 | 0.2 |
-
-For each threshold a pair of precision and recall can be computed and plotted as a curve:
-
-![](https://pad.gwdg.de/uploads/2d3c62ff-cab4-4a12-8043-014fe0440459.png)
-
-
-This graph is called the precision-recall curve.
-
-
-###### Average Precision
-
-The average precision (AP) describes how well a model can detect objects in an image across recall values from 0 to 1 by computing the average of all precisions given in the precision-recall curve. It is equal to the area under the curve.
-
-![](https://pad.gwdg.de/uploads/799e6a05-e64a-4956-9ede-440ac0463a3f.png)
-
-The Average Precision can be computed as the weighted mean of the precision at each confidence threshold:
-
-$AP = \displaystyle\sum_{k=0}^{k=n-1}[r(k) - r(k+1)] * p(k)$
-
-with $n$ being the number of thresholds and $r(k)$/$p(k)$ being the respective recall/precision values for the confidence threshold $k$, where $r(n)$ is defined as 0.
-
-Example:
-Given the example above, we get:
-
-$$
-\begin{array}{ll}
-AP & = \displaystyle\sum_{k=0}^{k=n-1}[r(k) - r(k+1)] * p(k) \\
-& = \displaystyle\sum_{k=0}^{k=6}[r(k) - r(k+1)] * p(k) \\
-& = (1-0.9) * 0.66 + (0.9-0.8) * 0.69 + \text{...} + (0.2-0) * 1 \\
-& = 0.878
-\end{array}
-$$
-
-###### mAP (mean Average Precision)
-
-The mean Average Precision (mAP) is a metric used to measure how accurate an object detector is. [As stated](#Thresholds), a threshold can be chosen freely, so picking one single threshold leaves some room for error. To mitigate this effect, the mean Average Precision has been introduced, which considers a set of IoU thresholds to determine the detector's performance. It is calculated by first computing the Average Precision for each IoU threshold and then taking the average over all of them:
-
-$mAP = \displaystyle\frac{1}{N}\sum_{i=1}^{N}AP_i$
-
-with $N$ being the number of thresholds.
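-
-The following short sketch (ours, not taken from the referenced articles) reproduces the AP calculation for the table above and the averaging step of the mAP:
-
-```python
-# Precision/recall pairs from the table above, ordered by increasing threshold.
-precisions = [0.66, 0.69, 0.73, 0.88, 0.91, 1.0, 1.0]
-recalls = [1.0, 0.9, 0.8, 0.7, 0.6, 0.4, 0.2]
-
-def average_precision(p, r):
-    """AP as the weighted mean of precisions, with r(n) taken to be 0."""
-    r = r + [0.0]
-    return sum((r[k] - r[k + 1]) * p[k] for k in range(len(p)))
-
-print(round(average_precision(precisions, recalls), 3))  # 0.878
-
-# mAP: the mean of the APs computed for a set of IoU thresholds.
-def mean_average_precision(aps):
-    return sum(aps) / len(aps)
-```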
- - -##### Scenario-driven Performance Evaluation - -Scenario-driven performance evaluation as described in [Clausner et al., 2011](https://primaresearch.org/publications/ICDAR2011_Clausner_PerformanceEvaluation) is currently the most comprehensive and sophisticated approach to evaluate OCR success with consideration of layout. - -The approach is based on the definition of so called evaluation scenarios, which allow the flexible combination of a selection of metrics together with their weights, targeted at a specific use case. - -## Evaluation JSON schema - - - -The results of an evaluation should be expressed in JSON according to -the [`ocrd-eval.json`](https://ocr-d.de/en/spec/ocrd-eval.schema.json). - -## Tools - -See [OCR-D workflow guide](https://ocr-d.de/en/workflows#evaluation) - -## References - -* CER/WER: - * https://sites.google.com/site/textdigitisation/qualitymeasures - * https://towardsdatascience.com/evaluating-ocr-output-quality-with-character-error-rate-cer-and-word-error-rate-wer-853175297510#5aec -* IoU: - * https://medium.com/analytics-vidhya/iou-intersection-over-union-705a39e7acef -* mAP: - * https://blog.paperspace.com/mean-average-precision/ - * https://jonathan-hui.medium.com/map-mean-average-precision-for-object-detection-45c121a31173 -* BoW: - * https://en.wikipedia.org/wiki/Bag-of-words_model -* FCA: - * https://www.primaresearch.org/www/assets/papers/PRL_Clausner_FlexibleCharacterAccuracy.pdf -* More background on evaluation of OCR - * https://doi.org/10.1145/3476887.3476888 - * https://doi.org/10.1515/9783110691597-009 From 13eec90d60ec522141d2b470fbcc6f87b7fe299a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 27 Jan 2023 14:12:04 +0100 Subject: [PATCH 02/14] ocrd_eval.schema.yml: typo Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- ocrd_eval.schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index fe1c655..b084992 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -100,7 +100,7 @@ $defs: publication_year: type: number - description: Year he document was originally published + description: Year the document was originally published publication_century: type: string From c0d2b5d9fa425ccece786f77cd59a0383dd3ac52 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Fri, 27 Jan 2023 14:13:16 +0100 Subject: [PATCH 03/14] ocrd_eval.schema.yml: typos Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- ocrd_eval.schema.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index b084992..bdacc54 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -91,7 +91,7 @@ $defs: eval_tool: type: string - description: Human readable name and version of evaluation tool used (for UI + description: Human readable name and version of evaluation tool used (for UI) document_metadata: type: object @@ -104,7 +104,7 @@ $defs: publication_century: type: string - description: Century he document was originally published + description: Century the document was originally published pattern: '[12][0-9]{3}-[12][0-9]{3}' publication_decade: @@ -130,7 +130,7 @@ $defs: type: object description: Information on which tools in which version were used in determining metrics properties: - paramters: + parameters: type: object description: Parameters passed to the evaluation processor From b5f5398b31fbf2e87474745ea047fea22b6ebb45 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer 
Date: Fri, 27 Jan 2023 14:19:48 +0100 Subject: [PATCH 04/14] ocrd_eval.schema.yml: publication_decade always starts/ends at 0 Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- ocrd_eval.schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index bdacc54..634942e 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -110,7 +110,7 @@ $defs: publication_decade: type: string description: Decade the document was originally published - pattern: '[12][0-9]{3}-[12][0-9]{3}' + pattern: '[12][0-9]{2}0-[12][0-9]{2}0' number_of_pages: type: number From 242bb3caebaf8036e79ca29ce0babe069985c93d Mon Sep 17 00:00:00 2001 From: mweidling <13831557+mweidling@users.noreply.github.com> Date: Thu, 16 Feb 2023 11:32:05 +0100 Subject: [PATCH 05/14] Update ocrd_eval.schema.yml --- ocrd_eval.schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index 634942e..42c9bab 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -1,7 +1,7 @@ $schema: https://json-schema.org/draft/2019-09/schema $id: https://ocr-d.de/en/spec/ocrd_eval.schema.json -title: A list of evaluations for OCR-D +title: A List of Evaluations for OCR-D description: > - All references to URL are JSON-LD-like objects with at least an `@id` property referencing the URL and `label` for a human-readable label to be From 5a71a729269df32da4f4a86a8db538397604540c Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 14 Mar 2023 16:02:56 +0100 Subject: [PATCH 06/14] Update ocrd_eval.schema.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- ocrd_eval.schema.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index 42c9bab..7c56684 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -5,7 +5,7 @@ title: A List of Evaluations for OCR-D description: > - All references to URL are JSON-LD-like objects with at least an `@id` property referencing the URL and `label` for a human-readable label to be - used in the UI + used in the UI. 
type: array items: required: ['@id', 'label', 'metadata', 'evaluation'] From 87a267d43222c3a3a1df1aadc52a330f86bb01be Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 14 Mar 2023 16:03:46 +0100 Subject: [PATCH 07/14] Update ocrd_eval.schema.yml Co-authored-by: mweidling <13831557+mweidling@users.noreply.github.com> --- ocrd_eval.schema.yml | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index 7c56684..d5eee18 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -78,12 +78,20 @@ $defs: description: The workspace containing the GT workflow_steps: - type: object - description: Human readable description of the individual steps in the workflow (for UI) - patternProperties: - '^[0-9]+$': - type: string - description: Description of this workflow step + type: array + description: Human readable description of the individual steps and their parameters in the workflow (for UI) + minItems: 1 + items: + type: object + properties: + id: + type: string + description: The name of the processor used for this workflow step + pattern: '^ocrd-[a-z\-]+' + params: + type: object + description: A map of parameters and their values applied to the processor used for this workflow step + required: ['id', 'params'] workflow_model: type: string From 30f1436fd5ca5ff0618ed45450afb67b5ee10b76 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 14 Mar 2023 16:37:46 +0100 Subject: [PATCH 08/14] ocrd_eval schema: drop unqualified CER metric --- ocrd_eval.schema.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index d5eee18..acec28b 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -165,9 +165,6 @@ $defs: EvaluationMetrics: - cer: - description: CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide) - cer_mean: description: Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page) From 373de459c7b69382ae59dd5b6edd91b396c55a12 Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Wed, 15 Mar 2023 11:22:39 +0100 Subject: [PATCH 09/14] ignore venv --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 93620cc..1247b50 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /.project .idea/ +venv From c926e7495231dee7f17317bab9d672d506fe9c21 Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Wed, 15 Mar 2023 11:23:13 +0100 Subject: [PATCH 10/14] ocrd_eval schema: strict evaluation --- ocrd_eval.schema.yml | 88 +++++++++++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 34 deletions(-) diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index acec28b..cc5fbc2 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -8,13 +8,13 @@ description: > used in the UI. 
type: array items: - required: ['@id', 'label', 'metadata', 'evaluation'] + required: ['@id', 'label', 'metadata', 'evaluation_results'] unevaluatedProperties: false allOf: - { '$ref': '#/$defs/LabeledUrl' } - properties: metadata: { '$ref': '#/$defs/EvaluationMetadata' } - evaluation: { '$ref': '#/$defs/EvaluationReport' } + evaluation_results: { '$ref': '#/$defs/EvaluationReport' } # Reusable definitions $defs: @@ -150,48 +150,68 @@ $defs: document_wide: type: object description: Document-wide metrics - #properties: { $ref: '#$defs/EvaluationMetrics' } + allOf: [ + { $ref: '#$defs/DocumentEvaluationMetrics' }, + { $ref: '#$defs/CommonEvaluationMetrics' } + ] + unevaluatedProperties: false by_page: type: array description: Metrics page-by-page items: type: object - allOf: - - properties: - page_id: - type: string - description: PAGE ID - #- properties: { $ref: '#$defs/EvaluationMetrics' } - - EvaluationMetrics: - - cer_mean: - description: Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page) - - cer_median: - description: Median of the page-wise CER (in document_wide) or regions on a page (in by_page) - - cer_range: - type: array - minItems: 2 - maxItems: 2 - items: + allOf: [ + { $ref: '#$defs/CommonEvaluationMetrics' }, + { $ref: '#$defs/PageId' } + ] + unevaluatedProperties: false + + PageId: + type: object + properties: + page_id: + type: string + description: PAGE ID + + CommonEvaluationMetrics: + type: object + properties: + cer_mean: + type: number + description: Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page) + + wer: type: number - description: Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide) + description: CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide) - cer_standard_deviation: - description: Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page) - wer: - description: CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide) + DocumentEvaluationMetrics: + type: object + properties: + cer_median: + type: number + description: Median of the page-wise CER (in document_wide) or regions on a page (in by_page) - wall_time: - description: Actual time needed for processing workflow + cer_range: + type: array + minItems: 2 + maxItems: 2 + items: + type: number + description: Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide) - cpu_time: - description: Cumulative CPU time used for processing workflow + cer_standard_deviation: + type: number + description: Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page) - pages_per_minute: - description: Number of pages processed per minute + wall_time: + type: number + description: Actual time needed for processing workflow + cpu_time: + type: number + description: Cumulative CPU time used for processing workflow + pages_per_minute: + type: number + description: Number of pages processed per minute From 6c19910149a4843e48bba931dc9c5a8a781074fe Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Wed, 15 Mar 2023 11:27:57 +0100 Subject: [PATCH 11/14] ocrd_eval schema: re-add sample --- ocrd_eval.sample.json | 102 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 ocrd_eval.sample.json diff 
--git a/ocrd_eval.sample.json b/ocrd_eval.sample.json new file mode 100644 index 0000000..0d4a0e9 --- /dev/null +++ b/ocrd_eval.sample.json @@ -0,0 +1,102 @@ +[ + { + "@id": "wf-data16_ant_complex_minimal_ocr-eval", + "label": "Workflow on data 16_ant_complex_minimal_ocr", + "metadata": { + "ocr_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/minimal_ocr.txt", + "label": "OCR Workflow minimal_ocr" + }, + "eval_workflow": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/ocrd_workflows/dinglehopper_eval.txt", + "label": "Evaluation Workflow dinglehopper_eval" + }, + "gt_workspace": { + "@id": "https://github.com/OCR-D/quiver-data/blob/main/16_ant_complex.ocrd.zip", + "label": "GT workspace 16th century Antiqua complex layout" + }, + "ocr_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_minimal_ocr_ocr.zip", + "label": "OCR workspace for 16_ant_complex_minimal_ocr" + }, + "eval_workspace": { + "@id": "https://github.com/OCR-D/quiver-back-end/blob/main/workflows/results/16_ant_complex_minimal_ocr_evaluation.zip", + "label": "Evaluation workspace for 16_ant_complex_minimal_ocr" + }, + "workflow_steps": [ + { + "id": "ocrd-tesserocr-recognize", + "params": { + "segmentation_level": "region", + "textequiv_level": "word", + "find_tables": true, + "model": "Fraktur_GT4HistOCR", + "dpi": 0, + "padding": 0, + "overwrite_segments": false, + "overwrite_text": true, + "shrink_polygons": false, + "block_polygons": false, + "find_staves": false, + "sparse_text": false, + "raw_lines": false, + "char_whitelist": "", + "char_blacklist": "", + "char_unblacklist": "", + "tesseract_parameters": {}, + "xpath_parameters": {}, + "xpath_model": {}, + "auto_model": false, + "oem": "DEFAULT" + } + } + ], + "workflow_model": "Fraktur_GT4HistOCR", + "eval_tool": "ocrd-dinglehopper vNone", + "document_metadata": { + "data_properties": { + "fonts": [ + "Antiqua" + ], + "publication_century": "1500-1600", + "publication_decade": "", + "publication_year": "16th century", + "number_of_pages": 3, + "layout": "complex" + } + } + }, + "evaluation_results": { + "document_wide": { + "wall_time": 7.72297, + "cpu_time": 10.385645, + "cer_mean": 0.10240852523716282, + "cer_median": 0.10536980749746708, + "cer_range": [ + 0.07124352331606218, + 0.1306122448979592 + ], + "cer_standard_deviation": 0.02979493530847308, + "wer": 0.23466068901129858, + "pages_per_minute": 23.307095586283516 + }, + "by_page": [ + { + "page_id": "phys_0007", + "cer_mean": 0.07124352331606218, + "wer": 0.2231404958677686 + }, + { + "page_id": "phys_0008", + "cer_mean": 0.10536980749746708, + "wer": 0.2484472049689441 + }, + { + "page_id": "phys_0009", + "cer_mean": 0.1306122448979592, + "wer": 0.2323943661971831 + } + ] + } + } +] \ No newline at end of file From ffe6dc9e25c665cb2beee94b6098a7561aecdcae Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Wed, 15 Mar 2023 11:31:36 +0100 Subject: [PATCH 12/14] ocrd_eval schema: re-add JSON schema --- ocrd_eval.schema.json | 1 + 1 file changed, 1 insertion(+) create mode 100644 ocrd_eval.schema.json diff --git a/ocrd_eval.schema.json b/ocrd_eval.schema.json new file mode 100644 index 0000000..10d4c08 --- /dev/null +++ b/ocrd_eval.schema.json @@ -0,0 +1 @@ +{"$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json", "title": "A List of Evaluations for OCR-D", "description": "- All references to URL are JSON-LD-like 
objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI.\n", "type": "array", "items": {"required": ["@id", "label", "metadata", "evaluation_results"], "unevaluatedProperties": false, "allOf": [{"$ref": "#/$defs/LabeledUrl"}, {"properties": {"metadata": {"$ref": "#/$defs/EvaluationMetadata"}, "evaluation_results": {"$ref": "#/$defs/EvaluationReport"}}}]}, "$defs": {"LabeledUrl": {"type": "object", "required": ["@id"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}}}, "EvaluationMetadata": {"type": "object", "title": "Metadata about one evaluation", "additionalProperties": false, "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n", "required": ["ocr_workflow", "ocr_workspace", "eval_workflow", "eval_workspace", "gt_workspace", "document_metadata"], "properties": {"ocr_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the ocr_workspace"}, "ocr_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the OCR"}, "eval_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the eval_workspace"}, "eval_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the evaluation results"}, "gt_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the GT"}, "workflow_steps": {"type": "array", "description": "Human readable description of the individual steps and their parameters in the workflow (for UI)", "minItems": 1, "items": {"type": "object", "properties": {"id": {"type": "string", "description": "The name of the processor used for this workflow step", "pattern": "^ocrd-[a-z\\-]+"}, "params": {"type": "object", "description": "A map of parameters and their values applied to the processor used for this workflow step"}}, "required": ["id", "params"]}}, "workflow_model": {"type": "string", "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"}, "eval_tool": {"type": "string", "description": "Human readable name and version of evaluation tool used (for UI)"}, "document_metadata": {"type": "object", "title": "Bibliographical and typographical metadata about the work to be evaluated", "properties": {"publication_year": {"type": "number", "description": "Year the document was originally published"}, "publication_century": {"type": "string", "description": "Century the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "publication_decade": {"type": "string", "description": "Decade the document was originally published", "pattern": "[12][0-9]{2}0-[12][0-9]{2}0"}, "number_of_pages": {"type": "number", "description": "Number of pages in this work (i.e. 
the number of images in the gt_workspace)"}, "layout": {"type": "string", "enum": ["simple", "complex"]}, "fonts": {"type": "array", "items": {"type": "string", "enum": ["antiqua", "fraktur"]}}}}, "provenance": {"type": "object", "description": "Information on which tools in which version were used in determining metrics", "properties": {"parameters": {"type": "object", "description": "Parameters passed to the evaluation processor"}}}}}, "EvaluationReport": {"type": "object", "additionalProperties": false, "description": "The metrics measured for this document", "properties": {"document_wide": {"type": "object", "description": "Document-wide metrics", "allOf": [{"$ref": "#$defs/DocumentEvaluationMetrics"}, {"$ref": "#$defs/CommonEvaluationMetrics"}], "unevaluatedProperties": false}, "by_page": {"type": "array", "description": "Metrics page-by-page", "items": {"type": "object", "allOf": [{"$ref": "#$defs/CommonEvaluationMetrics"}, {"$ref": "#$defs/PageId"}], "unevaluatedProperties": false}}}}, "PageId": {"type": "object", "properties": {"page_id": {"type": "string", "description": "PAGE ID"}}}, "CommonEvaluationMetrics": {"type": "object", "properties": {"cer_mean": {"type": "number", "description": "Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wer": {"type": "number", "description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}}, "DocumentEvaluationMetrics": {"type": "object", "properties": {"cer_median": {"type": "number", "description": "Median of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "cer_range": {"type": "array", "minItems": 2, "maxItems": 2, "items": {"type": "number", "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}, "cer_standard_deviation": {"type": "number", "description": "Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wall_time": {"type": "number", "description": "Actual time needed for processing workflow"}, "cpu_time": {"type": "number", "description": "Cumulative CPU time used for processing workflow"}, "pages_per_minute": {"type": "number", "description": "Number of pages processed per minute"}}}}} \ No newline at end of file From 1a2c1ab1572ad6db99aaa079a2be14eb1ccd83f5 Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Wed, 15 Mar 2023 11:37:31 +0100 Subject: [PATCH 13/14] build: fix deps --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 3eeb0e7..b9c833d 100644 --- a/Makefile +++ b/Makefile @@ -7,4 +7,4 @@ validate: json jsonschema --output pretty --validator Draft201909Validator --instance ocrd_eval.sample.json ocrd_eval.schema.json deps: - pip install yaml click jsonschema + pip install pyyaml click jsonschema From 00a9020b02dafeb32b77f7512644bfe21e1f881b Mon Sep 17 00:00:00 2001 From: Michelle Weidling Date: Thu, 16 Mar 2023 13:48:39 +0100 Subject: [PATCH 14/14] update fonts --- ocrd_eval.schema.json | 2 +- ocrd_eval.schema.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ocrd_eval.schema.json b/ocrd_eval.schema.json index 10d4c08..8a38b4c 100644 --- a/ocrd_eval.schema.json +++ b/ocrd_eval.schema.json @@ -1 +1 @@ -{"$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json", "title": "A List of Evaluations for OCR-D", "description": "- All 
references to URL are JSON-LD-like objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI.\n", "type": "array", "items": {"required": ["@id", "label", "metadata", "evaluation_results"], "unevaluatedProperties": false, "allOf": [{"$ref": "#/$defs/LabeledUrl"}, {"properties": {"metadata": {"$ref": "#/$defs/EvaluationMetadata"}, "evaluation_results": {"$ref": "#/$defs/EvaluationReport"}}}]}, "$defs": {"LabeledUrl": {"type": "object", "required": ["@id"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}}}, "EvaluationMetadata": {"type": "object", "title": "Metadata about one evaluation", "additionalProperties": false, "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n", "required": ["ocr_workflow", "ocr_workspace", "eval_workflow", "eval_workspace", "gt_workspace", "document_metadata"], "properties": {"ocr_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the ocr_workspace"}, "ocr_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the OCR"}, "eval_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the eval_workspace"}, "eval_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the evaluation results"}, "gt_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the GT"}, "workflow_steps": {"type": "array", "description": "Human readable description of the individual steps and their parameters in the workflow (for UI)", "minItems": 1, "items": {"type": "object", "properties": {"id": {"type": "string", "description": "The name of the processor used for this workflow step", "pattern": "^ocrd-[a-z\\-]+"}, "params": {"type": "object", "description": "A map of parameters and their values applied to the processor used for this workflow step"}}, "required": ["id", "params"]}}, "workflow_model": {"type": "string", "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"}, "eval_tool": {"type": "string", "description": "Human readable name and version of evaluation tool used (for UI)"}, "document_metadata": {"type": "object", "title": "Bibliographical and typographical metadata about the work to be evaluated", "properties": {"publication_year": {"type": "number", "description": "Year the document was originally published"}, "publication_century": {"type": "string", "description": "Century the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "publication_decade": {"type": "string", "description": "Decade the document was originally published", "pattern": "[12][0-9]{2}0-[12][0-9]{2}0"}, "number_of_pages": {"type": "number", "description": "Number of pages in this work (i.e. 
the number of images in the gt_workspace)"}, "layout": {"type": "string", "enum": ["simple", "complex"]}, "fonts": {"type": "array", "items": {"type": "string", "enum": ["antiqua", "fraktur"]}}}}, "provenance": {"type": "object", "description": "Information on which tools in which version were used in determining metrics", "properties": {"parameters": {"type": "object", "description": "Parameters passed to the evaluation processor"}}}}}, "EvaluationReport": {"type": "object", "additionalProperties": false, "description": "The metrics measured for this document", "properties": {"document_wide": {"type": "object", "description": "Document-wide metrics", "allOf": [{"$ref": "#$defs/DocumentEvaluationMetrics"}, {"$ref": "#$defs/CommonEvaluationMetrics"}], "unevaluatedProperties": false}, "by_page": {"type": "array", "description": "Metrics page-by-page", "items": {"type": "object", "allOf": [{"$ref": "#$defs/CommonEvaluationMetrics"}, {"$ref": "#$defs/PageId"}], "unevaluatedProperties": false}}}}, "PageId": {"type": "object", "properties": {"page_id": {"type": "string", "description": "PAGE ID"}}}, "CommonEvaluationMetrics": {"type": "object", "properties": {"cer_mean": {"type": "number", "description": "Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wer": {"type": "number", "description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}}, "DocumentEvaluationMetrics": {"type": "object", "properties": {"cer_median": {"type": "number", "description": "Median of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "cer_range": {"type": "array", "minItems": 2, "maxItems": 2, "items": {"type": "number", "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}, "cer_standard_deviation": {"type": "number", "description": "Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wall_time": {"type": "number", "description": "Actual time needed for processing workflow"}, "cpu_time": {"type": "number", "description": "Cumulative CPU time used for processing workflow"}, "pages_per_minute": {"type": "number", "description": "Number of pages processed per minute"}}}}} \ No newline at end of file +{"$schema": "https://json-schema.org/draft/2019-09/schema", "$id": "https://ocr-d.de/en/spec/ocrd_eval.schema.json", "title": "A List of Evaluations for OCR-D", "description": "- All references to URL are JSON-LD-like objects with at least an `@id`\n property referencing the URL and `label` for a human-readable label to be\n used in the UI.\n", "type": "array", "items": {"required": ["@id", "label", "metadata", "evaluation_results"], "unevaluatedProperties": false, "allOf": [{"$ref": "#/$defs/LabeledUrl"}, {"properties": {"metadata": {"$ref": "#/$defs/EvaluationMetadata"}, "evaluation_results": {"$ref": "#/$defs/EvaluationReport"}}}]}, "$defs": {"LabeledUrl": {"type": "object", "required": ["@id"], "properties": {"@id": {"type": "string", "format": "uri", "description": "URL of the thing"}, "label": {"type": "string", "description": "Description of the thing for UI purposes"}}}, "EvaluationMetadata": {"type": "object", "title": "Metadata about one evaluation", "additionalProperties": false, "description": "EvaluationMetadata contains all the info on how an EvaluationReport came to be.\nThere are two OCR-D *workflows* involved:\n - ocr_workflow: The workflow 
which produced the OCR results to evaluate\n - eval_workflow: The workflow run to evaluate OCR and GT\n\nThere are three OCR-D *workspaces* involved:\n - gt_workspace: The workspace containing the GT\n - ocr_workspace: The workspace containing the OCR results from ocr_workflow\n - eval_workspace: The workspace on which the eval_workflow was run\n", "required": ["ocr_workflow", "ocr_workspace", "eval_workflow", "eval_workspace", "gt_workspace", "document_metadata"], "properties": {"ocr_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the ocr_workspace"}, "ocr_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the OCR"}, "eval_workflow": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The OCR-D workflow that produced the eval_workspace"}, "eval_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the evaluation results"}, "gt_workspace": {"allOf": [{"$ref": "#/$defs/LabeledUrl"}], "description": "The workspace containing the GT"}, "workflow_steps": {"type": "array", "description": "Human readable description of the individual steps and their parameters in the workflow (for UI)", "minItems": 1, "items": {"type": "object", "properties": {"id": {"type": "string", "description": "The name of the processor used for this workflow step", "pattern": "^ocrd-[a-z\\-]+"}, "params": {"type": "object", "description": "A map of parameters and their values applied to the processor used for this workflow step"}}, "required": ["id", "params"]}}, "workflow_model": {"type": "string", "description": "Human readable name of the main model used for recognition in the OCR workflow (for UI)"}, "eval_tool": {"type": "string", "description": "Human readable name and version of evaluation tool used (for UI)"}, "document_metadata": {"type": "object", "title": "Bibliographical and typographical metadata about the work to be evaluated", "properties": {"publication_year": {"type": "number", "description": "Year the document was originally published"}, "publication_century": {"type": "string", "description": "Century the document was originally published", "pattern": "[12][0-9]{3}-[12][0-9]{3}"}, "publication_decade": {"type": "string", "description": "Decade the document was originally published", "pattern": "[12][0-9]{2}0-[12][0-9]{2}0"}, "number_of_pages": {"type": "number", "description": "Number of pages in this work (i.e. 
the number of images in the gt_workspace)"}, "layout": {"type": "string", "enum": ["simple", "complex"]}, "fonts": {"type": "array", "items": {"type": "string", "enum": ["antiqua", "textura", "gotico-antiqua", "rotunda", "italic", "bastarda", "greek", "schwabacher", "hebrew", "fraktur"]}}}}, "provenance": {"type": "object", "description": "Information on which tools in which version were used in determining metrics", "properties": {"parameters": {"type": "object", "description": "Parameters passed to the evaluation processor"}}}}}, "EvaluationReport": {"type": "object", "additionalProperties": false, "description": "The metrics measured for this document", "properties": {"document_wide": {"type": "object", "description": "Document-wide metrics", "allOf": [{"$ref": "#$defs/DocumentEvaluationMetrics"}, {"$ref": "#$defs/CommonEvaluationMetrics"}], "unevaluatedProperties": false}, "by_page": {"type": "array", "description": "Metrics page-by-page", "items": {"type": "object", "allOf": [{"$ref": "#$defs/CommonEvaluationMetrics"}, {"$ref": "#$defs/PageId"}], "unevaluatedProperties": false}}}}, "PageId": {"type": "object", "properties": {"page_id": {"type": "string", "description": "PAGE ID"}}}, "CommonEvaluationMetrics": {"type": "object", "properties": {"cer_mean": {"type": "number", "description": "Arithmetic mean of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wer": {"type": "number", "description": "CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}}, "DocumentEvaluationMetrics": {"type": "object", "properties": {"cer_median": {"type": "number", "description": "Median of the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "cer_range": {"type": "array", "minItems": 2, "maxItems": 2, "items": {"type": "number", "description": "Minimum and maximum of CER calculated over the text of a whole page (in by_page) or combined text of all pages (in document_wide)"}}, "cer_standard_deviation": {"type": "number", "description": "Standard deviation the page-wise CER (in document_wide) or regions on a page (in by_page)"}, "wall_time": {"type": "number", "description": "Actual time needed for processing workflow"}, "cpu_time": {"type": "number", "description": "Cumulative CPU time used for processing workflow"}, "pages_per_minute": {"type": "number", "description": "Number of pages processed per minute"}}}}} \ No newline at end of file diff --git a/ocrd_eval.schema.yml b/ocrd_eval.schema.yml index cc5fbc2..539f66a 100644 --- a/ocrd_eval.schema.yml +++ b/ocrd_eval.schema.yml @@ -132,7 +132,7 @@ $defs: type: array items: type: string - enum: ['antiqua', 'fraktur'] + enum: ['antiqua', 'textura', 'gotico-antiqua', 'rotunda', 'italic', 'bastarda', 'greek', 'schwabacher', 'hebrew', 'fraktur'] provenance: type: object