-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathaws_detect_text.go
92 lines (80 loc) · 2.77 KB
/
aws_detect_text.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
package lblconv
// AWS Rekognition detect-text specific functionality.
import (
"encoding/json"
"io/ioutil"
)
// AWSGeometry is the geometry of a text object annotation. It is the type of the Geometry field
// of AWSTextDetection.
type AWSGeometry struct {
// BoundingBox is the box around the detected text. Its Left, Top, Width and Height values are
// treated as normalised coordinates by parseAWSDetectTextFile, which scales them by the image
// width and height.
BoundingBox AWSBoundingBox
// Polygon is a list of points describing the text region. It is decoded but not read by the
// converter in this file.
// NOTE(review): presumably also in normalised coordinates - confirm against the AWS docs.
Polygon []AWSPoint
}
// AWSTextDetection is a single text annotation within an AWS detect-text label file.
//
// Values of this type are decoded from the elements of the "TextDetections" JSON array (see
// AWSDTAnnotatedFile). The fields carry no JSON tags, so they are matched by field name.
type AWSTextDetection struct {
Confidence float64 // Range [0, 100]. Divided by 100 when converted to an Annotation attribute.
DetectedText string // The detected text; copied into the Annotation attributes on conversion.
Geometry AWSGeometry // Location of the text; the converter in this file uses only Geometry.BoundingBox.
ID int // NOTE(review): presumably the identifier that ParentID refers to - confirm against the AWS docs.
ParentID *int // Nil when Type=="LINE".
Type string // LINE or WORD. Selects the label "Text_Line" or "Text_Word"; other values yield "Text".
}
// AWSDTAnnotatedFile defines the AWS text detection annotation structure for a single file.
type AWSDTAnnotatedFile struct {
// Annotations holds the detections decoded from the top-level "TextDetections" JSON array.
Annotations []AWSTextDetection `json:"TextDetections"`
// FilePath is excluded from JSON encoding and decoding (tag "-"). parseAWSDetectTextFile does
// not set it; the image path is stored on the resulting AnnotatedFile instead.
FilePath string `json:"-"`
}
// FromAWSDetectText loads the AWS detect-text label files (extension ".json") from labelDir,
// matches them to the images in imageDir and returns the parsed annotations, one AnnotatedFile
// per label/image pair. Each pair is parsed by parseAWSDetectTextFile.
func FromAWSDetectText(labelDir, imageDir string) ([]AnnotatedFile, error) {
	// AWS detect-text responses are stored as JSON documents.
	const labelExt = ".json"

	return parseLabelsWithOneToOneImages(labelDir, labelExt, imageDir, parseAWSDetectTextFile)
}
// parseAWSDetectTextFile converts a single AWS detect-text label file into an AnnotatedFile.
//
// The JSON document at labelPath is read and decoded first. The dimensions of the image at
// imagePath are then looked up so that the normalised bounding boxes can be scaled to image
// coordinates. The first failing step (read, decode, image lookup) aborts the conversion and its
// error is returned unchanged together with a zero AnnotatedFile.
//
// Every detection becomes one Annotation:
//   - Label is "Text_Line" for Type "LINE", "Text_Word" for Type "WORD" and "Text" otherwise.
//   - Coords is [left, top, right, bottom] in image coordinates.
//   - Attributes holds the confidence, rescaled from [0, 100] to [0, 1], and the detected text.
func parseAWSDetectTextFile(labelPath, imagePath string) (AnnotatedFile, error) {
	// Load and decode the label file.
	raw, err := ioutil.ReadFile(labelPath)
	if err != nil {
		return AnnotatedFile{}, err
	}

	var parsed AWSDTAnnotatedFile
	if err = json.Unmarshal(raw, &parsed); err != nil {
		return AnnotatedFile{}, err
	}

	// The image dimensions are needed to scale the normalised coordinates.
	cfg, _, err := decodeImageConfig(imagePath)
	if err != nil {
		return AnnotatedFile{}, err
	}
	imgW, imgH := float64(cfg.Width), float64(cfg.Height)

	// Build the intermediate representation.
	result := AnnotatedFile{
		Annotations: make([]Annotation, 0, len(parsed.Annotations)),
		FilePath:    imagePath,
	}
	for i := range parsed.Annotations {
		det := &parsed.Annotations[i]
		box := det.Geometry.BoundingBox

		// Unknown detection types keep the generic fallback label.
		label := "Text"
		switch det.Type {
		case "LINE":
			label = "Text_Line"
		case "WORD":
			label = "Text_Word"
		}

		result.Annotations = append(result.Annotations, Annotation{
			Attributes: map[string]interface{}{
				Confidence:   det.Confidence / 100,
				DetectedText: det.DetectedText,
			},
			// Left, top, right, bottom in image coordinates.
			Coords: [4]float64{
				box.Left * imgW,
				box.Top * imgH,
				(box.Left + box.Width) * imgW,
				(box.Top + box.Height) * imgH,
			},
			Label: label,
		})
	}

	return result, nil
}