forked from yunwoong7/korean_ocr_using_pororo
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathmain.py
158 lines (121 loc) · 5.03 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import cv2
from abc import ABC, abstractmethod
from pororo import Pororo
from pororo.pororo import SUPPORTED_TASKS
from utils.image_util import plt_imshow, put_text
from utils.image_convert import convert_coord, crop
from utils.pre_processing import load_with_filter, roi_filter
from easyocr import Reader
import warnings
# Ignore every warning category for the rest of the process. This runs after
# the imports above, so warnings raised while those modules were being
# imported are not affected.
# NOTE(review): presumably meant to keep noisy third-party library warnings
# out of the console - confirm that hiding all warnings is intended.
warnings.filterwarnings('ignore')
class BaseOcr(ABC):
    """Shared state and debug visualisation for OCR engine wrappers.

    The getters and the ``show_*`` helpers read ``self.img_path`` and
    ``self.ocr_result``, so a concrete ``run_ocr`` needs to populate both.
    """

    def __init__(self):
        # A file path or an already-loaded image (see show_img_with_ocr).
        self.img_path = None
        # Raw engine output; its layout depends on the concrete engine.
        self.ocr_result = {}

    def get_ocr_result(self):
        """Return the raw engine output currently stored on the instance."""
        return self.ocr_result

    def get_img_path(self):
        """Return the image reference currently stored on the instance."""
        return self.img_path

    def show_img(self):
        """Display the stored image through ``plt_imshow``."""
        plt_imshow(img=self.img_path)

    @staticmethod
    def _to_pixel_corners(rect, x_key, y_key):
        """Turn a box's corner records into ``(int, int)`` pixel tuples.

        Args:
            rect: Iterable of corner records (dicts or sequences).
            x_key: Key/index selecting the x coordinate of a corner.
            y_key: Key/index selecting the y coordinate of a corner.

        Returns:
            A list with one ``(x, y)`` tuple of built-in ints per corner.
        """
        # int() guarantees plain Python ints even when round() hands back a
        # NumPy scalar, which keeps the drawing calls' point arguments simple.
        return [
            (int(round(corner[x_key])), int(round(corner[y_key])))
            for corner in rect
        ]

    def show_img_with_ocr(self, bounding, description, vertices, point):
        """Show the image next to a copy annotated with the OCR boxes and text.

        Args:
            bounding: Key into ``self.ocr_result`` that holds the list of text
                entries, or ``None`` when ``self.ocr_result`` is that list.
            description: Key/index of the recognised text inside an entry.
            vertices: Key/index of the four-corner box inside an entry.
            point: Pair ``(x_key, y_key)`` used to index each corner.

        Raises:
            ValueError: If no image is available (``run_ocr`` has not stored
                one, or the stored path could not be read).
        """
        img = cv2.imread(self.img_path) if isinstance(self.img_path, str) \
            else self.img_path
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with a clear message rather than on img.copy().
            raise ValueError(
                f"No readable image for {self.img_path!r}; "
                "call run_ocr() first and check the image path."
            )

        roi_img = img.copy()
        color = (0, 200, 0)
        x_key, y_key = point

        entries = self.ocr_result if bounding is None \
            else self.ocr_result[bounding]

        for entry in entries:
            text = entry[description]
            top_left, top_right, bottom_right, bottom_left = \
                self._to_pixel_corners(entry[vertices], x_key, y_key)

            # Outline the quadrilateral edge by edge.
            cv2.line(roi_img, top_left, top_right, color, 2)
            cv2.line(roi_img, top_right, bottom_right, color, 2)
            cv2.line(roi_img, bottom_right, bottom_left, color, 2)
            cv2.line(roi_img, bottom_left, top_left, color, 2)
            # The label position passed to put_text is 20 px above the
            # top-left corner.
            roi_img = put_text(
                roi_img, text, top_left[0], top_left[1] - 20, color=color
            )

        plt_imshow(["Original", "ROI"], [img, roi_img], figsize=(16, 10))

    @abstractmethod
    def run_ocr(self, img_path: str, debug: bool = False):
        """Run OCR on ``img_path``; implemented by each concrete engine."""
        pass
class PororoOcr(BaseOcr):
    """OCR engine wrapper backed by a Pororo ``ocr`` task."""

    def __init__(self, model: str = "brainocr", lang: str = "ko", **kwargs):
        """Create the Pororo OCR callable; extra kwargs are forwarded to Pororo."""
        super().__init__()
        self._ocr = Pororo(task="ocr", lang=lang, model=model, **kwargs)

    def run_ocr(self, img_path: str, debug: bool = False):
        """Recognise text in ``img_path`` and return it.

        Stores the input and the detailed Pororo result on the instance.
        Returns the result's ``"description"`` value, or the string
        ``"No text detected."`` when that value is empty. With ``debug`` set,
        the annotated image is displayed as well.
        """
        self.img_path = img_path
        self.ocr_result = self._ocr(img_path, detail=True)

        # An empty description falls through to the placeholder message.
        ocr_text = self.ocr_result["description"] or "No text detected."

        if debug:
            self.show_img_with_ocr("bounding_poly", "description", "vertices", ["x", "y"])

        return ocr_text

    @staticmethod
    def get_available_langs():
        """Return the languages reported by Pororo's registered ``ocr`` task."""
        ocr_task = SUPPORTED_TASKS["ocr"]
        return ocr_task.get_available_langs()

    @staticmethod
    def get_available_models():
        """Return the models reported by Pororo's registered ``ocr`` task."""
        ocr_task = SUPPORTED_TASKS["ocr"]
        return ocr_task.get_available_models()
# EasyOCR API reference: https://www.jaided.ai/easyocr/documentation/
class EasyOcr(BaseOcr):
    """OCR engine wrapper backed by EasyOCR's ``Reader.readtext``."""

    def __init__(self, lang: "list[str] | None" = None, gpu=False, **kwargs):
        """Create the EasyOCR reader.

        Args:
            lang: Language codes for the reader; defaults to ``["ko", "en"]``.
            gpu: Forwarded to ``Reader``.
            **kwargs: Any further ``Reader`` options.
        """
        super().__init__()
        # A None sentinel replaces the former list default so that no list
        # object is shared between instances.
        lang_list = ["ko", "en"] if lang is None else lang
        self._ocr = Reader(lang_list=lang_list, gpu=gpu, **kwargs).readtext

    def run_ocr(self, img_path: str, debug: bool = False):
        """Recognise text in ``img_path``.

        Stores the input and the raw ``readtext`` result on the instance.

        Returns:
            A list with the text of every result entry, or the string
            ``"No text detected."`` when there are no entries.
        """
        self.img_path = img_path
        self.ocr_result = self._ocr(img_path, detail=1)

        if self.ocr_result:
            # Index 1 of each entry is the text; index 0 is the box that the
            # debug view draws below.
            ocr_text = [entry[1] for entry in self.ocr_result]
        else:
            ocr_text = "No text detected."

        if debug:
            self.show_img_with_ocr(None, 1, 0, [0, 1])

        return ocr_text
class EasyPororoOcr(BaseOcr):
    """Hybrid engine: EasyOCR detects text regions, Pororo reads each crop."""

    def __init__(self, lang: "list[str] | None" = None, gpu=False, **kwargs):
        """Create the EasyOCR detector.

        Args:
            lang: Language codes for the reader; defaults to ``["ko", "en"]``.
            gpu: Forwarded to ``Reader``.
            **kwargs: Any further ``Reader`` options.
        """
        super().__init__()
        # A None sentinel replaces the former list default so that no list
        # object is shared between instances.
        lang_list = ["ko", "en"] if lang is None else lang
        self._detector = Reader(lang_list=lang_list, gpu=gpu, **kwargs).detect
        self.detect_result = None
        # Populated by run_ocr(); declared here so the attributes always exist.
        self.img = None
        self._ocr = None
        self._ocr_kwargs = None

    def _get_recognizer(self, **kwargs):
        """Return the Pororo recogniser, rebuilding it only when kwargs change."""
        if self._ocr is None or kwargs != self._ocr_kwargs:
            self._ocr = Pororo(task="ocr", lang="ko", model="brainocr", **kwargs)
            self._ocr_kwargs = dict(kwargs)
        return self._ocr

    def create_result(self, points):
        """Recognise the text inside one region of the current image.

        Args:
            points: Region passed to ``crop`` together with ``self.img``.

        Returns:
            ``[points, text]`` where ``text`` is the recogniser output joined
            with single spaces.

        Raises:
            RuntimeError: If ``run_ocr`` has not prepared the image and the
                recogniser yet.
        """
        if self.img is None or self._ocr is None:
            raise RuntimeError("run_ocr() must be called before create_result().")
        roi = crop(self.img, points)
        result = self._ocr(roi_filter(roi))
        text = " ".join(result)
        return [points, text]

    def run_ocr(self, img_path: str, debug: bool = False, **kwargs):
        """Detect text regions in the image and recognise each of them.

        Args:
            img_path: A file path (read with ``cv2.imread``) or an
                already-loaded image, which is used as is.
            debug: Print the raw detection result and show the annotated image.
            **kwargs: Forwarded to the Pororo recogniser constructor.

        Returns:
            A list with the text of every non-empty region, or the string
            ``"No text detected."`` when nothing was recognised.

        Raises:
            ValueError: If the image could not be read.
        """
        self.img_path = img_path
        self.img = cv2.imread(img_path) if isinstance(img_path, str) \
            else img_path
        if self.img is None:
            # cv2.imread returns None on failure; stop before loading models.
            raise ValueError(f"Could not read image: {img_path!r}")

        # Reuse the recogniser across calls instead of rebuilding it each time.
        self._get_recognizer(**kwargs)

        self.detect_result = self._detector(self.img, slope_ths=0.3, height_ths=1)
        if debug:
            print(self.detect_result)

        horizontal_list, free_list = self.detect_result

        # Only the first element of each detection list is used; horizontal
        # boxes go through convert_coord, free-form boxes are taken as is.
        rois = [convert_coord(box) for box in horizontal_list[0]] + free_list[0]

        candidates = [self.create_result(roi) for roi in rois]
        # Drop regions whose recognised text is empty.
        self.ocr_result = [entry for entry in candidates if entry[1]]

        if self.ocr_result:
            ocr_text = [entry[1] for entry in self.ocr_result]
        else:
            ocr_text = "No text detected."

        if debug:
            self.show_img_with_ocr(None, 1, 0, [0, 1])

        return ocr_text
if __name__ == "__main__":
    # Interactive demo using the hybrid engine. To compare engines, build
    # PororoOcr() or EasyOcr() instead and call run_ocr() the same way.
    engine = EasyPororoOcr()

    source_path = input("Enter image path: ")
    # The image is loaded through load_with_filter before it goes to the engine.
    filtered_image = load_with_filter(source_path)

    recognized = engine.run_ocr(filtered_image, debug=True)
    print('EasyPororoOCR:', recognized)