-
Notifications
You must be signed in to change notification settings - Fork 0
/
img2text.py
29 lines (20 loc) · 884 Bytes
/
img2text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
import os
from pathlib import Path
from PIL import Image
import pytesseract
# Input root: holds one sub-folder of page images per request.
# NOTE: the backslash-separated literals below are only split into path
# components on Windows; they are kept unchanged on purpose.
img_folder = r".\data\requetes_img3"
# Output root: receives one UTF-8 text file per request folder.
txt_folder = r".\data\requetes_txt3"


def ocr_image(image_path):
    """Return the text Tesseract recognises in one image (French model).

    Args:
        image_path: Path of the image file to read.

    Returns:
        The recognised text as a ``str``.
    """
    # The context manager releases the file handle right after OCR instead
    # of leaving it open until garbage collection.
    with Image.open(image_path) as image:
        # Extract the text.
        return str(pytesseract.image_to_string(image, lang='fra'))


def convert_request_folder(image_dir, text_file, ocr=ocr_image):
    """OCR every file of *image_dir* and store the joined text in *text_file*.

    Nothing is done when *text_file* already exists. Otherwise all pages are
    recognised first and the output file is created only afterwards, so a
    failure during OCR never leaves an empty or partial file behind that a
    later run would mistake for a finished conversion.

    Args:
        image_dir: Folder containing the page images of one request.
        text_file: Destination text file (written as UTF-8).
        ocr: Callable mapping an image path to its text; defaults to
            :func:`ocr_image`.

    Returns:
        ``True`` if *text_file* was written, ``False`` if it already existed.
    """
    image_dir = Path(image_dir)
    text_file = Path(text_file)
    if text_file.exists():
        return False

    # Directory listings come back in arbitrary order; sort so pages are
    # concatenated deterministically (lexicographic by file name).
    # Sub-directories are skipped because they cannot be opened as images.
    page_files = sorted(entry for entry in image_dir.iterdir() if entry.is_file())
    page_texts = [ocr(page_file) for page_file in page_files]

    text_file.parent.mkdir(parents=True, exist_ok=True)
    text_file.write_text("".join(page_texts), encoding='utf-8')
    return True


def main():
    """Convert every request folder under ``img_folder`` to a text file."""
    image_root = Path(img_folder)
    text_root = Path(txt_folder)
    # Only directories are request folders; stray files in the root are ignored.
    request_dirs = sorted(entry for entry in image_root.iterdir() if entry.is_dir())
    for request_dir in request_dirs:
        text_file = text_root / (request_dir.name + ".txt")
        if convert_request_folder(request_dir, text_file):
            print(request_dir.name, "--to txt--> done")
        else:
            print("txt file: ", text_file, "already exists.")


if __name__ == "__main__":
    main()