Skip to content

Commit

Permalink
Merge pull request #2 from moheladwy/UpdateSetupScript
Browse files Browse the repository at this point in the history
Added documentation for the functions in the Python script
  • Loading branch information
moheladwy authored Dec 14, 2024
2 parents 7712f82 + d94795c commit 6d1089d
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 15 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/python-lint.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,4 @@ jobs:
- name: Lint with pylint
run: |
pylint --disable=R,C OCR4Linux.py
pylint --rcfile=.pylintrc OCR4Linux.py
7 changes: 7 additions & 0 deletions .pylintrc
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
[MESSAGES CONTROL]
disable=
C0114, # Missing module docstring
C0115, # Missing class docstring
E1101, # No member
W0612, # Unused variable
W0718 # Broad exception caught
113 changes: 99 additions & 14 deletions OCR4Linux.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,12 +7,6 @@
# The script uses tesseract to extract text from the image.
# The script uses wl-copy and cliphist for Wayland and xclip for X11 to copy the extracted text to the clipboard.
# The script uses a python script to extract text from the image.
# The script requires the following packages to be installed:
# - python
# - tesseract
# - grimblast or scrot
# - wl-clipboard or xclip
# - cliphist
# ========================================================================================================================
from PIL import Image
import pytesseract
Expand All @@ -23,14 +17,65 @@


class TesseractConfig:
"""
TesseractConfig is a class that provides functionality to preprocess images,
and extract text from them using Tesseract OCR.
Methods:
__init__():
Initializes the TesseractConfig instance with command line arguments.
preprocess_image(image):
Preprocesses the given image to improve OCR accuracy.
Args:
image (PIL.Image): The image to preprocess.
Returns:
PIL.Image: The preprocessed image.
extract_text_with_lines(image):
Extracts text from the given image while preserving line breaks.
Args:
image (PIL.Image): The image from which to extract text.
Returns:
str: The extracted text with line breaks preserved.
help():
Prints the usage information for the script.
main():
The main method that processes the image and extracts text.
Returns:
int: 0 if successful, 1 otherwise.
"""

def __init__(self):
self.Args_num = 3
"""
Initializes the TesseractConfig instance with command-line arguments.
Attributes:
args_num (int): The number of expected command-line arguments.
script_name (str): The name of the script being executed.
image_path (str): The path to the input image file.
output_path (str): The path to the output file where results will be saved.
"""
self.args_num = 3
self.script_name = sys.argv[0]
self.image_path = sys.argv[1]
self.output_path = sys.argv[2]

def preprocess_image(self, image):
"""Preprocess image for better OCR accuracy"""
def preprocess_image(self, image) -> Image:
"""
Preprocess image for better OCR accuracy.
This function converts the input image to grayscale, applies thresholding
to binarize the image, and removes noise using a median blur filter.
Args:
image (PIL.Image.Image): The input image to preprocess.
Returns:
PIL.Image.Image: The preprocessed image.
"""
# Convert to grayscale
gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
# Apply thresholding
Expand All @@ -40,8 +85,21 @@ def preprocess_image(self, image):
denoised = cv2.medianBlur(thresh, 3)
return Image.fromarray(denoised)

def extract_text_with_lines(self, image):
"""Extract text while preserving line breaks"""
def extract_text_with_lines(self, image: Image) -> str:
"""
Extract text from an image while preserving line breaks.
This method uses Tesseract OCR to extract text from the provided image,
preserving the layout and line breaks. It filters out low-confidence
results to improve the accuracy of the extracted text.
Args:
image: The image from which to extract text. This should be in a format
supported by the pytesseract library.
Returns:
A string containing the extracted text with line breaks preserved.
"""
# Tesseract options: default OCR engine mode (--oem 3), treat the image as a single uniform block of text (--psm 6)
custom_config = r'--oem 3 --psm 6'
# Extract text with layout preservation
Expand All @@ -65,16 +123,43 @@ def extract_text_with_lines(self, image):
# Join text preserving line breaks
return '\n'.join(' '.join(line).strip() for line in lines.values() if ''.join(line).strip())

def help(self):
def help(self) -> None:
"""
Prints the usage instructions for the OCR4Linux script.
This method displays the correct way to run the script, including the required
arguments and their descriptions.
Usage:
python <script_name> <image_path> <output_path>
Arguments:
file_path: Path to the python script
image_path: Path to the image file
output_path: Path to the output text file
"""
print(f"Usage: python {self.script_name} <image_path> <output_path>")
print("Arguments:")
print(" file_path: Path to the python script")
print(" image_path: Path to the image file")
print(" output_path: Path to the output text file")

def main(self):
def main(self) -> int:
"""
Main function to process the image and extract text.
This function performs the following steps:
1. Checks command line arguments for validity.
2. Verifies if the specified image file exists.
3. Opens and processes the image.
4. Extracts text from the processed image while preserving line breaks.
5. Saves the extracted text to an output file.
Returns:
int: 0 if text extraction is successful, 1 otherwise.
"""
# Check command line arguments
if len(sys.argv) != self.Args_num or sys.argv[1] in ['-h', '--help']:
if len(sys.argv) != self.args_num or sys.argv[1] in ['-h', '--help']:
self.help()
return 1

Expand Down

0 comments on commit 6d1089d

Please sign in to comment.