Merge pull request #2 from moheladwy/UpdateSetupScript

Added documentation for the functions in the Python script
moheladwy · Dec 14, 2024 · 6d1089d · 6d1089d
2 parents 7712f82 + d94795c
commit 6d1089d
Show file tree

Hide file tree

Showing 3 changed files with 107 additions and 15 deletions.
diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
@@ -28,4 +28,4 @@ jobs:
 
             - name: Lint with pylint
               run: |
-                  pylint --disable=R,C OCR4Linux.py
+                  pylint --rcfile=.pylintrc OCR4Linux.py
diff --git a/.pylintrc b/.pylintrc
@@ -0,0 +1,7 @@
+[MESSAGES CONTROL]
+disable=
+    C0114,  # Missing module docstring
+    C0115,  # Missing class docstring
+    E1101,  # No member
+    W0612,  # Unused variable
+    W0718   # Broad exception caught
diff --git a/OCR4Linux.py b/OCR4Linux.py
@@ -7,12 +7,6 @@
 #     The script uses tesseract to extract text from the image.
 #     The script uses wl-copy and cliphist for Wayland and xclip for X11 to copy the extracted text to the clipboard.
 #     The script uses a python script to extract text from the image.
-#     The script requires the following packages to be installed:
-#         - python
-#         - tesseract
-#         - grimblast or scrot
-#         - wl-clipboard or xclip
-#         - cliphist
 # ========================================================================================================================
 from PIL import Image
 import pytesseract
@@ -23,14 +17,65 @@
 
 
 class TesseractConfig:
+    """
+    TesseractConfig is a class that provides functionality to preprocess images,
+    and extract text from them using Tesseract OCR.
+
+    Methods:
+        __init__():
+            Initializes the TesseractConfig instance with command line arguments.
+
+        preprocess_image(image):
+            Preprocesses the given image to improve OCR accuracy.
+            Args:
+                image (PIL.Image): The image to preprocess.
+            Returns:
+                PIL.Image: The preprocessed image.
+
+        extract_text_with_lines(image):
+            Extracts text from the given image while preserving line breaks.
+            Args:
+                image (PIL.Image): The image from which to extract text.
+            Returns:
+                str: The extracted text with line breaks preserved.
+
+        help():
+            Prints the usage information for the script.
+
+        main():
+            The main method that processes the image and extracts text.
+            Returns:
+                int: 0 if successful, 1 otherwise.
+    """
+
     def __init__(self):
-        self.Args_num = 3
+        """
+        Initializes the TesseractConfig instance with command-line arguments.
+
+        Attributes:
+            args_num (int): The number of expected command-line arguments.
+            script_name (str): The name of the script being executed.
+            image_path (str): The path to the input image file.
+            output_path (str): The path to the output file where results will be saved.
+        """
+        self.args_num = 3
         self.script_name = sys.argv[0]
         self.image_path = sys.argv[1]
         self.output_path = sys.argv[2]
 
-    def preprocess_image(self, image):
-        """Preprocess image for better OCR accuracy"""
+    def preprocess_image(self, image) -> Image:
+        """
+        Preprocess image for better OCR accuracy.
+
+        This function converts the input image to grayscale, applies thresholding 
+        to binarize the image, and removes noise using a median blur filter.
+
+        Args:
+            image (PIL.Image.Image): The input image to preprocess.
+
+        Returns:
+            PIL.Image.Image: The preprocessed image.
+        """
         # Convert to grayscale
         gray = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2GRAY)
         # Apply thresholding
@@ -40,8 +85,21 @@ def preprocess_image(self, image):
         denoised = cv2.medianBlur(thresh, 3)
         return Image.fromarray(denoised)
 
-    def extract_text_with_lines(self, image):
-        """Extract text while preserving line breaks"""
+    def extract_text_with_lines(self, image: Image) -> str:
+        """
+        Extract text from an image while preserving line breaks.
+
+        This method uses Tesseract OCR to extract text from the provided image,
+        preserving the layout and line breaks. It filters out low-confidence
+        results to improve the accuracy of the extracted text.
+
+        Args:
+            image: The image from which to extract text. This should be in a
+                   format supported by the pytesseract library.
+
+        Returns:
+            A string containing the extracted text with line breaks preserved.
+        """
         # Tesseract options: default OCR engine mode (--oem 3), treat the image as a single uniform block of text (--psm 6)
         custom_config = r'--oem 3 --psm 6'
         # Extract text with layout preservation
@@ -65,16 +123,43 @@ def extract_text_with_lines(self, image):
         # Join text preserving line breaks
         return '\n'.join(' '.join(line).strip() for line in lines.values() if ''.join(line).strip())
 
-    def help(self):
+    def help(self) -> None:
+        """
+        Prints the usage instructions for the OCR4Linux script.
+
+        This method displays the correct way to run the script, including the required
+        arguments and their descriptions.
+
+        Usage:
+            python <script_name> <image_path> <output_path>
+
+        Arguments:
+            file_path: Path to the python script
+            image_path: Path to the image file
+            output_path: Path to the output text file
+        """
         print(f"Usage: python {self.script_name} <image_path> <output_path>")
         print("Arguments:")
         print("  file_path: Path to the python script")
         print("  image_path: Path to the image file")
         print("  output_path: Path to the output text file")
 
-    def main(self):
+    def main(self) -> int:
+        """
+        Main function to process the image and extract text.
+
+        This function performs the following steps:
+        1. Checks command line arguments for validity.
+        2. Verifies if the specified image file exists.
+        3. Opens and processes the image.
+        4. Extracts text from the processed image while preserving line breaks.
+        5. Saves the extracted text to an output file.
+
+        Returns:
+            int: 0 if text extraction is successful, 1 otherwise.
+        """
         # Check command line arguments
-        if len(sys.argv) != self.Args_num or sys.argv[1] in ['-h', '--help']:
+        if len(sys.argv) != self.args_num or sys.argv[1] in ['-h', '--help']:
             self.help()
             return 1