From 8e642e13386e9d491c13c6c16c65d4d4f579aa4f Mon Sep 17 00:00:00 2001
From: vanithakattumuri <vanithakattumuri32@gmail.com>
Date: Fri, 18 Oct 2024 19:47:17 +0530
Subject: [PATCH] #2 written the code documentation of file CSV2Parquet.py

---
 PAMI/extras/convert/CSV2Parquet.py | 234 ++++++++++++++++++++++++-----
 1 file changed, 198 insertions(+), 36 deletions(-)

diff --git a/PAMI/extras/convert/CSV2Parquet.py b/PAMI/extras/convert/CSV2Parquet.py
index ab9ccd10..4e7bd24a 100644
--- a/PAMI/extras/convert/CSV2Parquet.py
+++ b/PAMI/extras/convert/CSV2Parquet.py
@@ -1,39 +1,34 @@
-# #csvParquet is a code used to convert temporal and utility types into sparse and dense format.
-# #
-# #**Importing this algorithm into a python program**
-# #--------------------------------------------------------
-# #
-# #             from PAMI.extras.csvParquet import csvParquet as cp
-# #
-# #             obj = cp.csvParquet(iFile, "\t", " ", " " )
-# #
-# #             obj.save()
-# #
-# #             obj.csvParquet("FileName") # To generate file in form of sparse or dense
-# #
-# #             obj.parquetFormat("FileName") # To generate file in form of sparse or dense
-# #
-
-
-
-
-
-# __copyright__ = """
-# Copyright (C)  2021 Rage Uday Kiran
-
-#      This program is free software: you can redistribute it and/or modify
-#      it under the terms of the GNU General Public License as published by
-#      the Free Software Foundation, either version 3 of the License, or
-#      (at your option) any later version.
-
-#      This program is distributed in the hope that it will be useful,
-#      but WITHOUT ANY WARRANTY; without even the implied warranty of
-#      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-#      GNU General Public License for more details.
-
-#      You should have received a copy of the GNU General Public License
-#      along with this program.  If not, see <https://www.gnu.org/licenses/>.
-# """
+# CSV2Parquet is a code used to convert CSV files into Parquet format and Parquet files back into CSV format.
+#
+# **Importing this algorithm into a python program**
+#
+#             import PAMI.extras.convert.CSV2Parquet as cp
+#
+#             obj = cp.CSV2Parquet('sampleDB.csv', 'output.parquet', sep)
+#
+#             obj = cp.Parquet2CSV('input.parquet', 'sampleDB.csv', sep)
+#
+#             obj.convert()
+#
+#             obj.printStats()
+#
+
+__copyright__ = """
+Copyright (C)  2021 Rage Uday Kiran
+
+     This program is free software: you can redistribute it and/or modify
+     it under the terms of the GNU General Public License as published by
+     the Free Software Foundation, either version 3 of the License, or
+     (at your option) any later version.
+
+     This program is distributed in the hope that it will be useful,
+     but WITHOUT ANY WARRANTY; without even the implied warranty of
+     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+     GNU General Public License for more details.
+
+     You should have received a copy of the GNU General Public License
+     along with this program.  If not, see <https://www.gnu.org/licenses/>.
+"""
 
 # import pandas as pd
 # import numpy as np
@@ -354,12 +349,73 @@
 
 
 class CSV2Parquet:
+    r"""
+        **About this algorithm**
+
+        :**Description**:  This class is to convert CSV files into Parquet format.
+
+        :**Reference**:
+
+        :**Parameters**:    - **inputFile** (*str*) -- *Path to the input CSV file.*
+                            - **outputFile** (*str*) -- *Path to the output Parquet file.*
+                            - **sep** (*str*) -- *This variable is used to distinguish items from one another. A tab space is a common choice; the constructor defines no default, so the separator must always be provided.*
+
+        :**Attributes**:    - **getMemoryUSS()** (*int*) -- *Returns the memory used by the process in USS.*
+                            - **getMemoryRSS()** (*int*) -- *Returns the memory used by the process in RSS.*
+                            - **getRuntime()** (*float*) -- *Returns the time taken to execute the conversion.*
+                            - **printStats()** -- *Prints statistics about memory usage and runtime.*
+
+        :**Methods**:       - **convert()** -- *Reads the input file, converts it to a Parquet file, and tracks memory usage and runtime.*
+
+
+        **Execution methods**
+
+        **Terminal command**
+
+        .. code-block:: console
+
+          Format:
+
+          (.venv) $ python3 CSV2Parquet.py <inputFile> <outputFile> <sep>
+
+          Example Usage:
+
+          (.venv) $ python3 CSV2Parquet.py sampleDB.csv output.parquet \t
+
+
+        **Calling from a python program**
+
+        .. code-block:: python
+
+                import PAMI.extras.convert.CSV2Parquet as cp
+
+                inputFile = 'sampleDB.csv'
+
+                sep = "\t"
+
+                outputFile = 'output.parquet'
+
+                obj = cp.CSV2Parquet(inputFile, outputFile, sep)
+
+                obj.convert()
+
+                obj.printStats()
+
+
+        **Credits**
+
+        The complete program was written by P. Likhitha  and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran.
+
+    """
     def __init__(self, inputFile, outputFile, sep):
         self.inputFile = inputFile
         self.outputFile = outputFile
         self.sep = sep
 
     def convert(self):
+        """
+        This function converts the input CSV file to a data frame, which is then transformed into a Parquet file.
+        """
         self.start = time.time()
         file = []
         maxLen = 0
@@ -383,15 +439,38 @@ def convert(self):
         self.memoryRSS = process.memory_info().rss
 
     def getMemoryUSS(self):
+        """
+        Returns the memory used by the process in USS (Unique Set Size).
+
+        :return: The amount of memory (in bytes) used exclusively by the process
+        :rtype: int
+        """
         return self.memoryUSS
     
     def getMemoryRSS(self):
+        """
+        Returns the memory used by the process in RSS (Resident Set Size).
+
+        :return: The total memory (in bytes) used by the process in RAM.
+        :rtype: int
+        """
         return self.memoryRSS
     
     def getRuntime(self):
+        """
+        Returns the time taken to complete the CSV to Parquet conversion.
+
+        :return: The runtime of the conversion process in seconds.
+        :rtype: float
+        """
         return self.end - self.start
     
     def printStats(self):
+        """
+        Prints the resource usage statistics including memory consumption (USS and RSS) and the runtime.
+
+        :return: None. The memory usage and runtime are printed to the console.
+        """
         print("Memory usage (USS):", self.memoryUSS)
         print("Memory usage (RSS):", self.memoryRSS)
         print("Runtime:", self.end - self.start)
@@ -406,12 +485,72 @@ def printStats(self):
 
 
 class Parquet2CSV:
+    r"""
+        **About this algorithm**
+
+        :**Description**:  This class is to convert Parquet format into CSV file.
+
+        :**Reference**:
+
+        :**Parameters**:    - **inputFile** (*str*) -- *Path to the input Parquet file.*
+                            - **outputFile** (*str*) -- *Path to the output CSV file.*
+                            - **sep** (*str*) -- *This variable is used to distinguish items from one another. A tab space is a common choice; the constructor defines no default, so the separator must always be provided.*
+
+        :**Attributes**:    - **getMemoryUSS()** (*int*) -- *Returns the memory used by the process in USS.*
+                            - **getMemoryRSS()** (*int*) -- *Returns the memory used by the process in RSS.*
+                            - **getRuntime()** (*float*) -- *Returns the time taken to execute the conversion.*
+                            - **printStats()** -- *Prints statistics about memory usage and runtime.*
+
+        :**Methods**:       - **convert()** -- *Reads the Parquet file, converts it to a CSV file, and tracks memory usage and runtime.*
+
+        **Execution methods**
+
+        **Terminal command**
+
+        .. code-block:: console
+
+          Format:
+
+          (.venv) $ python3 CSV2Parquet.py <inputFile> <outputFile> <sep>
+
+          Example Usage:
+
+          (.venv) $ python3 CSV2Parquet.py output.parquet sampleDB.csv \t
+
+
+        **Calling from a python program**
+
+        .. code-block:: python
+
+                import PAMI.extras.convert.CSV2Parquet as pc
+
+                inputFile = 'output.parquet'
+
+                sep = "\t"
+
+                outputFile = 'sampleDB.csv'
+
+                obj = pc.Parquet2CSV(inputFile, outputFile, sep)
+
+                obj.convert()
+
+                obj.printStats()
+
+
+        **Credits**
+
+        The complete program was written by P. Likhitha  and revised by Tarun Sreepada under the supervision of Professor Rage Uday Kiran.
+
+    """
     def __init__(self, inputFile, outputFile, sep):
         self.inputFile = inputFile
         self.outputFile = outputFile
         self.sep = sep
 
     def convert(self):
+        """
+        This function converts the input Parquet file into a CSV file where each row is joined by the specified separator and written to the output file.
+        """
         self.start = time.time()
         df = pd.read_parquet(self.inputFile)
 
@@ -427,15 +566,38 @@ def convert(self):
         self.memoryRSS = process.memory_info().rss
 
     def getMemoryUSS(self):
+        """
+        Returns the memory used by the process in USS (Unique Set Size).
+
+        :return: The amount of memory (in bytes) used exclusively by the process
+        :rtype: int
+        """
         return self.memoryUSS
     
     def getMemoryRSS(self):
+        """
+        Returns the memory used by the process in RSS (Resident Set Size).
+
+        :return: The total memory (in bytes) used by the process in RAM.
+        :rtype: int
+        """
         return self.memoryRSS
     
     def getRuntime(self):
+        """
+        Returns the time taken to complete the Parquet to CSV conversion.
+
+        :return: The runtime of the conversion process in seconds.
+        :rtype: float
+        """
         return self.end - self.start
     
     def printStats(self):
+        """
+        Prints the resource usage statistics including memory consumption (USS and RSS) and the runtime.
+
+        :return: None. The memory usage and runtime are printed to the console.
+        """
         print("Memory usage (USS):", self.memoryUSS)
         print("Memory usage (RSS):", self.memoryRSS)
         print("Runtime:", self.end - self.start)