Skip to content

Commit

Permalink
fix(pdf-text): add error management
Browse files Browse the repository at this point in the history
  • Loading branch information
leogail committed Sep 23, 2024
1 parent 3d8ed2c commit 2f72145
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 22 deletions.
18 changes: 7 additions & 11 deletions services/pdf-text/v1/pdf-raw.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,7 @@

def convert_pdf_to_xml(input_path):
result = subprocess.run(['pdftohtml','-xml', '-stdout','-hidden','-i','-q','-f',p, input_path], capture_output=True, text=True)
-
- # Check if conversion was successful
- if result.returncode == 0:
- # Get the XML content in a variable
- xml_content = result.stdout
- return xml_content
- else:
- # print error message if needed
- sys.stderr.write(f"Conversion of PDF {input_path} to XML has failed.")
- sys.stderr.write("\n")
- return None
+ return result.stdout


def remove_xml_tags(xml_string):
Expand Down Expand Up @@ -212,6 +202,12 @@ def get_alphabetic_numeric_ratio(chaine):
except OSError :
#print("Erreur lors de la suppression du fichier PDF:", e)
line0['value']="Erreur lors de la suppression du fichier PDF"
+
+ except Exception :
+ line0['value']="Erreur lors de la conversion du PDF en texte"
+
+ if line0['value']=="":
+ line0['value']="Erreur lors de la conversion du PDF en texte"

sys.stdout.write(json.dumps(line0))
sys.stdout.write('\n')
18 changes: 7 additions & 11 deletions services/pdf-text/v1/pdf2txt.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,7 @@

def convert_pdf_to_xml(input_path):
result = subprocess.run(['pdftohtml','-xml', '-stdout','-hidden','-i','-q','-f',p, input_path], capture_output=True, text=True)
-
- # Check if conversion was successful
- if result.returncode == 0:
- # Get the XML content in a variable
- xml_content = result.stdout
- return xml_content
- else:
- # print error message if needed
- sys.stderr.write(f"Conversion of PDF {input_path} to XML has failed.")
- sys.stderr.write("\n")
- return None
+ return result.stdout


def remove_xml_tags(xml_string):
Expand Down Expand Up @@ -226,6 +216,12 @@ def get_alphabetic_numeric_ratio(chaine):
except OSError :
#print("Erreur lors de la suppression du fichier PDF:", e)
line0['value']="Erreur lors de la suppression du fichier PDF"
+
+ except Exception :
+ line0['value']="Erreur lors de la conversion du PDF en texte"
+
+ if line0['value']=="":
+ line0['value']="Erreur lors de la conversion du PDF en texte"

sys.stdout.write(json.dumps(line0))
sys.stdout.write('\n')

0 comments on commit 2f72145

Please sign in to comment.