-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpdf_to_text.py
34 lines (25 loc) · 1006 Bytes
/
pdf_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
import os
from pdfminer.high_level import extract_text
def ensure_directory_exists(path):
    """Create the directory ``path`` (and any missing parents) if needed.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` already exists but is not a directory.
        OSError: For any other failure reported by ``os.makedirs``.
    """
    # ``exist_ok=True`` already makes the call a no-op for an existing
    # directory, so a separate existence check is unnecessary. Skipping that
    # check also means an existing non-directory at ``path`` is reported as an
    # error instead of being silently accepted.
    os.makedirs(path, exist_ok=True)
def process_pdf_to_text(input_pdf_path):
    """Extract the text of a PDF and save it next to the PDF as a ``.txt`` file.

    The output path is ``input_pdf_path`` with its extension replaced by
    ``.txt``. Status and error messages are printed with ``print``.

    Args:
        input_pdf_path: Path to the PDF file to read.

    Returns:
        None. When the input file is missing or no usable text could be
        extracted, an error message is printed and no output file is written.

    Exceptions raised by ``extract_text`` or while writing the output file
    are not caught here and propagate to the caller.
    """
    if not os.path.isfile(input_pdf_path):
        print(f"Error: File {input_pdf_path} not found.")
        return

    # Extract text from PDF
    text = extract_text(input_pdf_path)

    # A result made up only of whitespace (for example the form-feed page
    # separators pdfminer emits for pages without a text layer) carries no
    # usable text, so treat it the same as an empty result instead of
    # writing a blank file and reporting success.
    if not text or not text.strip():
        print("Error: Failed to extract text from PDF.")
        return

    # Prepare output file path: same location and base name, ``.txt`` suffix.
    output_txt_path = os.path.splitext(input_pdf_path)[0] + '.txt'

    # Save extracted text to a .txt file
    with open(output_txt_path, 'w', encoding='utf-8') as text_file:
        text_file.write(text)

    print(f"Text extracted and saved successfully to {output_txt_path}")
if __name__ == '__main__':
    import sys

    # Script entry point. Use the first command-line argument as the PDF path
    # when one is given; otherwise fall back to the sample file name so that
    # running the script with no arguments behaves as before.
    filename = sys.argv[1] if len(sys.argv) > 1 else "_YAHYAGHANI_.pdf"
    # Resolve relative names against the current working directory.
    input_pdf_path = os.path.abspath(filename)
    process_pdf_to_text(input_pdf_path)