Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prefer logging to warning #673

Merged
merged 14 commits into from
Jan 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))

## [20201018]
Expand Down
12 changes: 12 additions & 0 deletions pdfminer/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):


class PDFNoValidXRefWarning(SyntaxWarning):
"""Legacy warning for missing xref.

Not used anymore because warnings.warn is replaced by logging.Logger.warning.
"""
pass


Expand All @@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):


class PDFEncryptionWarning(UserWarning):
"""Legacy warning for failed decryption.

Not used anymore because warnings.warn is replaced by logging.Logger.warning.
"""
pass


class PDFTextExtractionNotAllowedWarning(UserWarning):
"""Legacy warning for PDF that does not allow extraction.

Not used anymore because warnings.warn is replaced by logging.Logger.warning.
"""
pass


Expand Down
7 changes: 3 additions & 4 deletions pdfminer/pdfpage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from pdfminer.utils import Rect
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
import warnings
from . import settings
from .psparser import LIT
from .pdftypes import PDFObjectNotFound
Expand All @@ -11,7 +10,6 @@
from .pdftypes import dict_value
from .pdfparser import PDFParser
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
from .pdfdocument import PDFTextExtractionNotAllowedWarning


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -155,8 +153,9 @@ def get_pages(
warning_msg = 'The PDF %r contains a metadata field '\
'indicating that it should not allow ' \
'text extraction. Ignoring this field ' \
'and proceeding.' % fp
warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
'and proceeding. Use the check_extractable ' \
'if you want to raise an error in this case' % fp
log.warning(warning_msg)
# Process each page contained in the document.
for (pageno, page) in enumerate(cls.create_pages(doc)):
if pagenos and (pageno not in pagenos):
Expand Down
9 changes: 3 additions & 6 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import zlib
import warnings
import logging
import io
import sys
Expand All @@ -21,7 +20,7 @@
from .pdfdocument import PDFDocument


log = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

LITERAL_CRYPT = LIT('Crypt')

Expand Down Expand Up @@ -204,7 +203,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
x = resolve1(x)
if not isinstance(x, dict):
if settings.STRICT:
log.error('PDFTypeError : Dict required: %r', x)
logger.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x)
return {}
return x
Expand Down Expand Up @@ -236,9 +235,7 @@ def decompress_corrupted(data):
except zlib.error:
# Let the error propagate if we're not yet in the CRC checksum
if i < len(data) - 3:
# Import here to prevent circular import
from .pdfdocument import PDFEncryptionWarning
warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
logger.warning("Data-loss while decompressing corrupted data")
return result_str


Expand Down
16 changes: 5 additions & 11 deletions tests/test_tools_dumppdf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import warnings
import unittest
import logging
from nose.tools import raises
from helpers import absolute_sample_path
from tempfilepath import TemporaryFilePath
from pdfminer.pdfdocument import PDFNoValidXRefWarning
from tools import dumppdf


Expand All @@ -18,12 +18,9 @@ def run(filename, options=None):
dumppdf.main(s.split(' ')[1:])


class TestDumpPDF():
class TestDumpPDF(unittest.TestCase):
def test_simple1(self):
"""dumppdf.py simple1.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple1.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple1.pdf', '-t -a')

def test_simple2(self):
run('simple2.pdf', '-t -a')
Expand All @@ -32,10 +29,7 @@ def test_jo(self):
run('jo.pdf', '-t -a')

def test_simple3(self):
"""dumppdf.py simple3.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple3.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple3.pdf', '-t -a')

def test_2(self):
run('nonfree/dmca.pdf', '-t -a')
Expand Down
7 changes: 3 additions & 4 deletions tools/dumppdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import sys
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
Union, cast
import warnings
from argparse import ArgumentParser

import pdfminer
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
PDFNoValidXRefWarning
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
Expand All @@ -20,6 +18,7 @@
from pdfminer.utils import isnumber

logging.basicConfig()
logger = logging.getLogger(__name__)

ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')

Expand Down Expand Up @@ -115,7 +114,7 @@ def dumptrailers(
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
'you want to display the content of a fallback xref that ' \
'contains all objects.'
warnings.warn(msg, PDFNoValidXRefWarning)
logger.warning(msg)
return


Expand Down