Skip to content

Commit

Permalink
Use logger.warn instead of warnings.warn if warning cannot be prevent…
Browse files Browse the repository at this point in the history
…ed by user (#673)

* Use logging.Logger.warning instead of warnings.warn in most cases, following
 the official Python guidance that warnings.warn is directed at _developers_,
 not users

 * (pdfdocument.py) remove declarations of PDFTextExtractionNotAllowedWarning,
			PDFNoValidXRefWarning

 * (pdfpage.py) Don't import warnings, don't use PDFTextExtractionNotAllowedWarning

 * (tools/dumppdf.py) Don't import warnings, don't use PDFNoValidXRefWarning

 * (tests/test_tools_dumppdf.py) Don't import warnings, check for logging.WARN rather
				  than PDFNoValidXRefWarning

* get name right

* make flake8 happy

* Keep the warning classes so that code which explicitly ignores these warnings does not crash

* Update changelog to include pr ref

* Small textual change

* Remove patch

* No need to test whether the warning is actually raised. The tests in test_tools_dumppdf.py only check that these PDFs are supported.

* Use logger as name for logger

* Add docs to legacy warnings

* Use logging.Logger.warning for failed decompression

* Add reference to docs describing when to use logger and warnings

Co-authored-by: Henry S. Thompson <ht@home.hst.name>
Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
  • Loading branch information
3 people authored Jan 26, 2022
1 parent c4ac514 commit dc530f3
Show file tree
Hide file tree
Showing 6 changed files with 27 additions and 25 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
- Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
- Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
- Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
- Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))

## [20201018]
Expand Down
12 changes: 12 additions & 0 deletions pdfminer/pdfdocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):


class PDFNoValidXRefWarning(SyntaxWarning):
    """Legacy warning category for a missing xref.

    Not used anymore: the condition is now reported through
    ``logging.Logger.warning`` instead of ``warnings.warn``. The class is
    kept so that code which explicitly ignores this warning category does
    not crash.
    """


Expand All @@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):


class PDFEncryptionWarning(UserWarning):
    """Legacy warning category for failed decryption.

    Not used anymore: the condition is now reported through
    ``logging.Logger.warning`` instead of ``warnings.warn``. The class is
    kept so that code which explicitly ignores this warning category does
    not crash.
    """


class PDFTextExtractionNotAllowedWarning(UserWarning):
    """Legacy warning category for a PDF that does not allow extraction.

    Not used anymore: the condition is now reported through
    ``logging.Logger.warning`` instead of ``warnings.warn``. The class is
    kept so that code which explicitly ignores this warning category does
    not crash.
    """


Expand Down
7 changes: 3 additions & 4 deletions pdfminer/pdfpage.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import logging
from pdfminer.utils import Rect
from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
import warnings
from . import settings
from .psparser import LIT
from .pdftypes import PDFObjectNotFound
Expand All @@ -11,7 +10,6 @@
from .pdftypes import dict_value
from .pdfparser import PDFParser
from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
from .pdfdocument import PDFTextExtractionNotAllowedWarning


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -155,8 +153,9 @@ def get_pages(
warning_msg = 'The PDF %r contains a metadata field '\
'indicating that it should not allow ' \
'text extraction. Ignoring this field ' \
'and proceeding.' % fp
warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
'and proceeding. Use the check_extractable ' \
'if you want to raise an error in this case' % fp
log.warning(warning_msg)
# Process each page contained in the document.
for (pageno, page) in enumerate(cls.create_pages(doc)):
if pagenos and (pageno not in pagenos):
Expand Down
9 changes: 3 additions & 6 deletions pdfminer/pdftypes.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import zlib
import warnings
import logging
import io
import sys
Expand All @@ -21,7 +20,7 @@
from .pdfdocument import PDFDocument


log = logging.getLogger(__name__)
logger = logging.getLogger(__name__)

LITERAL_CRYPT = LIT('Crypt')

Expand Down Expand Up @@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
x = resolve1(x)
if not isinstance(x, dict):
if settings.STRICT:
log.error('PDFTypeError : Dict required: %r', x)
logger.error('PDFTypeError : Dict required: %r', x)
raise PDFTypeError('Dict required: %r' % x)
return {}
return x
Expand Down Expand Up @@ -237,9 +236,7 @@ def decompress_corrupted(data):
except zlib.error:
# Let the error propagates if we're not yet in the CRC checksum
if i < len(data) - 3:
# Import here to prevent circualr import
from .pdfdocument import PDFEncryptionWarning
warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
logger.warning("Data-loss while decompressing corrupted data")
return result_str


Expand Down
16 changes: 5 additions & 11 deletions tests/test_tools_dumppdf.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import warnings
import unittest
import logging
from nose.tools import raises
from helpers import absolute_sample_path
from tempfilepath import TemporaryFilePath
from pdfminer.pdfdocument import PDFNoValidXRefWarning
from tools import dumppdf


Expand All @@ -18,12 +18,9 @@ def run(filename, options=None):
dumppdf.main(s.split(' ')[1:])


class TestDumpPDF():
class TestDumpPDF(unittest.TestCase):
def test_simple1(self):
"""dumppdf.py simple1.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple1.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple1.pdf', '-t -a')

def test_simple2(self):
run('simple2.pdf', '-t -a')
Expand All @@ -32,10 +29,7 @@ def test_jo(self):
run('jo.pdf', '-t -a')

def test_simple3(self):
"""dumppdf.py simple3.pdf raises a warning because it has no xref"""
with warnings.catch_warnings(record=True) as ws:
run('simple3.pdf', '-t -a')
assert any(w.category == PDFNoValidXRefWarning for w in ws)
run('simple3.pdf', '-t -a')

def test_2(self):
run('nonfree/dmca.pdf', '-t -a')
Expand Down
7 changes: 3 additions & 4 deletions tools/dumppdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
import sys
from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
Union, cast
import warnings
from argparse import ArgumentParser

import pdfminer
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
PDFNoValidXRefWarning
from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
from pdfminer.pdfpage import PDFPage
from pdfminer.pdfparser import PDFParser
from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
Expand All @@ -20,6 +18,7 @@
from pdfminer.utils import isnumber

logging.basicConfig()
logger = logging.getLogger(__name__)

ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')

Expand Down Expand Up @@ -115,7 +114,7 @@ def dumptrailers(
msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
'you want to display the content of a fallback xref that ' \
'contains all objects.'
warnings.warn(msg, PDFNoValidXRefWarning)
logger.warning(msg)
return


Expand Down

0 comments on commit dc530f3

Please sign in to comment.