Use logger.warn instead of warnings.warn if warning cannot be prevent…

…ed by user (#673) * Use logging.Logger.warning instead of warnings.warn in most cases, following the Python official guidance that warnings.warn is directed at _developers_, not users * (pdfdocument.py) remove declarations of PDFTextExtractionNotAllowedWarning, PDFNoValidXRefWarning * (pdfpage.py) Don't import warnings, don't use PDFTextExtractionNotAllowedWarning * (tools/dumppdf.py) Don't import warnings, don't use PDFNoValidXRefWarning * (tests/test_tools_dumppdf.py) Don't import warnings, check for logging.WARN rather than PDFNoValidXRefWarning * get name right * make flake8 happy * Keep warning classes such that this does not crash code when these warnings are explicitly ignored * Update changelog to include pr ref * Small textual change * Remove patch * No need for testing if the warning is actually raised. The tests in test_tools_dumppdf.py just check whether these PDFs are supported. * Use logger as name for logger * Add docs to legacy warnings * Use logging.Logger.warning for failed decompression * Add reference to docs describing when to use logger and warnings Co-authored-by: Henry S. Thompson <ht@home.hst.name> Co-authored-by: Pieter Marsman <pietermarsman@gmail.com>
pdfminer · Jan 26, 2022 · dc530f3 · dc530f3
1 parent c4ac514
commit dc530f3
Show file tree

Hide file tree

Showing 6 changed files with 27 additions and 25 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
 - Support for Python 3.4 and 3.5 ([#522](https://github.com/pdfminer/pdfminer.six/pull/522))
 - Unused dependency on `sortedcontainers` package ([#525](https://github.com/pdfminer/pdfminer.six/pull/525))
 - Support for non-standard output streams that are not binary ([#523](https://github.com/pdfminer/pdfminer.six/pull/523))
+- Replace warnings.warn with logging.Logger.warning in line with [recommended use](https://docs.python.org/3/howto/logging.html#when-to-use-logging) ([#673](https://github.com/pdfminer/pdfminer.six/pull/673))
 - Dependency on typing-extensions introduced by [#661](https://github.com/pdfminer/pdfminer.six/pull/661) ([#677](https://github.com/pdfminer/pdfminer.six/pull/677))
 
 ## [20201018]

diff --git a/pdfminer/pdfdocument.py b/pdfminer/pdfdocument.py
@@ -25,6 +25,10 @@ class PDFNoValidXRef(PDFSyntaxError):
 
 
 class PDFNoValidXRefWarning(SyntaxWarning):
+    """Legacy warning for missing xref.
+
+    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
+    """
     pass
 
 
@@ -41,10 +45,18 @@ class PDFEncryptionError(PDFException):
 
 
 class PDFEncryptionWarning(UserWarning):
+    """Legacy warning for failed decryption.
+
+    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
+    """
     pass
 
 
 class PDFTextExtractionNotAllowedWarning(UserWarning):
+    """Legacy warning for PDF that does not allow extraction.
+
+    Not used anymore because warnings.warn is replaced by logger.Logger.warn.
+    """
     pass
 
 

diff --git a/pdfminer/pdfpage.py b/pdfminer/pdfpage.py
@@ -1,7 +1,6 @@
 import logging
 from pdfminer.utils import Rect
 from typing import BinaryIO, Container, Dict, Iterator, List, Optional, Tuple
-import warnings
 from . import settings
 from .psparser import LIT
 from .pdftypes import PDFObjectNotFound
@@ -11,7 +10,6 @@
 from .pdftypes import dict_value
 from .pdfparser import PDFParser
 from .pdfdocument import PDFDocument, PDFTextExtractionNotAllowed
-from .pdfdocument import PDFTextExtractionNotAllowedWarning
 
 
 log = logging.getLogger(__name__)
@@ -155,8 +153,9 @@ def get_pages(
                 warning_msg = 'The PDF %r contains a metadata field '\
                             'indicating that it should not allow '   \
                             'text extraction. Ignoring this field '  \
-                            'and proceeding.' % fp
-                warnings.warn(warning_msg, PDFTextExtractionNotAllowedWarning)
+                            'and proceeding. Use the check_extractable ' \
+                            'if you want to raise an error in this case' % fp
+                log.warning(warning_msg)
         # Process each page contained in the document.
         for (pageno, page) in enumerate(cls.create_pages(doc)):
             if pagenos and (pageno not in pagenos):

diff --git a/pdfminer/pdftypes.py b/pdfminer/pdftypes.py
@@ -1,5 +1,4 @@
 import zlib
-import warnings
 import logging
 import io
 import sys
@@ -21,7 +20,7 @@
     from .pdfdocument import PDFDocument
 
 
-log = logging.getLogger(__name__)
+logger = logging.getLogger(__name__)
 
 LITERAL_CRYPT = LIT('Crypt')
 
@@ -205,7 +204,7 @@ def dict_value(x: object) -> Dict[Any, Any]:
     x = resolve1(x)
     if not isinstance(x, dict):
         if settings.STRICT:
-            log.error('PDFTypeError : Dict required: %r', x)
+            logger.error('PDFTypeError : Dict required: %r', x)
             raise PDFTypeError('Dict required: %r' % x)
         return {}
     return x
@@ -237,9 +236,7 @@ def decompress_corrupted(data):
     except zlib.error:
         # Let the error propagates if we're not yet in the CRC checksum
         if i < len(data) - 3:
-            # Import here to prevent circualr import
-            from .pdfdocument import PDFEncryptionWarning
-            warnings.warn("Data-loss while decompressing corrupted data", PDFEncryptionWarning)
+            logger.warning("Data-loss while decompressing corrupted data")
     return result_str
 
 

diff --git a/tests/test_tools_dumppdf.py b/tests/test_tools_dumppdf.py
@@ -1,8 +1,8 @@
-import warnings
+import unittest
+import logging
 from nose.tools import raises
 from helpers import absolute_sample_path
 from tempfilepath import TemporaryFilePath
-from pdfminer.pdfdocument import PDFNoValidXRefWarning
 from tools import dumppdf
 
 
@@ -18,12 +18,9 @@ def run(filename, options=None):
         dumppdf.main(s.split(' ')[1:])
 
 
-class TestDumpPDF():
+class TestDumpPDF(unittest.TestCase):
     def test_simple1(self):
-        """dumppdf.py simple1.pdf raises a warning because it has no xref"""
-        with warnings.catch_warnings(record=True) as ws:
-            run('simple1.pdf', '-t -a')
-            assert any(w.category == PDFNoValidXRefWarning for w in ws)
+        run('simple1.pdf', '-t -a')
 
     def test_simple2(self):
         run('simple2.pdf', '-t -a')
@@ -32,10 +29,7 @@ def test_jo(self):
         run('jo.pdf', '-t -a')
 
     def test_simple3(self):
-        """dumppdf.py simple3.pdf raises a warning because it has no xref"""
-        with warnings.catch_warnings(record=True) as ws:
-            run('simple3.pdf', '-t -a')
-            assert any(w.category == PDFNoValidXRefWarning for w in ws)
+        run('simple3.pdf', '-t -a')
 
     def test_2(self):
         run('nonfree/dmca.pdf', '-t -a')

diff --git a/tools/dumppdf.py b/tools/dumppdf.py
@@ -6,12 +6,10 @@
 import sys
 from typing import Any, Container, Dict, Iterable, List, Optional, TextIO, \
     Union, cast
-import warnings
 from argparse import ArgumentParser
 
 import pdfminer
-from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback, \
-    PDFNoValidXRefWarning
+from pdfminer.pdfdocument import PDFDocument, PDFNoOutlines, PDFXRefFallback
 from pdfminer.pdfpage import PDFPage
 from pdfminer.pdfparser import PDFParser
 from pdfminer.pdftypes import PDFObjectNotFound, PDFValueError
@@ -20,6 +18,7 @@
 from pdfminer.utils import isnumber
 
 logging.basicConfig()
+logger = logging.getLogger(__name__)
 
 ESC_PAT = re.compile(r'[\000-\037&<>()"\042\047\134\177-\377]')
 
@@ -115,7 +114,7 @@ def dumptrailers(
         msg = 'This PDF does not have an xref. Use --show-fallback-xref if ' \
               'you want to display the content of a fallback xref that ' \
               'contains all objects.'
-        warnings.warn(msg, PDFNoValidXRefWarning)
+        logger.warning(msg)
     return