diff --git a/README.md b/README.md index 6ef9904..77b260f 100644 --- a/README.md +++ b/README.md @@ -191,6 +191,20 @@ output = pypandoc.convert_file('demo.md', 'pdf', outputfile='demo.pdf', it won't work. This gotcha has to do with the way [`subprocess.Popen`](https://docs.python.org/2/library/subprocess.html#subprocess.Popen) works. +## Logging Messages + +Pypandoc logs messages using the [Python logging library](https://docs.python.org/3/library/logging.html). +By default, it will send messages to the console, including any messages +generated by Pandoc. If desired, this behaviour can be changed by adding +[handlers](https://docs.python.org/3/library/logging.html#handler-objects) to +the pypandoc logger **before calling any functions**. For example, to mute all +logging add a [null handler](https://docs.python.org/3/library/logging.handlers.html#nullhandler): + +```python +import logging +logging.getLogger('pypandoc').addHandler(logging.NullHandler()) +``` + ## Getting Pandoc Version As it can be useful sometimes to check what pandoc version is available at your system or which diff --git a/pypandoc/__init__.py b/pypandoc/__init__.py index cd27f00..cafee17 100644 --- a/pypandoc/__init__.py +++ b/pypandoc/__init__.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, with_statement +import logging import os import re import subprocess @@ -9,6 +10,7 @@ import textwrap import warnings +from .handler import _check_log_handler from .pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc from .py3compat import cast_bytes, cast_unicode, string_types, url2path, urlparse @@ -19,6 +21,9 @@ 'get_pandoc_formats', 'get_pandoc_version', 'get_pandoc_path', 'download_pandoc'] +# Set up the module level logger +logger = logging.getLogger(__name__) + def convert(source, to, format=None, extra_args=(), encoding='utf-8', outputfile=None, filters=None): @@ -69,7 +74,8 @@ def convert(source, to, format=None, extra_args=(), 
encoding='utf-8', def convert_text(source, to, format, extra_args=(), encoding='utf-8', - outputfile=None, filters=None, verify_format=True, sandbox=True): + outputfile=None, filters=None, verify_format=True, + sandbox=True): """Converts given `source` from `format` to `to`. :param str source: Unicode string or bytes (see encoding) @@ -109,7 +115,8 @@ def convert_text(source, to, format, extra_args=(), encoding='utf-8', def convert_file(source_file, to, format=None, extra_args=(), encoding='utf-8', - outputfile=None, filters=None, verify_format=True, sandbox=True): + outputfile=None, filters=None, verify_format=True, + sandbox=True): """Converts given `source` from `format` to `to`. :param str source_file: file path (see encoding) @@ -267,8 +274,11 @@ def _validate_formats(format, to, outputfile): return format, to -def _convert_input(source, format, input_type, to, extra_args=(), outputfile=None, - filters=None, verify_format=True, sandbox=True): +def _convert_input(source, format, input_type, to, extra_args=(), + outputfile=None, filters=None, verify_format=True, + sandbox=True): + + _check_log_handler() _ensure_pandoc_path() if verify_format: @@ -351,11 +361,56 @@ def _convert_input(source, format, input_type, to, extra_args=(), outputfile=Non raise RuntimeError( 'Pandoc died with exitcode "%s" during conversion: %s' % (p.returncode, stderr) ) + + # if there is output on stderr, process it and send to logger + if stderr: + for level, msg in _classify_pandoc_logging(stderr): + logger.log(level, msg) # if there is an outputfile, then stdout is likely empty! return stdout +def _classify_pandoc_logging(raw, default_level="WARNING"): + # Process raw and yield the contained logging levels and messages. + # Assumes that the messages are formatted like "[LEVEL] message". If the + # first message does not have a level, use the default_level value instead. 
+ + level_map = {"CRITICAL": 50, + "ERROR": 40, + "WARNING": 30, + "INFO": 20, + "DEBUG": 10, + "NOTSET": 0} + + msgs = raw.split("\n") + first = msgs.pop(0) + + search = re.search(r"\[(.*?)\]", first) + + # Use the default if the first message doesn't have a level + if search is None: + level = default_level + else: + level = first[search.start(1):search.end(1)] + + log_msgs = [first.replace('[{}] '.format(level), '')] + + for msg in msgs: + + search = re.search(r"\[(.*?)\]", msg) + + if search is not None: + yield level_map[level], "\n".join(log_msgs) + level = msg[search.start(1):search.end(1)] + log_msgs = [msg.replace('[{}] '.format(level), '')] + continue + + log_msgs.append(msg) + + yield level_map[level], "\n".join(log_msgs) + + def _get_base_format(format): ''' According to http://johnmacfarlane.net/pandoc/README.html#general-options, @@ -522,10 +577,13 @@ def ensure_pandoc_maximal_version(major, minor=9999): if version[0] < int(major): # if we have pandoc1 but major is request to be 2 return True return version[0] <= int(major) and version[1] <= int(minor) - -def _ensure_pandoc_path(quiet=False): - global __pandoc_path + +def _ensure_pandoc_path(): + global __pandoc_path + + _check_log_handler() + if __pandoc_path is None: included_pandoc = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files", "pandoc") @@ -567,13 +625,13 @@ def _ensure_pandoc_path(quiet=False): # print("Trying: %s" % path) try: version_string = _get_pandoc_version(path) - except Exception as e: + except Exception: # we can't use that path... if os.path.exists(path): # path exist but is not useable -> not executable? 
- if not quiet: - print("Found %s, but not using it because of an error:" % (path), file=sys.stderr) - print(e, file=sys.stderr) + log_msg = ("Found {}, but not using it because of an " + "error:".format(path)) + logging.exception(log_msg) continue version = [int(x) for x in version_string.split(".")] while len(version) < len(curr_version): @@ -587,58 +645,69 @@ def _ensure_pandoc_path(quiet=False): if __pandoc_path is None: # Only print hints if requested - if not quiet: - if os.path.exists('/usr/local/bin/brew'): - sys.stderr.write(textwrap.dedent("""\ - Maybe try: - - brew install pandoc - """)) - elif os.path.exists('/usr/bin/apt-get'): - sys.stderr.write(textwrap.dedent("""\ - Maybe try: - - sudo apt-get install pandoc - """)) - elif os.path.exists('/usr/bin/yum'): - sys.stderr.write(textwrap.dedent("""\ - Maybe try: - - sudo yum install pandoc - """)) - sys.stderr.write(textwrap.dedent("""\ - See http://johnmacfarlane.net/pandoc/installing.html - for installation options + if os.path.exists('/usr/local/bin/brew'): + logger.info(textwrap.dedent("""\ + Maybe try: + + brew install pandoc """)) - sys.stderr.write(textwrap.dedent("""\ - --------------------------------------------------------------- + elif os.path.exists('/usr/bin/apt-get'): + logger.info(textwrap.dedent("""\ + Maybe try: + sudo apt-get install pandoc """)) + elif os.path.exists('/usr/bin/yum'): + logger.info(textwrap.dedent("""\ + Maybe try: + + sudo yum install pandoc + """)) + logger.info(textwrap.dedent("""\ + See http://johnmacfarlane.net/pandoc/installing.html + for installation options + """)) + logger.info(textwrap.dedent("""\ + --------------------------------------------------------------- + + """)) raise OSError("No pandoc was found: either install pandoc and add it\n" "to your PATH or or call pypandoc.download_pandoc(...) 
or\n" "install pypandoc wheels with included pandoc.") -def ensure_pandoc_installed(url=None, targetfolder=None, version="latest", quiet=False, delete_installer=False): +def ensure_pandoc_installed(url=None, + targetfolder=None, + version="latest", + quiet=None, + delete_installer=False): """Try to install pandoc if it isn't installed. Parameters are passed to download_pandoc() :raises OSError: if pandoc cannot be installed """ + + if quiet is not None: + msg = ("The quiet flag in PyPandoc has been deprecated in favour of " + "logging. See README.md for more information.") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + # Append targetfolder to the PATH environment variable so it is found by subprocesses if targetfolder is not None: os.environ["PATH"] = os.environ.get("PATH", "") + os.pathsep + os.path.abspath(os.path.expanduser(targetfolder)) try: - # Perform the test quietly if asked - _ensure_pandoc_path(quiet=quiet) + _ensure_pandoc_path() except OSError: - download_pandoc(url=url, targetfolder=targetfolder, version=version, quiet=quiet, delete_installer=delete_installer) + download_pandoc(url=url, + targetfolder=targetfolder, + version=version, + delete_installer=delete_installer) # Show errors in case of secondary failure - _ensure_pandoc_path(quiet=False) + _ensure_pandoc_path() # ----------------------------------------------------------------------------- diff --git a/pypandoc/handler.py b/pypandoc/handler.py new file mode 100644 index 0000000..8b62d89 --- /dev/null +++ b/pypandoc/handler.py @@ -0,0 +1,24 @@ + +import logging + +logger = logging.getLogger(__name__.split('.')[0]) + + +def _check_log_handler(): + + # If logger has a handler do nothing + if logger.handlers: return + + # create console handler and set level to debug + ch = logging.StreamHandler() + ch.setLevel(logging.DEBUG) + logging.root.setLevel(logging.DEBUG) + + # create formatter + formatter = logging.Formatter('[%(levelname)s] %(message)s') + + # add formatter to ch + 
ch.setFormatter(formatter) + + # add ch to logger + logger.addHandler(ch) diff --git a/pypandoc/pandoc_download.py b/pypandoc/pandoc_download.py index df01434..f621548 100644 --- a/pypandoc/pandoc_download.py +++ b/pypandoc/pandoc_download.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- +import logging import os import os.path import platform @@ -8,12 +9,17 @@ import subprocess import sys import tempfile +import warnings try: from urllib.request import urlopen except ImportError: from urllib import urlopen +from .handler import _check_log_handler + +logger = logging.getLogger(__name__.split('.')[0]) + DEFAULT_TARGET_FOLDER = { "win32": "~\\AppData\\Local\\Pandoc", "linux": "~/bin", @@ -64,12 +70,12 @@ def _get_pandoc_urls(version="latest"): def _make_executable(path): mode = os.stat(path).st_mode mode |= (mode & 0o444) >> 2 # copy R bits to X - print("* Making %s executeable..." % (path)) + logger.info("Making %s executeable..." % (path)) os.chmod(path, mode) def _handle_linux(filename, targetfolder): - print("* Unpacking %s to tempfolder..." % (filename)) + logger.info("Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() cur_wd = os.getcwd() @@ -87,21 +93,21 @@ def _handle_linux(filename, targetfolder): exe = "pandoc" src = os.path.join(tempfolder, "usr", "bin", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) shutil.copyfile(src, dst) _make_executable(dst) exe = "pandoc-citeproc" src = os.path.join(tempfolder, "usr", "bin", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." 
% (exe, targetfolder)) try: shutil.copyfile(src, dst) _make_executable(dst) except FileNotFoundError: - print("Didn't copy pandoc-citeproc") + logger.exception("Didn't copy pandoc-citeproc") src = os.path.join(tempfolder, "usr", "share", "doc", "pandoc", "copyright") dst = os.path.join(targetfolder, "copyright.pandoc") - print("* Copying copyright to %s ..." % (targetfolder)) + logger.info("Copying copyright to %s ..." % (targetfolder)) shutil.copyfile(src, dst) finally: os.chdir(cur_wd) @@ -109,7 +115,7 @@ def _handle_linux(filename, targetfolder): def _handle_darwin(filename, targetfolder): - print("* Unpacking %s to tempfolder..." % (filename)) + logger.info("Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() @@ -128,27 +134,27 @@ def _handle_darwin(filename, targetfolder): exe = "pandoc" src = os.path.join(pkgutilfolder, "usr", "local", "bin", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) shutil.copyfile(src, dst) _make_executable(dst) exe = "pandoc-citeproc" src = os.path.join(pkgutilfolder, "usr", "local", "bin", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) try: shutil.copyfile(src, dst) _make_executable(dst) except FileNotFoundError: - print("Didn't copy pandoc-citeproc") + logger.exception("Didn't copy pandoc-citeproc") # remove temporary dir shutil.rmtree(tempfolder) - print("* Done.") + logger.info("Done.") def _handle_win32(filename, targetfolder): - print("* Unpacking %s to tempfolder..." % (filename)) + logger.info("Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() @@ -161,30 +167,35 @@ def _handle_win32(filename, targetfolder): exe = "pandoc.exe" src = os.path.join(tempfolder, "Pandoc", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." 
% (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) shutil.copyfile(src, dst) exe = "pandoc-citeproc.exe" src = os.path.join(tempfolder, "Pandoc", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) try: shutil.copyfile(src, dst) except FileNotFoundError: - print("Didn't copy pandoc-citeproc.exe") + logger.exception("Didn't copy pandoc-citeproc.exe") exe = "COPYRIGHT.txt" src = os.path.join(tempfolder, "Pandoc", exe) dst = os.path.join(targetfolder, exe) - print("* Copying %s to %s ..." % (exe, targetfolder)) + logger.info("Copying %s to %s ..." % (exe, targetfolder)) shutil.copyfile(src, dst) # remove temporary dir shutil.rmtree(tempfolder) - print("* Done.") + logger.info("Done.") -def download_pandoc(url=None, targetfolder=None, version="latest", quiet=False, delete_installer=False, download_folder=None): +def download_pandoc(url=None, + targetfolder=None, + version="latest", + quiet=None, + delete_installer=False, + download_folder=None): """Download and unpack pandoc Downloads prebuild binaries for pandoc from `url` and unpacks it into @@ -202,8 +213,13 @@ def download_pandoc(url=None, targetfolder=None, version="latest", quiet=False, :param str download_folder: Directory, where the installer should download files before unpacking to the target folder. If no `download_folder` is given, uses the current directory. example: `/tmp/`, `/tmp` """ - if quiet: - sys.stdout = open(os.devnull, 'w') + + if quiet is not None: + msg = ("The quiet flag in PyPandoc has been deprecated in favour of " + "logging. 
See README.md for more information.") + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + _check_log_handler() pf = sys.platform @@ -230,9 +246,9 @@ def download_pandoc(url=None, targetfolder=None, version="latest", quiet=False, filename = os.path.join(os.path.expanduser(download_folder), filename) if os.path.isfile(filename): - print("* Using already downloaded file %s" % (filename)) + logger.info("Using already downloaded file %s" % (filename)) else: - print("* Downloading pandoc from %s ..." % url) + logger.info("Downloading pandoc from %s ..." % url) # https://stackoverflow.com/questions/30627937/tracebaclk-attributeerroraddinfourl-instance-has-no-attribute-exit response = urlopen(url) with open(filename, 'wb') as out_file: @@ -254,5 +270,3 @@ def download_pandoc(url=None, targetfolder=None, version="latest", quiet=False, unpack(filename, targetfolder) if delete_installer: os.remove(filename) - if quiet: - sys.stdout = sys.__stdout__ diff --git a/tests.py b/tests.py index 68cdbb3..916a24d 100755 --- a/tests.py +++ b/tests.py @@ -3,7 +3,9 @@ import contextlib import io +import logging import os +import re import shutil import subprocess import sys @@ -15,6 +17,17 @@ from pypandoc.py3compat import path2url, string_types, unicode_type +@contextlib.contextmanager +def capture(command, *args, **kwargs): + err, sys.stderr = sys.stderr, io.StringIO() + try: + command(*args, **kwargs) + sys.stderr.seek(0) + yield sys.stderr.read() + finally: + sys.stderr = err + + @contextlib.contextmanager def closed_tempfile(suffix, text=None, dir_name=None): file_name = None @@ -274,6 +287,77 @@ def test_conversion_with_empty_filter(self): found = re.search(r'10.1038', written) self.assertTrue(found is None) + + def test_classify_pandoc_logging(self): + + test = ("[WARNING] This is some message on\ntwo lines\n" + "[ERROR] This is a second message.") + + expected_levels = [30, 40] + expected_msgs = ["This is some message on\ntwo lines", + "This is a second message."] + + for 
i, (l, m) in enumerate(pypandoc._classify_pandoc_logging(test)): + self.assertEqual(expected_levels[i], l) + self.assertEqual(expected_msgs[i], m) + + + def test_classify_pandoc_logging_default(self): + + test = ("This is some message on\ntwo lines\n" + "[ERROR] This is a second message.") + expected_levels = [30, 40] + expected_msgs = ["This is some message on\ntwo lines", + "This is a second message."] + + for i, (l, m) in enumerate(pypandoc._classify_pandoc_logging(test)): + self.assertEqual(expected_levels[i], l) + self.assertEqual(expected_msgs[i], m) + + + def test_conversion_stderr(self): + + # Clear logger handlers + logger = logging.getLogger("pypandoc") + logger.handlers = [] + + with closed_tempfile('.docx') as file_name: + text = ('![Mock](missing.png)\n' + '![Mock](missing.png)\n') + with capture(pypandoc.convert_text, + text, + to='docx', + format='md', + outputfile=file_name) as output: + output = re.sub(r'\r', '', output) + output = output.replace("'missing.png'", + "missing.png") + expected = (u'[WARNING] Could not fetch resource ' + u'missing.png: PandocResourceNotFound ' + u'"missing.png"\n' + u'[WARNING] Could not fetch resource ' + u'missing.png: PandocResourceNotFound ' + u'"missing.png"\n\n') + self.assertEqual(expected, output) + + + def test_conversion_stderr_nullhandler(self): + + # Replace any logging handlers with a null handler + logger = logging.getLogger("pypandoc") + logger.handlers = [logging.NullHandler()] + + with closed_tempfile('.docx') as file_name: + text = ('![Mock](missing.png)\n' + '![Mock](missing.png)\n') + with capture(pypandoc.convert_text, + text, + to='docx', + format='md', + outputfile=file_name) as output: + self.assertFalse(output) + + def test_conversion_error(self): # pandoc dies on wrong commandline arguments def f():