From 4ffb9671de4c623b4c9143303a150e5d175bfa19 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 26 Mar 2020 11:27:15 +1100 Subject: [PATCH 1/6] Include link to the cell with an error, in failure heading Papermill currently inserts an error message "An Exception was encountered at 'In [NNN]'" at the top of a notebook that fails to execute, where NNN is the cell execution count of the failing cell. This patch expands the "In [NNN]" text to be a link to the relevant cell, to make it easy to jump to. This works by inserting another markdown cell immediately before the failing cell, where the markdown cell contains an HTML anchor (specifically, `id="papermill-error-cell"`). The heading then links to it with a `#papermill-error-cell` fragment HREF. This helps reducing the scrolling and searching required for long notebooks where the error cell might be one of dozens, or hidden among long output. For an example of the latter, see . --- papermill/exceptions.py | 3 +- papermill/execute.py | 22 ++++++++++--- papermill/tests/notebooks/broken1.ipynb | 21 ++++++++++++ papermill/tests/test_execute.py | 44 +++++++++++++++++-------- 4 files changed, 71 insertions(+), 19 deletions(-) diff --git a/papermill/exceptions.py b/papermill/exceptions.py index 38a81687..ac3c49f1 100644 --- a/papermill/exceptions.py +++ b/papermill/exceptions.py @@ -22,7 +22,8 @@ class PapermillMissingParameterException(PapermillException): class PapermillExecutionError(PapermillException): """Raised when an exception is encountered in a notebook.""" - def __init__(self, exec_count, source, ename, evalue, traceback): + def __init__(self, cell_index, exec_count, source, ename, evalue, traceback): + self.cell_index = cell_index self.exec_count = exec_count self.source = source self.ename = ename diff --git a/papermill/execute.py b/papermill/execute.py index dd5853d8..17bfe527 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -146,7 +146,13 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): ERROR_MESSAGE_TEMPLATE = ( '' - "An Exception was encountered at 'In [%s]'." + "An Exception was encountered at 'In [%s]'." + '' +) + +ERROR_ANCHOR_MSG = ( + '' + 'Papermill encountered exception here:' '' ) @@ -162,7 +168,7 @@ def raise_for_execution_errors(nb, output_path): Path to write executed notebook """ error = None - for cell in nb.cells: + for index, cell in enumerate(nb.cells): if cell.get("outputs") is None: continue @@ -171,6 +177,7 @@ def raise_for_execution_errors(nb, output_path): if output.ename == "SystemExit" and (output.evalue == "" or output.evalue == "0"): continue error = PapermillExecutionError( + cell_index=index, exec_count=cell.execution_count, source=cell.source, ename=output.ename, @@ -180,9 +187,16 @@ def raise_for_execution_errors(nb, output_path): break if error: - # Write notebook back out with the Error Message at the top of the Notebook. + # Write notebook back out with the Error Message at the top of the Notebook, and a link to + # the relevant cell (by adding a note just before the failure with an HTML anchor) error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count) error_msg_cell = nbformat.v4.new_markdown_cell(error_msg) - nb.cells = [error_msg_cell] + nb.cells + error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG) + + # put the anchor before the cell with the error, before all the indices change due to the + # heading-prepending + nb.cells.insert(error.cell_index, error_anchor_cell) + nb.cells.insert(0, error_msg_cell) + write_ipynb(nb, output_path) raise error diff --git a/papermill/tests/notebooks/broken1.ipynb b/papermill/tests/notebooks/broken1.ipynb index b24dcba6..4da9470e 100644 --- a/papermill/tests/notebooks/broken1.ipynb +++ b/papermill/tests/notebooks/broken1.ipynb @@ -1,5 +1,12 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A markdown cell that makes the execution counts different to indices within the list of all cells." + ] + }, { "cell_type": "code", "execution_count": null, @@ -41,6 +48,20 @@ "print(\"We're good.\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another markdown cell" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "A third one." + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index f7e290b1..4d43f108 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -149,11 +149,19 @@ def test(self): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") - self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + self.assertRegex(nb.cells[0].source, r'^$') + + self.assertEqual(nb.cells[1].cell_type, "markdown") + self.assertEqual(nb.cells[2].execution_count, 1) + self.assertEqual(nb.cells[3].cell_type, "markdown") + self.assertEqual(nb.cells[4].cell_type, "markdown") + + self.assertEqual(nb.cells[5].cell_type, "markdown") + self.assertRegex(nb.cells[5].source, '') + self.assertEqual(nb.cells[6].execution_count, 2) + self.assertEqual(nb.cells[6].outputs[0].output_type, 'error') + + self.assertEqual(nb.cells[7].execution_count, None) class TestBrokenNotebook2(unittest.TestCase): @@ -170,12 +178,16 @@ def test(self): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") + self.assertRegex(nb.cells[0].source, r'^.*In \[2\].*$') self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'display_data') - self.assertEqual(nb.cells[2].outputs[1].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + + self.assertEqual(nb.cells[2].cell_type, "markdown") + self.assertRegex(nb.cells[2].source, '') + self.assertEqual(nb.cells[3].execution_count, 2) + self.assertEqual(nb.cells[3].outputs[0].output_type, 'display_data') + self.assertEqual(nb.cells[3].outputs[1].output_type, 'error') + + self.assertEqual(nb.cells[4].execution_count, None) class TestReportMode(unittest.TestCase): @@ -284,8 +296,12 @@ def test_sys_exit1(self): execute_notebook(get_notebook_path(notebook_name), result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r"^$") + self.assertRegex(nb.cells[0].source, r'^$') self.assertEqual(nb.cells[1].execution_count, 1) - self.assertEqual(nb.cells[2].execution_count, 2) - self.assertEqual(nb.cells[2].outputs[0].output_type, 'error') - self.assertEqual(nb.cells[3].execution_count, None) + + self.assertEqual(nb.cells[2].cell_type, "markdown") + self.assertRegex(nb.cells[2].source, '') + self.assertEqual(nb.cells[3].execution_count, 2) + self.assertEqual(nb.cells[3].outputs[0].output_type, 'error') + + self.assertEqual(nb.cells[4].execution_count, None) From 5ca839b7d0823539222906cb989136d0d2233abe Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Mon, 30 Mar 2020 16:41:22 +1100 Subject: [PATCH 2/6] Make lines shorter --- papermill/execute.py | 5 +++-- papermill/tests/test_execute.py | 15 ++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/papermill/execute.py b/papermill/execute.py index 17bfe527..9cea2858 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -144,14 +144,15 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): return nb +ERROR_STYLE = 'style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;"' ERROR_MESSAGE_TEMPLATE = ( - '' + '' "An Exception was encountered at 'In [%s]'." '' ) ERROR_ANCHOR_MSG = ( - '' + '' 'Papermill encountered exception here:' '' ) diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index 4d43f108..207db4c7 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -149,7 +149,10 @@ def test(self): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r'^$') + self.assertRegex( + nb.cells[0].source, + r'^$' + ) self.assertEqual(nb.cells[1].cell_type, "markdown") self.assertEqual(nb.cells[2].execution_count, 1) @@ -178,7 +181,10 @@ def test(self): execute_notebook(path, result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r'^.*In \[2\].*$') + self.assertRegex( + nb.cells[0].source, + r'^.*In \[2\].*$' + ) self.assertEqual(nb.cells[1].execution_count, 1) self.assertEqual(nb.cells[2].cell_type, "markdown") @@ -296,7 +302,10 @@ def test_sys_exit1(self): execute_notebook(get_notebook_path(notebook_name), result_path) nb = load_notebook_node(result_path) self.assertEqual(nb.cells[0].cell_type, "markdown") - self.assertRegex(nb.cells[0].source, r'^$') + self.assertRegex( + nb.cells[0].source, + r'^$' + ) self.assertEqual(nb.cells[1].execution_count, 1) self.assertEqual(nb.cells[2].cell_type, "markdown") From 49dbce4c6140e9cc14eade06a63fd97ff3e77fd3 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Mon, 30 Mar 2020 17:16:43 +1100 Subject: [PATCH 3/6] Wrap the ERROR_STYLE line too --- papermill/execute.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/papermill/execute.py b/papermill/execute.py index 9cea2858..a2d7eef2 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -144,7 +144,9 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): return nb -ERROR_STYLE = 'style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;"' +ERROR_STYLE = ( + 'style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;"' +) ERROR_MESSAGE_TEMPLATE = ( '' "An Exception was encountered at 'In [%s]'." From c0b761ccc73680e0cb2d6f1fcfed215450f32c45 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 1 Apr 2020 08:57:34 +1100 Subject: [PATCH 4/6] Update papermill/execute.py Co-Authored-By: Matthew Seal --- papermill/execute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/papermill/execute.py b/papermill/execute.py index a2d7eef2..59c2faee 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -155,7 +155,7 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): ERROR_ANCHOR_MSG = ( '' - 'Papermill encountered exception here:' + 'Execution encountered an exception here and stopped:' '' ) From bd759c43c408258dfff3beaa1f0a34a9925a47ca Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Wed, 1 Apr 2020 09:32:40 +1100 Subject: [PATCH 5/6] Remove left-over error marker cells when re-executing a notebook --- papermill/execute.py | 17 +++++++++++++++++ papermill/tests/notebooks/broken1.ipynb | 22 ++++++++++++++++++++++ papermill/tests/test_execute.py | 17 +++++++++++++++++ 3 files changed, 56 insertions(+) diff --git a/papermill/execute.py b/papermill/execute.py index 59c2faee..78560bf0 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -83,6 +83,8 @@ def execute_notebook( nb = parameterize_notebook(nb, parameters, report_mode) nb = prepare_notebook_metadata(nb, input_path, output_path, report_mode) + # clear out any existing error markers from previous papermill runs + nb = remove_error_markers(nb) if not prepare_only: # Fetch the kernel name if it's not supplied @@ -144,9 +146,12 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): return nb +ERROR_MARKER_TAG = "papermill-error-cell-tag" + ERROR_STYLE = ( 'style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;"' ) + ERROR_MESSAGE_TEMPLATE = ( '' "An Exception was encountered at 'In [%s]'." @@ -160,6 +165,16 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): ) +def remove_error_markers(nb): + nb = copy.deepcopy(nb) + nb.cells = [ + cell + for cell in nb.cells + if ERROR_MARKER_TAG not in cell.metadata.get("tags", []) + ] + return nb + + def raise_for_execution_errors(nb, output_path): """Assigned parameters into the appropriate place in the input notebook @@ -194,7 +209,9 @@ def raise_for_execution_errors(nb, output_path): # the relevant cell (by adding a note just before the failure with an HTML anchor) error_msg = ERROR_MESSAGE_TEMPLATE % str(error.exec_count) error_msg_cell = nbformat.v4.new_markdown_cell(error_msg) + error_msg_cell.metadata['tags'] = [ERROR_MARKER_TAG] error_anchor_cell = nbformat.v4.new_markdown_cell(ERROR_ANCHOR_MSG) + error_anchor_cell.metadata['tags'] = [ERROR_MARKER_TAG] # put the anchor before the cell with the error, before all the indices change due to the # heading-prepending diff --git a/papermill/tests/notebooks/broken1.ipynb b/papermill/tests/notebooks/broken1.ipynb index 4da9470e..5b8f5e94 100644 --- a/papermill/tests/notebooks/broken1.ipynb +++ b/papermill/tests/notebooks/broken1.ipynb @@ -1,5 +1,16 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "papermill-error-cell-tag" + ] + }, + "source": [ + "An Exception was encountered at 'In [1]'." + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -7,6 +18,17 @@ "A markdown cell that makes the execution counts different to indices within the list of all cells." ] }, + { + "cell_type": "markdown", + "metadata": { + "tags": [ + "papermill-error-cell-tag" + ] + }, + "source": [ + "Execution encountered an exception here and stopped:" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/papermill/tests/test_execute.py b/papermill/tests/test_execute.py index 207db4c7..1872ec38 100644 --- a/papermill/tests/test_execute.py +++ b/papermill/tests/test_execute.py @@ -144,6 +144,15 @@ def tearDown(self): def test(self): path = get_notebook_path('broken1.ipynb') + + # check that the notebook has two existing marker cells, so that this test is sure to be + # validating the removal logic (the markers are simulatin an error in the first code cell + # that has since been fixed) + original_nb = load_notebook_node(path) + self.assertEqual(original_nb.cells[0].metadata["tags"], ["papermill-error-cell-tag"]) + self.assertIn("In [1]", original_nb.cells[0].source) + self.assertEqual(original_nb.cells[2].metadata["tags"], ["papermill-error-cell-tag"]) + result_path = os.path.join(self.test_dir, 'broken1.ipynb') with self.assertRaises(PapermillExecutionError): execute_notebook(path, result_path) @@ -153,6 +162,7 @@ def test(self): nb.cells[0].source, r'^$' ) + self.assertEqual(nb.cells[0].metadata["tags"], ["papermill-error-cell-tag"]) self.assertEqual(nb.cells[1].cell_type, "markdown") self.assertEqual(nb.cells[2].execution_count, 1) @@ -161,11 +171,18 @@ def test(self): self.assertEqual(nb.cells[5].cell_type, "markdown") self.assertRegex(nb.cells[5].source, '') + self.assertEqual(nb.cells[5].metadata["tags"], ["papermill-error-cell-tag"]) self.assertEqual(nb.cells[6].execution_count, 2) self.assertEqual(nb.cells[6].outputs[0].output_type, 'error') self.assertEqual(nb.cells[7].execution_count, None) + # double check the removal (the new cells above should be the only two tagged ones) + self.assertEqual( + sum("papermill-error-cell-tag" in cell.metadata.get("tags", []) for cell in nb.cells), + 2 + ) + class TestBrokenNotebook2(unittest.TestCase): def setUp(self): From d752f4edf17687b3be4926aaf6eba599752489e8 Mon Sep 17 00:00:00 2001 From: Huon Wilson Date: Thu, 2 Apr 2020 11:57:57 +1100 Subject: [PATCH 6/6] Update papermill/execute.py --- papermill/execute.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/papermill/execute.py b/papermill/execute.py index 78560bf0..1c14760d 100644 --- a/papermill/execute.py +++ b/papermill/execute.py @@ -160,7 +160,7 @@ def prepare_notebook_metadata(nb, input_path, output_path, report_mode=False): ERROR_ANCHOR_MSG = ( '' - 'Execution encountered an exception here and stopped:' + 'Execution using papermill encountered an exception here and stopped:' '' )