Skip to content

Commit

Permalink
Implement execution dependency extension.
Browse files Browse the repository at this point in the history
  • Loading branch information
benelot committed Jan 29, 2018
1 parent a4544ec commit 627b3cc
Show file tree
Hide file tree
Showing 3 changed files with 143 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
execution_dependencies
======================

Writing extensive notebooks can become very complicated since many cells act as stepping stones to produce intermediate results for later cells. Thus, it becomes tedious to
keep track of the cells that have to be run in order to run a certain cell. This extension simplifies handling the execution dependencies by introducing tag annotations to
identify each cell and indicate a dependency on others. This improves on the current state which requires remembering all dependencies by heart or annotating the cells in the comments.

The two annotations are added to the tags of a cell and are as follows:

* add a hashmark (#) and an identification tag to the tags to identify a cell (e.g. #initializer-cell).
* add an arrow (=>) and an identification tag to the tags to add a dependency on a certain cell (e.g. =>initializer-cell).

Based on these dependencies, the kernel executes a cell's dependencies before the cell that depends on them. If those dependencies have further dependencies of their own, these are in turn
executed before them. In summary, the kernel walks the tree of dependencies of the cell executed by the user, executes those dependencies in their appropriate order,
and then executes the cell itself.

A more extensive example is described below:

A cell A has the identifier #A.

| Cell A [tags: #A] |
| ------------- |
| Content Cell |
| Content Cell |


A cell B has the identifier #B and depends on A (=>A).


| Cell B [tags: #B, =>A] |
| ------------- |
| Content Cell |
| Content Cell |

If the user runs A, only A is executed, since it has no dependencies. On the other hand, if the user runs B, the kernel finds the dependency on A, and thus first runs A and then runs B.

When the user runs a cell C that depends on both B and A, the kernel first runs A, then B, and finally C, so that cell A is not run twice.

Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
define([
    'base/js/namespace',
    'notebook/js/codecell'
], function (
    Jupyter,
    codecell
) {
    "use strict";

    var CodeCell = codecell.CodeCell;

    // Tag prefixes understood by this extension:
    //   "#name"  gives the cell the identity "name"
    //   "=>name" declares a dependency on the cell identified as "name"
    var ID_PREFIX = '#';
    var DEP_PREFIX = '=>';

    /**
     * Return the tags of a cell, or an empty array when the cell has none.
     *
     * @param {Object} cell - notebook cell; `cell.metadata.tags` is read if present.
     * @returns {Array} the tag list (never null/undefined).
     */
    function get_tags(cell) {
        return (cell.metadata && cell.metadata.tags) || [];
    }

    /**
     * Return the names carried by all tags of `cell` that START with `prefix`,
     * with the prefix stripped. Anchoring at the start matters: a tag such as
     * "=>#x" is a dependency on "#x", not an identity.
     *
     * @param {Object} cell - notebook cell.
     * @param {string} prefix - ID_PREFIX or DEP_PREFIX.
     * @returns {Array<string>} names without the prefix, in tag order.
     */
    function get_prefixed_names(cell, prefix) {
        return get_tags(cell)
            .filter(tag => typeof tag === 'string' && tag.indexOf(prefix) === 0)
            .map(tag => tag.substring(prefix.length));
    }

    /**
     * Compute the cells that must run before `root_cell`, in execution order.
     *
     * Performs a depth-first, post-order walk that starts at `root_cell` and
     * follows "=>" tags, so only cells reachable from the root are returned and
     * every cell appears at most once (even if it carries several identities or
     * is required through several paths).
     *
     * @param {Object} root_cell - the cell the user asked to execute.
     * @param {Array<Object>} cells - all cells of the notebook.
     * @returns {Array<Object>|null} dependencies in the order they should run
     *     (root excluded), or null when a circular dependency is found.
     */
    function resolve_dependencies(root_cell, cells) {
        var VISITING = 1;
        var DONE = 2;

        // identity -> cell; if two cells share an identity the later one wins
        var cell_by_id = new Map();
        cells.forEach(function (cell) {
            get_prefixed_names(cell, ID_PREFIX).forEach(function (id) {
                cell_by_id.set(id, cell);
            });
        });

        var state = new Map(); // cell -> VISITING | DONE
        var order = [];

        // Returns false as soon as a cycle is detected, true otherwise.
        function visit(cell) {
            var cell_state = state.get(cell);
            if (cell_state === DONE) {
                return true;
            }
            if (cell_state === VISITING) {
                return false; // reached a cell that is still on the current path
            }
            state.set(cell, VISITING);

            var dep_ids = get_prefixed_names(cell, DEP_PREFIX);
            for (var i = 0; i < dep_ids.length; i++) {
                var dep_cell = cell_by_id.get(dep_ids[i]);
                if (dep_cell === undefined) {
                    // A dangling "=>name" must not break execution of the cell.
                    console.warn('[exec_deps] unknown dependency "' + dep_ids[i] + '" ignored');
                    continue;
                }
                if (!visit(dep_cell)) {
                    return false;
                }
            }

            state.set(cell, DONE);
            order.push(cell); // post-order: all dependencies are already queued
            return true;
        }

        if (!visit(root_cell)) {
            return null;
        }
        order.pop(); // the root is always last; the caller executes it itself
        return order;
    }

    return {
        /**
         * Extension entry point: wraps CodeCell.prototype.execute so that the
         * cells a cell depends on (via "=>" tags) are executed before it.
         */
        load_ipython_extension: function () {
            console.log('[exec_deps] patching CodeCell.execute');
            var orig_execute = codecell.CodeCell.prototype.execute; // original cell execute function

            CodeCell.prototype.execute = function (stop_on_error) {
                var root_cell = this;

                if (get_prefixed_names(root_cell, DEP_PREFIX).length > 0) {
                    var dependency_cells = resolve_dependencies(root_cell, Jupyter.notebook.get_cells());

                    if (dependency_cells === null) {
                        console.error('There is a circular dependency in your execute dependencies!');
                    } else {
                        dependency_cells.forEach(function (cell) {
                            if (cell instanceof CodeCell) {
                                // Use the ORIGINAL execute: the order is already fully
                                // resolved, so going through the patched method would
                                // resolve it again and run cells more than once.
                                orig_execute.call(cell, stop_on_error);
                            } else if (typeof cell.execute === 'function') {
                                // Non-code cells keep their own execute implementation.
                                cell.execute(stop_on_error);
                            }
                        });
                    }
                }

                // Finally run the cell the user actually triggered.
                return orig_execute.call(this, stop_on_error);
            };
            console.log('[exec_deps] loaded');
        }
    };
});
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Type: Jupyter Notebook Extension
Compatibility: 3.x, 4.x, 5.x
Name: Execution Dependencies
Main: execution_dependencies.js
Link: README.md
Description: |
Introduce tag annotations to identify each cell and indicate a dependency on others.
Upon running a cell, its dependencies are run first to prepare all dependencies.
Then the cell triggered by the user is run as soon as all its dependencies are met.

0 comments on commit 627b3cc

Please sign in to comment.