Skip to content

Commit

Permalink
Fallback to kernelspec to check if it's a Python notebook (#12875)
Browse files Browse the repository at this point in the history
## Summary

This PR adds a fallback logic for `is_python_notebook` to check the
`kernelspec.language` field.

Reference implementation in VS Code:
https://github.com/microsoft/vscode/blob/1c31e758985efe11bc0453a45ea0bb6887e670a4/extensions/ipynb/src/deserializers.ts#L20-L22

It's also required for the kernel to provide the `language` they're
implementing based on
https://jupyter-client.readthedocs.io/en/stable/kernels.html#kernel-specs
reference although that's for the `kernel.json` file but is also
included in the notebook metadata.

Closes: #12281

## Test Plan

Add a test case for `is_python_notebook` and include the test notebook
for round trip validation.

The test notebook contains two cells, one is JavaScript (denoted via the
`vscode.languageId` metadata) and the other is Python (no metadata). The
notebook metadata only contains `kernelspec` and the `language_info` is
absent.

I also verified that this is a valid notebook by opening it in Jupyter
Lab, VS Code and using `nbformat` validator.
  • Loading branch information
dhruvmanila authored Aug 14, 2024
1 parent 89c8b49 commit 2520ebb
Show file tree
Hide file tree
Showing 3 changed files with 89 additions and 24 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Kernel spec language\n",
"\n",
"This is a test notebook for validating the fallback logic of `is_python_notebook` to check `kernelspec.language` if `language_info` is absent.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"vscode": {
"languageId": "javascript"
}
},
"outputs": [],
"source": [
"function add(x, y) {\n",
" return x + y;\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"print(\"hello world\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
42 changes: 21 additions & 21 deletions crates/ruff_notebook/src/notebook.rs
Original file line number Diff line number Diff line change
Expand Up @@ -408,13 +408,18 @@ impl Notebook {
&self.raw.metadata
}

/// Return `true` if the notebook is a Python notebook, `false` otherwise.
/// Check if it's a Python notebook.
///
/// This is determined by checking the `language_info` or `kernelspec` in the notebook
/// metadata. If neither is present, it's assumed to be a Python notebook.
pub fn is_python_notebook(&self) -> bool {
self.raw
.metadata
.language_info
.as_ref()
.map_or(true, |language| language.name == "python")
if let Some(language_info) = self.raw.metadata.language_info.as_ref() {
return language_info.name == "python";
}
if let Some(kernel_spec) = self.raw.metadata.kernelspec.as_ref() {
return kernel_spec.language.as_deref() == Some("python");
}
true
}

/// Write the notebook back to the given [`Write`] implementer.
Expand Down Expand Up @@ -456,18 +461,12 @@ mod tests {
Path::new("./resources/test/fixtures/jupyter").join(path)
}

#[test]
fn test_python() -> Result<(), NotebookError> {
let notebook = Notebook::from_path(&notebook_path("valid.ipynb"))?;
assert!(notebook.is_python_notebook());
Ok(())
}

#[test]
fn test_r() -> Result<(), NotebookError> {
let notebook = Notebook::from_path(&notebook_path("R.ipynb"))?;
assert!(!notebook.is_python_notebook());
Ok(())
#[test_case("valid.ipynb", true)]
#[test_case("R.ipynb", false)]
#[test_case("kernelspec_language.ipynb", true)]
fn is_python_notebook(filename: &str, expected: bool) {
let notebook = Notebook::from_path(&notebook_path(filename)).unwrap();
assert_eq!(notebook.is_python_notebook(), expected);
}

#[test]
Expand Down Expand Up @@ -597,9 +596,10 @@ print("after empty cells")
Ok(())
}

#[test]
fn round_trip() {
let path = notebook_path("vscode_language_id.ipynb");
#[test_case("vscode_language_id.ipynb")]
#[test_case("kernelspec_language.ipynb")]
fn round_trip(filename: &str) {
let path = notebook_path(filename);
let expected = std::fs::read_to_string(&path).unwrap();
let actual = super::round_trip(&path).unwrap();
assert_eq!(actual, expected);
Expand Down
23 changes: 20 additions & 3 deletions crates/ruff_notebook/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ pub struct CellMetadata {
/// preferred language.
/// <https://github.com/microsoft/vscode/blob/e6c009a3d4ee60f352212b978934f52c4689fbd9/extensions/ipynb/src/serializers.ts#L117-L122>
pub vscode: Option<CodeCellMetadataVSCode>,
/// Catch-all for metadata that isn't required by Ruff.
/// For additional properties that isn't required by Ruff.
#[serde(flatten)]
pub extra: HashMap<String, Value>,
}
Expand All @@ -190,8 +190,8 @@ pub struct RawNotebookMetadata {
/// The author(s) of the notebook document
pub authors: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Value>,
/// Kernel information.
pub kernelspec: Option<Kernelspec>,
/// Language information.
pub language_info: Option<LanguageInfo>,
/// Original notebook format (major number) before converting the notebook between versions.
/// This should never be written to a file.
Expand All @@ -206,6 +206,23 @@ pub struct RawNotebookMetadata {
/// Kernel information.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct Kernelspec {
/// The language name. This isn't mentioned in the spec but is populated by various tools and
/// can be used as a fallback if [`language_info`] is missing.
///
/// This is also used by VS Code to determine the preferred language of the notebook:
/// <https://github.com/microsoft/vscode/blob/1c31e758985efe11bc0453a45ea0bb6887e670a4/extensions/ipynb/src/deserializers.ts#L20-L22>.
///
/// [`language_info`]: RawNotebookMetadata::language_info
pub language: Option<String>,
/// For additional properties that isn't required by Ruff.
#[serde(flatten)]
pub extra: HashMap<String, Value>,
}

/// Language information.
#[skip_serializing_none]
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq)]
pub struct LanguageInfo {
/// The codemirror mode to use for code in this language.
pub codemirror_mode: Option<Value>,
Expand Down

0 comments on commit 2520ebb

Please sign in to comment.