Add PEP 723 support to uv run

astral-sh · Jun 30, 2024 · 5dcc7b9 · 5dcc7b9
1 parent b4c53fd
commit 5dcc7b9
Show file tree

Hide file tree

Showing 11 changed files with 552 additions and 39 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -43,6 +43,7 @@ uv-macros = { path = "crates/uv-macros" }
 uv-normalize = { path = "crates/uv-normalize" }
 uv-requirements = { path = "crates/uv-requirements" }
 uv-resolver = { path = "crates/uv-resolver" }
+uv-scripts = { path = "crates/uv-scripts" }
 uv-settings = { path = "crates/uv-settings" }
 uv-state = { path = "crates/uv-state" }
 uv-tool = { path = "crates/uv-tool" }
@@ -94,6 +95,7 @@ itertools = { version = "0.13.0" }
 junction = { version = "1.0.0" }
 mailparse = { version = "0.15.0" }
 md-5 = { version = "0.10.6" }
+memchr = { version = "2.7.4" }
 miette = { version = "7.2.0" }
 nanoid = { version = "0.4.0" }
 once_cell = { version = "1.19.0" }

diff --git a/crates/uv-requirements/src/specification.rs b/crates/uv-requirements/src/specification.rs
@@ -321,4 +321,15 @@ impl RequirementsSpecification {
     ) -> Result<Self> {
         Self::from_sources(requirements, &[], &[], client_builder).await
     }
+
+    /// Build a [`RequirementsSpecification`] directly from already-parsed [`Requirement`]s.
+    ///
+    /// Each requirement is converted into an [`UnresolvedRequirementSpecification`]; every other
+    /// field of the specification takes its [`Default`] value.
+    pub fn from_requirements(requirements: Vec<Requirement>) -> Self {
+        let requirements = requirements
+            .into_iter()
+            .map(UnresolvedRequirementSpecification::from)
+            .collect();
+
+        Self {
+            requirements,
+            ..Self::default()
+        }
+    }
 }
diff --git a/crates/uv-scripts/Cargo.toml b/crates/uv-scripts/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "uv-scripts"
+version = "0.0.1"
+edition = "2021"
+description = "Parse PEP 723-style Python scripts."
+
+[lints]
+workspace = true
+
+[dependencies]
+pep440_rs = { workspace = true }
+pep508_rs = { workspace = true }
+pypi-types = { workspace = true }
+
+fs-err = { workspace = true, features = ["tokio"] }
+memchr = { workspace = true }
+once_cell = { workspace = true }
+serde = { workspace = true, features = ["derive"] }
+thiserror = { workspace = true }
+toml = { workspace = true }
+
+[dev-dependencies]
+indoc = { workspace = true }
diff --git a/crates/uv-scripts/src/lib.rs b/crates/uv-scripts/src/lib.rs
@@ -0,0 +1,283 @@
+use std::io;
+use std::path::Path;
+
+use memchr::memmem::Finder;
+use once_cell::sync::Lazy;
+use pypi_types::VerbatimParsedUrl;
+use serde::{Deserialize, Serialize};
+use thiserror::Error;
+
+static FINDER: Lazy<Finder> = Lazy::new(|| Finder::new(b"# /// script"));
+
+/// PEP 723 metadata as parsed from a `script` comment block.
+///
+/// Deserialized from the TOML embedded in the block; keys are spelled in kebab-case (e.g.,
+/// `requires-python`). PEP 723 makes both keys optional.
+///
+/// See: <https://peps.python.org/pep-0723/>
+#[derive(Debug, Serialize, Deserialize)]
+#[serde(rename_all = "kebab-case")]
+pub struct Pep723Metadata {
+    /// The PEP 508 requirements listed under `dependencies`.
+    ///
+    /// The key is optional in PEP 723, so a block that omits it (e.g., one that only sets
+    /// `requires-python`) deserializes to an empty list instead of failing with a
+    /// "missing field" error.
+    #[serde(default)]
+    pub dependencies: Vec<pep508_rs::Requirement<VerbatimParsedUrl>>,
+    /// The PEP 440 version specifiers listed under `requires-python`, if present.
+    pub requires_python: Option<pep440_rs::VersionSpecifiers>,
+}
+
+/// Errors that can occur while reading PEP 723 metadata from a script.
+///
+/// Every variant is transparent: it displays as, and reports the source of, the wrapped error.
+#[derive(Debug, Error)]
+pub enum Pep723Error {
+    /// Reading the script file failed.
+    #[error(transparent)]
+    Io(#[from] io::Error),
+    /// The script contents, from the opening pragma onwards, were not valid UTF-8.
+    #[error(transparent)]
+    Utf8(#[from] std::str::Utf8Error),
+    /// The extracted metadata block could not be deserialized from TOML.
+    #[error(transparent)]
+    Toml(#[from] toml::de::Error),
+}
+
+/// Read the PEP 723 `script` metadata from a Python file, if it exists.
+///
+/// Returns `Ok(None)` when the file does not exist, or when no `script` block can be extracted
+/// from it.
+///
+/// # Errors
+///
+/// Fails if the file cannot be read for any reason other than not existing, if the script block
+/// cannot be decoded as UTF-8, or if the embedded TOML does not deserialize into
+/// [`Pep723Metadata`].
+///
+/// See: <https://peps.python.org/pep-0723/>
+pub async fn read_pep723_metadata(
+    file: impl AsRef<Path>,
+) -> Result<Option<Pep723Metadata>, Pep723Error> {
+    let bytes = match fs_err::tokio::read(file).await {
+        Ok(bytes) => bytes,
+        // A missing file simply has no metadata; it is not an error.
+        Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(None),
+        Err(err) => return Err(Pep723Error::Io(err)),
+    };
+
+    // Pull out the `script` block (comment hashes removed) and deserialize it as TOML.
+    match extract_script_tag(&bytes)? {
+        Some(block) => Ok(Some(toml::from_str(&block)?)),
+        None => Ok(None),
+    }
+}
+
+/// Given the contents of a Python file, extract the `script` metadata block, with leading comment
+/// hashes removed.
+///
+/// The opening pragma is the first `# /// script` that occupies an entire line. Occurrences that
+/// merely appear inside a longer line (e.g., `pass # /// script` or `# /// scripts`) are skipped
+/// rather than ending the search, so a valid block later in the file is still found.
+///
+/// Returns `Ok(None)` if there is no such line, or if the comment lines that follow it contain
+/// no closing `# ///`.
+///
+/// # Errors
+///
+/// Returns [`Pep723Error::Utf8`] if the file, from the opening pragma onwards, is not valid
+/// UTF-8.
+///
+/// See: <https://peps.python.org/pep-0723/>
+fn extract_script_tag(contents: &[u8]) -> Result<Option<String>, Pep723Error> {
+    // Identify the opening pragma: the first match that forms a complete line of its own.
+    let needle_len = FINDER.needle().len();
+    let Some(index) = FINDER.find_iter(contents).find(|&index| {
+        // The pragma must be the first line, or immediately preceded by a newline...
+        let at_line_start = index == 0 || matches!(contents[index - 1], b'\r' | b'\n');
+
+        // ...and must be followed by the end of the file or by a line terminator recognized by
+        // `str::lines` (`\n` or `\r\n`), such that the line is exactly `# /// script`.
+        let rest = &contents[index + needle_len..];
+        let at_line_end = rest.is_empty() || rest.starts_with(b"\n") || rest.starts_with(b"\r\n");
+
+        at_line_start && at_line_end
+    }) else {
+        return Ok(None);
+    };
+
+    // Decode as UTF-8, starting at the opening pragma.
+    let contents = std::str::from_utf8(&contents[index..])?;
+
+    // > Every line between these two lines (# /// TYPE and # ///) MUST be a comment starting
+    // > with #. If there are characters after the # then the first character MUST be a space. The
+    // > embedded content is formed by taking away the first two characters of each line if the
+    // > second character is a space, otherwise just the first character (which means the line
+    // > consists of only a single #).
+    let mut toml = vec![];
+
+    // Skip the opening pragma line itself; it was validated above.
+    for line in contents.lines().skip(1) {
+        // Remove the leading `#`. A line that is not a comment ends the block.
+        let Some(line) = line.strip_prefix('#') else {
+            break;
+        };
+
+        // A bare `#` contributes an empty line.
+        if line.is_empty() {
+            toml.push("");
+            continue;
+        }
+
+        // Otherwise, the line _must_ start with ` `.
+        let Some(line) = line.strip_prefix(' ') else {
+            break;
+        };
+        toml.push(line);
+    }
+
+    // Find the closing `# ///`. The precedence is such that we need to identify the _last_ such
+    // line.
+    //
+    // For example, given:
+    // ```python
+    // # /// script
+    // #
+    // # ///
+    // #
+    // # ///
+    // ```
+    //
+    // The latter `///` is the closing pragma.
+    let Some(closing) = toml.iter().rposition(|line| *line == "///") else {
+        return Ok(None);
+    };
+
+    // Discard the closing `# ///` and any lines after it.
+    //
+    // For example, given:
+    // ```python
+    // # /// script
+    // #
+    // # ///
+    // #
+    // #
+    // ```
+    //
+    // We need to discard the last three lines.
+    toml.truncate(closing);
+
+    // Join the lines into a single string, terminated by a newline.
+    let toml = toml.join("\n") + "\n";
+
+    Ok(Some(toml))
+}
+
+#[cfg(test)]
+mod tests {
+    use indoc::indoc;
+
+    use super::extract_script_tag;
+
+    /// A content line that does not put a space after the `#` invalidates the block.
+    #[test]
+    fn missing_space() {
+        let contents = indoc! {r"
+            # /// script
+            #requires-python = '>=3.11'
+            # ///
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert!(actual.is_none());
+    }
+
+    /// A block without a closing `# ///` yields no metadata.
+    #[test]
+    fn no_closing_pragma() {
+        let contents = indoc! {r"
+            # /// script
+            # requires-python = '>=3.11'
+            # dependencies = [
+            #   'requests<3',
+            #   'rich',
+            # ]
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert!(actual.is_none());
+    }
+
+    /// The opening pragma must start its line; code before it disqualifies it.
+    #[test]
+    fn leading_content() {
+        let contents = indoc! {r"
+            pass # /// script
+            # requires-python = '>=3.11'
+            # dependencies = [
+            #   'requests<3',
+            #   'rich',
+            # ]
+            # ///
+            #
+            #
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert!(actual.is_none());
+    }
+
+    /// A well-formed block is returned with the comment prefixes stripped.
+    #[test]
+    fn simple() {
+        let contents = indoc! {r"
+            # /// script
+            # requires-python = '>=3.11'
+            # dependencies = [
+            #   'requests<3',
+            #   'rich',
+            # ]
+            # ///
+        "};
+
+        let expected = indoc! {r"
+            requires-python = '>=3.11'
+            dependencies = [
+              'requests<3',
+              'rich',
+            ]
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert_eq!(actual.as_deref(), Some(expected));
+    }
+
+    /// Lines that look like the closing pragma inside the block are kept; only the last `# ///`
+    /// closes it.
+    #[test]
+    fn embedded_comment() {
+        let contents = indoc! {r"
+            # /// script
+            # embedded-csharp = '''
+            # /// <summary>
+            # /// text
+            # ///
+            # /// </summary>
+            # public class MyClass { }
+            # '''
+            # ///
+        "};
+
+        let expected = indoc! {r"
+            embedded-csharp = '''
+            /// <summary>
+            /// text
+            ///
+            /// </summary>
+            public class MyClass { }
+            '''
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert_eq!(actual.as_deref(), Some(expected));
+    }
+
+    /// Comment lines after the closing pragma are discarded.
+    #[test]
+    fn trailing_lines() {
+        let contents = indoc! {r"
+            # /// script
+            # requires-python = '>=3.11'
+            # dependencies = [
+            #   'requests<3',
+            #   'rich',
+            # ]
+            # ///
+            #
+            #
+        "};
+
+        let expected = indoc! {r"
+            requires-python = '>=3.11'
+            dependencies = [
+              'requests<3',
+              'rich',
+            ]
+        "};
+
+        let actual = extract_script_tag(contents.as_bytes()).unwrap();
+
+        assert_eq!(actual.as_deref(), Some(expected));
+    }
+}
diff --git a/crates/uv/Cargo.toml b/crates/uv/Cargo.toml
@@ -33,9 +33,10 @@ uv-installer = { workspace = true }
 uv-normalize = { workspace = true }
 uv-requirements = { workspace = true }
 uv-resolver = { workspace = true }
+uv-scripts = { workspace = true }
 uv-settings = { workspace = true, features = ["schemars"] }
-uv-toolchain = { workspace = true, features = ["schemars"]}
 uv-tool = { workspace = true }
+uv-toolchain = { workspace = true, features = ["schemars"]}
 uv-types = { workspace = true }
 uv-virtualenv = { workspace = true }
 uv-warnings = { workspace = true }