From 96fc2d8d95249ccead804d6dcf6d8a3850b456b1 Mon Sep 17 00:00:00 2001 From: CARLENS Jean-Philippe Date: Sun, 25 Aug 2024 19:24:29 +0200 Subject: [PATCH] Parses wheels WHEEL and METADATA files as email messages Fixing some wheel parsing crashes, like for the ziglang package on Linux. --- Cargo.lock | 1 + crates/install-wheel-rs/Cargo.toml | 1 + crates/install-wheel-rs/src/wheel.rs | 57 ++++++++++++++++++---------- 3 files changed, 39 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d5f42c18e6cf4..16754df989fa6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1840,6 +1840,7 @@ dependencies = [ "distribution-filename", "fs-err", "indoc", + "mailparse", "pathdiff", "pep440_rs", "platform-info", diff --git a/crates/install-wheel-rs/Cargo.toml b/crates/install-wheel-rs/Cargo.toml index eec879177f6e0..a7f6be0e40836 100644 --- a/crates/install-wheel-rs/Cargo.toml +++ b/crates/install-wheel-rs/Cargo.toml @@ -33,6 +33,7 @@ configparser = { workspace = true } csv = { workspace = true } data-encoding = { workspace = true } fs-err = { workspace = true } +mailparse.workspace = true pathdiff = { workspace = true } platform-info = { workspace = true } reflink-copy = { workspace = true } diff --git a/crates/install-wheel-rs/src/wheel.rs b/crates/install-wheel-rs/src/wheel.rs index 3698fca56cb5c..06f7ca70d0030 100644 --- a/crates/install-wheel-rs/src/wheel.rs +++ b/crates/install-wheel-rs/src/wheel.rs @@ -1,11 +1,12 @@ use std::collections::HashMap; -use std::io::{BufRead, BufReader, Cursor, Read, Seek, Write}; +use std::io::{BufReader, Cursor, Read, Seek, Write}; use std::path::{Path, PathBuf}; use std::{env, io}; use data_encoding::BASE64URL_NOPAD; use fs_err as fs; use fs_err::{DirEntry, File}; +use mailparse::parse_headers; use rustc_hash::FxHashMap; use sha2::{Digest, Sha256}; use tracing::{instrument, warn}; @@ -356,7 +357,7 @@ pub enum LibKind { /// > basic key: value format: pub fn parse_wheel_file(wheel_text: &str) -> Result { // {distribution}-{version}.dist-info/WHEEL is metadata about the archive itself in the same basic key: value format: - let data = parse_key_value_file(&mut wheel_text.as_bytes(), "WHEEL")?; + let data = parse_email_message_file(&mut wheel_text.as_bytes(), "WHEEL")?; // Determine whether Root-Is-Purelib == ‘true’. // If it is, the wheel is pure, and should be installed into purelib. @@ -797,29 +798,31 @@ pub fn read_record_file(record: &mut impl Read) -> Result, Erro .collect() } -/// Parse a file with `Key: value` entries such as WHEEL and METADATA -fn parse_key_value_file( +/// Parse a file with email message format such as WHEEL and METADATA +fn parse_email_message_file( file: impl Read, debug_filename: &str, ) -> Result>, Error> { let mut data: FxHashMap> = FxHashMap::default(); let file = BufReader::new(file); - for (line_no, line) in file.lines().enumerate() { - let line = line?.trim().to_string(); - if line.is_empty() { - continue; - } - let (key, value) = line.split_once(':').ok_or_else(|| { - Error::InvalidWheel(format!( - "Line {} of the {debug_filename} file is invalid", - line_no + 1 - )) - })?; - data.entry(key.trim().to_string()) + let content = file.bytes().collect::, _>>()?; + + let headers = parse_headers(content.as_slice()) + .map_err(|err| { + Error::InvalidWheel(format!("Failed to parse {debug_filename} file: {err}")) + })? + .0; + + for header in headers { + let name = header.get_key(); + let value = header.get_value(); + + data.entry(name.trim().to_string()) .or_default() .push(value.trim().to_string()); } + Ok(data) } @@ -836,11 +839,11 @@ mod test { use crate::Error; use super::{ - get_script_executable, parse_key_value_file, parse_wheel_file, read_record_file, Script, + get_script_executable, parse_email_message_file, parse_wheel_file, read_record_file, Script, }; #[test] - fn test_parse_key_value_file() { + fn test_parse_email_message_file() { let text = indoc! {" Wheel-Version: 1.0 Generator: bdist_wheel (0.37.1) @@ -849,7 +852,21 @@ mod test { Tag: cp38-cp38-manylinux2014_x86_64 "}; - parse_key_value_file(&mut text.as_bytes(), "WHEEL").unwrap(); + parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap(); + } + + #[test] + fn test_parse_email_message_file_with_value_starting_with_linesep_and_two_space() { + // Check: https://files.pythonhosted.org/packages/0c/b7/ecfdce6368cc3664d301f7f52db4fe1004aa7da7a12c4a9bf1de534ff6ab/ziglang-0.13.0-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.musllinux_1_1_x86_64.whl + let text = indoc! {" + Wheel-Version: 1.0 + Generator: ziglang make_wheels.py + Root-Is-Purelib: false + Tag: + py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.musllinux_1_1_aarch64 + "}; + + parse_email_message_file(&mut text.as_bytes(), "WHEEL").unwrap(); } #[test] @@ -996,7 +1013,7 @@ mod test { " }; let reader = Cursor::new(wheel.to_string().into_bytes()); - let wheel_file = parse_key_value_file(reader, "WHEEL")?; + let wheel_file = parse_email_message_file(reader, "WHEEL")?; assert_eq!( wheel_file.get("Wheel-Version"), Some(&["1.0".to_string()].to_vec())