diff --git a/docs/history.rst b/docs/history.rst index 231df52a5..5efc6027a 100644 --- a/docs/history.rst +++ b/docs/history.rst @@ -18,6 +18,27 @@ Blog Posts Version History =============== +next +---- + +*Not yet released* + +Backwards Compatibility Notes +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +* The format of embedded Python module data has changed. The ``pyembed`` crate + and ``pyoxidizer`` versions must match exactly or else the ``pyembed`` crate + will likely crash at run-time when parsing module data. + +Bug Fixes +^^^^^^^^^ + +* The in-memory module importer now correctly populates ``__package__`` in + more cases than it did previously. Before, whether a module was a package + was derived from the presence of a ``foo.bar`` module. Now, a module will be + identified as a package if the file providing it is named ``__init__``. This + more closely matches the behavior of Python's filesystem based importer. (#53) + 0.2.0 ----- diff --git a/docs/pyembed.rst b/docs/pyembed.rst index 4194cc87c..d8800a35b 100644 --- a/docs/pyembed.rst +++ b/docs/pyembed.rst @@ -328,10 +328,14 @@ The first 4 bytes are a little endian u32 containing the total number of modules in this data. Let's call this value ``total``. Following is an array of length ``total`` with each array element being -a 3-tuples of packed (no interior or exterior padding) composed of 3 +a packed 4-tuple (no interior or exterior padding) composed of 4 little endian u32 values. These values correspond to the module name length (``name_length``), module source data length (``source_length``), -and module bytecode data length (``bytecode_length``), respectively. +module bytecode data length (``bytecode_length``), and a ``flags`` field +to denote special behavior, respectively. + +The least significant bit of the ``flags`` field is set if the +corresponding module name is a package. Following the lengths array is a vector of the module name strings. This vector has ``total`` elements. 
Each element is a non-NULL terminated diff --git a/pyoxidizer/src/pyembed/importer.rs b/pyoxidizer/src/pyembed/importer.rs index 414b0d81e..f404db215 100644 --- a/pyoxidizer/src/pyembed/importer.rs +++ b/pyoxidizer/src/pyembed/importer.rs @@ -65,6 +65,10 @@ impl PythonModuleData { /// /// This is essentially an index over a raw backing blob. struct PythonModulesData { + /// Packages in this set of modules. + packages: HashSet<&'static str>, + + /// Maps module name to source/bytecode. data: HashMap<&'static str, PythonModuleData>, } @@ -80,6 +84,7 @@ impl PythonModulesData { let mut index = Vec::with_capacity(count as usize); let mut total_names_length = 0; let mut total_sources_length = 0; + let mut package_count = 0; for _ in 0..count { let name_length = reader @@ -94,20 +99,30 @@ impl PythonModulesData { .read_u32::() .or_else(|_| Err("failed reading bytecode length"))? as usize; + let flags = reader + .read_u32::() + .or_else(|_| Err("failed reading module flags"))?; - index.push((name_length, source_length, bytecode_length)); + let is_package = flags & 0x01 != 0; + + if is_package { + package_count += 1; + } + + index.push((name_length, source_length, bytecode_length, is_package)); total_names_length += name_length; total_sources_length += source_length; } let mut res = HashMap::with_capacity(count as usize); + let mut packages = HashSet::with_capacity(package_count); let sources_start_offset = reader.position() as usize + total_names_length; let bytecodes_start_offset = sources_start_offset + total_sources_length; let mut sources_current_offset: usize = 0; let mut bytecodes_current_offset: usize = 0; - for (name_length, source_length, bytecode_length) in index { + for (name_length, source_length, bytecode_length, is_package) in index { let offset = reader.position() as usize; let name = @@ -132,10 +147,21 @@ impl PythonModulesData { sources_current_offset += source_length; bytecodes_current_offset += bytecode_length; - res.insert(name, PythonModuleData { 
source, bytecode }); + if is_package { + packages.insert(name); + } + + // Extension modules will have their names present to populate the + // packages set. So only populate module data if we have data for it. + if source.is_some() || bytecode.is_some() { + res.insert(name, PythonModuleData { source, bytecode }); + } } - Ok(PythonModulesData { data: res }) + Ok(PythonModulesData { + packages, + data: res, + }) } } @@ -495,15 +521,6 @@ py_class!(class PyOxidizerResourceReader |py| { } }); -fn populate_packages(packages: &mut HashSet<&'static str>, name: &'static str) { - let mut search = name; - - while let Some(idx) = search.rfind('.') { - packages.insert(&search[0..idx]); - search = &search[0..idx]; - } -} - const DOC: &[u8] = b"Binary representation of Python modules\0"; /// Represents global module state to be passed at interpreter initialization time. @@ -723,9 +740,6 @@ fn module_setup( known_modules.insert(name_str, KnownModuleFlavor::Frozen); } - // TODO consider baking set of packages into embedded data. - let mut packages: HashSet<&'static str> = HashSet::with_capacity(modules_data.data.len()); - for (name, record) in modules_data.data { known_modules.insert( name, @@ -733,7 +747,6 @@ fn module_setup( module_data: record, }, ); - populate_packages(&mut packages, name); } let resources_data = match PythonResourcesData::from(state.py_resources_data) { @@ -779,7 +792,7 @@ fn module_setup( module_spec_type, decode_source, exec_fn, - packages, + modules_data.packages, known_modules, resources_data.packages, resource_readers, diff --git a/pyoxidizer/src/pyrepackager/repackage.rs b/pyoxidizer/src/pyrepackager/repackage.rs index 2de53f6ea..cbe542abd 100644 --- a/pyoxidizer/src/pyrepackager/repackage.rs +++ b/pyoxidizer/src/pyrepackager/repackage.rs @@ -249,6 +249,7 @@ impl BuildContext { /// Represents a single module's data record. 
pub struct ModuleEntry { pub name: String, + pub is_package: bool, pub source: Option>, pub bytecode: Option>, } @@ -279,6 +280,7 @@ impl EmbeddedPythonResources { records.push(ModuleEntry { name: name.clone(), + is_package: self.all_packages.contains(name), source: match source { Some(value) => Some(value.source.clone()), None => None, @@ -818,6 +820,8 @@ pub fn resolve_python_resources( } for (name, extension) in &embedded_built_extension_modules { + all_embedded_modules.insert(name.clone()); + if extension.is_package { annotated_package_names.insert(name.clone()); } @@ -936,6 +940,13 @@ pub fn write_modules_entries( } else { 0 })?; + + let mut flags = 0; + if entry.is_package { + flags |= 1; + } + + dest.write_u32::(flags)?; } for entry in entries.iter() {