Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sniff for libclang-version-correct headers #1325

Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,11 @@ jobs:
postgresql-$PG_VER \
postgresql-server-dev-$PG_VER

echo ""
echo "----- pg_config -----"
pg_config
echo ""


- name: Set up PostgreSQL permissions
run: sudo chmod a+rwx `/usr/lib/postgresql/$PG_VER/bin/pg_config --pkglibdir` `/usr/lib/postgresql/$PG_VER/bin/pg_config --sharedir`/extension /var/run/postgresql/
Expand Down
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 18 additions & 6 deletions pgrx-pg-config/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,12 +181,10 @@ impl PgConfig {
const PREFIX: &str = "PGRX_PG_CONFIG_";

let mut known_props = BTreeMap::new();
for (k, v) in std::env::vars() {
if k.starts_with(PREFIX) {
// reformat the key to look like an argument option to `pg_config`
let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
known_props.insert(prop, v);
}
for (k, v) in std::env::vars().filter(|(k, _)| k.starts_with(PREFIX)) {
// reformat the key to look like an argument option to `pg_config`
let prop = format!("--{}", k.trim_start_matches(PREFIX).to_lowercase());
known_props.insert(prop, v);
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
}

Ok(Self {
Expand Down Expand Up @@ -368,6 +366,20 @@ impl PgConfig {
Ok(path)
}

/// a vaguely-parsed "--configure"
pub fn configure(&self) -> eyre::Result<BTreeMap<String, String>> {
let stdout = self.run("--configure")?;
Ok(stdout
.split('\'')
.filter(|s| s != &"" && s != &" ")
.map(|entry| match entry.split_once('=') {
Some((k, v)) => (k.to_owned(), v.to_owned()),
// some keys are about mere presence
None => (entry.to_owned(), String::from("")),
})
.collect())
}
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved

pub fn includedir_server(&self) -> eyre::Result<PathBuf> {
Ok(self.run("--includedir-server")?.into())
}
Expand Down
2 changes: 2 additions & 0 deletions pgrx-pg-sys/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,10 +48,12 @@ libc = "0.2"

[build-dependencies]
bindgen = { version = "0.68.1", default-features = false, features = ["runtime"] }
clang-sys = { version = "1", features = ["clang_6_0", "runtime"] }
pgrx-pg-config= { path = "../pgrx-pg-config/", version = "=0.11.0" }
proc-macro2 = "1.0.69"
quote = "1.0.33"
syn = { version = "1.0.109", features = [ "extra-traits", "full", "fold", "parsing" ] }
eyre = "0.6.8"
shlex = "1.2.0" # shell lexing, also used by many of our deps
once_cell = "1.18.0"
walkdir = "2"
12 changes: 11 additions & 1 deletion pgrx-pg-sys/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ use syn::{ForeignItem, Item, ItemConst};
const BLOCKLISTED_TYPES: [&str; 3] = ["Datum", "NullableDatum", "Oid"];

mod build {
pub(super) mod clang;
pub(super) mod sym_blocklist;
}

Expand Down Expand Up @@ -714,14 +715,23 @@ fn run_bindgen(
include_h: &PathBuf,
) -> eyre::Result<String> {
eprintln!("Generating bindings for pg{major_version}");
let configure = pg_config.configure()?;
let preferred_clang: Option<&std::path::Path> = configure.get("CLANG").map(|s| s.as_ref());
eprintln!("pg_config --configure CLANG = {:?}", preferred_clang);
let (autodetect, includes) = build::clang::detect_include_paths_for(preferred_clang);
eprintln!("passed detect_include_paths_for_correct_clang");
let mut binder = bindgen::Builder::default();
binder = add_blocklists(binder);
binder = add_derives(binder);
if !autodetect {
let builtin_includes = includes.iter().filter_map(|p| Some(format!("-I{}", p.to_str()?)));
binder = binder.clang_args(builtin_includes);
};
let bindings = binder
.header(include_h.display().to_string())
.clang_args(extra_bindgen_clang_args(pg_config)?)
.clang_args(pg_target_include_flags(major_version, pg_config)?)
.detect_include_paths(target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_none())
.detect_include_paths(autodetect)
.parse_callbacks(Box::new(PgrxOverrides::default()))
// The NodeTag enum is closed: additions break existing values in the set, so it is not extensible
.rustified_non_exhaustive_enum("NodeTag")
Expand Down
129 changes: 129 additions & 0 deletions pgrx-pg-sys/build/clang.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
use crate::target_env_tracked;
use bindgen::ClangVersion;
use clang_sys::support::Clang as ClangSys;
use std::{ffi::OsStr, path::PathBuf};
use walkdir::{DirEntry, WalkDir};

/// pgrx's bindgen needs to detect include paths, to keep code building,
/// but the way rust-bindgen does it breaks on Postgres 16 due to code like
/// ```c
/// #include <emmintrin.h>
/// ```
/// This will pull in builtin headers, but rust-bindgen uses a $CLANG_PATH lookup from clang-sys
/// which is not guaranteed to find the clang that uses the $LIBCLANG_PATH that bindgen intends.
///
/// Returns the set of paths to include.
pub(crate) fn detect_include_paths_for(
preferred_clang: Option<&std::path::Path>,
) -> (bool, Vec<PathBuf>) {
if target_env_tracked("PGRX_BINDGEN_NO_DETECT_INCLUDES").is_some() {
return (false, vec![]);
}

// By asking bindgen for the version, we force it to pull an appropriate libclang,
// allowing users to override it however they would usually override bindgen.
let clang_major = match bindgen::clang_version() {
ClangVersion { parsed: Some((major, _)), full } => {
eprintln!("Bindgen found {full}");
major
}
ClangVersion { full, .. } => {
// If bindgen doesn't know what version it has, bail and hope for the best.
eprintln!("Bindgen failed to parse clang version: {full}");
return (true, vec![]);
}
};

// If Postgres is configured --with-llvm, then it may have recorded a CLANG to use
// Ask if there's a clang at the path that Postgres would use for JIT purposes.
// Unfortunately, the responses from clang-sys include clangs from far-off paths,
// so we can only use clangs that match bindgen's libclang major version.
if let Some(ClangSys { path, version: Some(v), c_search_paths, .. }) =
ClangSys::find(preferred_clang, &[])
{
if Some(&*path) == preferred_clang && v.Major as u32 == clang_major {
return (false, c_search_paths.unwrap_or_default());
}
}
Comment on lines +16 to +47
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part that I feel deeply confident is basically guaranteed-correct for macOS, but I must admit I might inadvertantly be counting on "clang vs. appleclang doesn't matter for the headers we need".


// Oh no, still here?
// Let's go behind bindgen's back to get libclang's path
let libclang_path =
clang_sys::get_library().expect("libclang should have been loaded?").path().to_owned();
eprintln!("found libclang at {}", libclang_path.display());
// libclang will probably be in a dynamic library directory,
// which means it will probably be adjacent to its headers, e.g.
// - "/usr/lib/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
let clang_major_fmt = clang_major.to_string();
let mut paths = vec![];
// by adjacent, that does not mean it is always immediately so, e.g.
// - "/usr/lib/x86_64-linux-gnu/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
// or
// - "/usr/lib64/libclang-${CLANG_MAJOR}.so.${CLANG_MAJOR}.${CLANG_MINOR}.${CLANG_SUBMINOR}"
// - "/usr/lib/clang/${CLANG_MAJOR}/include"
// so, crawl back up the ancestral tree
for ancestor in libclang_path.ancestors() {
Comment on lines +49 to +67
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the part where the secret ingredient is Crime. The idea is that this crime starts its journey relative to the libclang.so dir so that we ideally find the right headers to pull even if we're starting from an unusual place.

paths = WalkDir::new(ancestor)
.min_depth(1)
.max_depth(6)
.sort_by_file_name()
.into_iter()
// On Unix-y systems this will be like "/usr/lib/clang/$CLANG_MAJOR/include"
// so don't even descend if the directory doesn't have one of those parts
.filter_entry(|entry| {
!is_hidden(entry) && {
entry_contains(entry, "clang")
|| entry_contains(entry, "include")
|| entry_contains(entry, &*clang_major_fmt)
// we always want to descend from a lib dir, but only one step
// as we don't really want to search all of /usr/lib's subdirs
|| os_str_contains(entry.file_name(), "lib")
}
})
.filter_map(|e| e.ok()) // be discreet
// We now need something that looks like it actually satisfies all our constraints
.filter(|entry| {
entry_contains(entry, &*clang_major_fmt)
&& entry_contains(entry, "clang")
&& entry_contains(entry, "include")
})
Comment on lines +73 to +91
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The fast-and-loose way this may seem to be written is to account for possible differences like

  • /usr/lib/clang-15/include
  • /usr/lib/clang/15/include

It may seem unnecessary, but we don't necessarily want to have a hard dependency on there being a clang executable. This is because it is possible for libclang to be packaged without its executable and this is a configuration that is actually shipped by real distros.

You may think that seems... bad... but it's less goofy than the configurations that package libclang separately from its headers!

// we need to pull the actual directories that include the SIMD headers
.filter(|entry| {
os_str_contains(entry.file_name(), "emmintrin.h")
|| os_str_contains(entry.file_name(), "arm_neon.h")
})
.filter_map(|entry| {
let mut pbuf = entry.into_path();
if pbuf.pop() && pbuf.is_dir() && os_str_contains(&*pbuf.file_name()?, "include") {
Some(pbuf)
} else {
None
}
})
Comment on lines +92 to +104
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is where I'm hoping this is basically enough validation that we eliminate any real risk of pulling the wrong headers. This also prevents pulling entries like:

  • /usr/lib/clang/17/include/ppc_wrappers/emmintrin.h

.collect::<Vec<_>>();

if paths.len() > 0 {
paths.sort();
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
paths.dedup();
workingjubilee marked this conversation as resolved.
Show resolved Hide resolved
break;
}
}
// If we have anything better to recommend, don't autodetect!
let autodetect = paths.len() == 0;
eprintln!("Found include dirs {:?}", paths);
(autodetect, paths)
}

fn is_hidden(entry: &DirEntry) -> bool {
entry.file_name().to_str().map(|s| s.starts_with(".")).unwrap_or(false)
}

fn entry_contains(entry: &DirEntry, needle: &str) -> bool {
entry.path().components().any(|part| os_str_contains(part.as_os_str(), needle))
}

fn os_str_contains(os_s: &OsStr, needle: &str) -> bool {
os_s.to_str().filter(|part| part.contains(needle)).is_some()
}