diff --git a/src/contracts.rs b/src/contracts.rs index 455dd84..793f875 100644 --- a/src/contracts.rs +++ b/src/contracts.rs @@ -1,9 +1,8 @@ -//! # Nickel contract generation for certain JSON schemas +//! # Nickel contract generation for JSON schemas //! -//! Since generating lazy Nickel contracts for arbitrary JSON schemas is -//! impossible, this module restricts itself to generating record contracts for -//! JSON schemas that are simple enough. A JSON schema can be successfully -//! turned into a record contract if it takes the form +//! Since generating lazy Nickel contracts for arbitrary JSON schemas is impossible, this module +//! restricts itself to generating record contracts for JSON schemas that are simple enough. A JSON +//! schema can be successfully turned into a record contract if it takes the form //! //! ```json //! { @@ -25,6 +24,8 @@ //! ``` //! //! is turned into the Nickel type `Bool`. +use crate::definitions::RefUsage; +use schemars::schema::RootSchema; use std::collections::{BTreeMap, BTreeSet}; use nickel_lang_core::{ @@ -42,7 +43,12 @@ use schemars::schema::{ }; use serde_json::Value; -use crate::{definitions, predicates::Predicate, utils::static_access}; +use crate::{ + definitions::{self, RefsUsage}, + predicates::{AsPredicate, Predicate}, + utils::static_access, + PREDICATES_LIBRARY_ID, +}; fn only_ignored_fields(extensions: &BTreeMap) -> bool { const IGNORED_FIELDS: &[&str] = &["$comment"]; @@ -58,132 +64,67 @@ fn only_ignored_fields(extensions: &BTreeMap) -> bool { #[derive(Clone)] pub struct Contract(Vec); -impl From for Contract { - fn from(rt: RichTerm) -> Self { - Contract(vec![rt]) - } -} +impl Contract { + /// Convert a root JSON schema to a contract. Returns `None` if the schema couldn't be + /// converted to a (lazy) contract, and thus requires to go through a predicate. + /// Upon success, returns the contract and the references used in the schema. + pub fn from_root_schema(root_schema: &RootSchema) -> Option<(Self, RefsUsage)> { + let mut refs_usage = RefsUsage::new(); -impl From for RichTerm { - fn from(Contract(c): Contract) -> Self { - match c.as_slice() { - [] => Predicate::from(&Schema::Bool(true)).into(), - // TODO: shouldn't need to clone here - [rt] => rt.clone(), - _ => { - let arr = Term::Array(Array::new(c.into_iter().collect()), Default::default()); - mk_app!(static_access("std", ["contract", "Sequence"]), arr) - } - } + root_schema + .schema + .try_as_contract(&mut refs_usage) + .map(|ctr| (ctr, refs_usage)) } -} -impl From for Contract { - fn from(value: Term) -> Self { - Contract::from(RichTerm::from(value)) + /// Return the `Dyn` contract, always succeeding. + pub fn dynamic() -> Self { + Term::Type(TypeF::Dyn.into()).into() } } -impl From, RecordRows, EnumRows>> for Contract { - fn from(value: TypeF, RecordRows, EnumRows>) -> Self { - Contract::from(Term::Type(Type::from(value))) - } +/// [TryAsContract] is essentially like `TryInto` but passes additional state around used for +/// effective reference resolution. +pub trait TryAsContract { + /// Try to convert a JSON schema component `Self` to a contract. Returns `None` if the + /// component couldn't be converted to a lazy contract, and thus requires to go through a + /// predicate. + /// + /// `try_as_contract` will record the references used during the conversion through the `refs_usage` parameter. 
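For orientation, here is a minimal, hypothetical sketch (written as if inside this crate; the helper name is invented) of how the two conversion paths introduced here are combined elsewhere in this diff, for instance in `generate_record_contract`: try the lazy contract first, and only fall back to the predicate-backed contract when that fails.

```rust
use crate::contracts::{AsPredicateContract, Contract, TryAsContract};
use crate::definitions::RefsUsage;
use schemars::schema::Schema;

// Prefer the lazy record contract; otherwise wrap the schema's predicate.
fn schema_to_contract(schema: &Schema, refs_usage: &mut RefsUsage) -> Contract {
    schema
        .try_as_contract(refs_usage)
        .unwrap_or_else(|| schema.as_predicate_contract(refs_usage))
}
```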
+ fn try_as_contract(&self, refs_usage: &mut RefsUsage) -> Option; } -impl From<&InstanceType> for Contract { - fn from(value: &InstanceType) -> Contract { - match value { - InstanceType::Null => contract_from_predicate( - mk_app!( - static_access("predicates", ["isType"]), - Term::Enum("Null".into()) - ) - .into(), - ), - InstanceType::Boolean => Contract::from(TypeF::Bool), - InstanceType::Object => Contract::from(Term::Record(RecordData { - attrs: RecordAttrs { open: true }, - ..Default::default() - })), - InstanceType::Array => Contract::from(TypeF::Array(Box::new(TypeF::Dyn.into()))), - InstanceType::Number => Contract::from(TypeF::Number), - InstanceType::String => Contract::from(TypeF::String), - InstanceType::Integer => Contract::from(static_access("std", ["number", "Integer"])), - } - } +pub trait AsPredicateContract { + /// Convert a JSON schema to a contract by first converting it to a predicate, and then use + /// json-schema-to-nickel's `from_predicate` helper. As opposed to [TryAsContract::try_as_contract], this + /// conversion can't fail. However, it is less desirable (as it throws lazyness out of the + /// window and is less LSP-friendly for e.g. completion), so we generally try to use + /// [TryAsContract::try_as_contract] first. + fn as_predicate_contract(&self, refs_usage: &mut RefsUsage) -> Contract; } -impl TryFrom<&ObjectValidation> for Contract { - type Error = (); - - fn try_from(value: &ObjectValidation) -> Result { - fn is_open_record(additional: Option<&Schema>) -> bool { - match additional { - Some(Schema::Bool(open)) => *open, - None => true, - _ => unreachable!("additional_properties must be checked beforehand"), - } - } - - // box / deref patterns aren't stabilized, so we have to separate out - // `additional_properties` as a separate pattern - // SEE: https://github.com/rust-lang/rust/issues/29641 - // SEE: https://github.com/rust-lang/rust/issues/87121 - match (value, value.additional_properties.as_deref()) { - ( - ObjectValidation { - max_properties: None, - min_properties: None, - required, - properties, - pattern_properties, - additional_properties, - property_names: None, - }, - None | Some(Schema::Bool(_)), - ) if pattern_properties.is_empty() => Ok(Contract::from(generate_record_contract( - required, - properties, - is_open_record(additional_properties.as_deref()), - ))), - _ => Err(()), - } +impl AsPredicateContract for T +where + T: AsPredicate, +{ + fn as_predicate_contract(&self, refs_usage: &mut RefsUsage) -> Contract { + Contract::from(self.as_predicate(refs_usage)) } } -impl TryFrom<&ArrayValidation> for Contract { - type Error = (); - - fn try_from(val: &ArrayValidation) -> Result { - if let ArrayValidation { - items: Some(SingleOrVec::Single(s)), - additional_items: None, - max_items: None, - min_items: None, - unique_items: None, - contains: None, - } = val - { - let elt = Contract::try_from(s.as_ref()) - .unwrap_or_else(|_| contract_from_predicate(Predicate::from(s.as_ref()))); - if let [elt] = elt.0.as_slice() { - Ok(Contract::from(TypeF::Array(Box::new( - TypeF::Flat(elt.clone()).into(), - )))) - } else { - Err(()) - } - } else { - Err(()) +impl TryAsContract for Schema { + fn try_as_contract(&self, refs_usage: &mut RefsUsage) -> Option { + match self { + Schema::Bool(true) => Some(Contract(vec![])), + Schema::Bool(false) => None, + Schema::Object(obj) => obj.try_as_contract(refs_usage), } } } -impl TryFrom<&SchemaObject> for Contract { - type Error = (); - - fn try_from(value: &SchemaObject) -> Result { - match value { +impl 
TryAsContract for SchemaObject { + fn try_as_contract(&self, refs_usage: &mut RefsUsage) -> Option { + match self { // a raw type SchemaObject { metadata: _, @@ -203,7 +144,7 @@ impl TryFrom<&SchemaObject> for Contract { // only a type in it. Semantically, this is kind of weird. But // the pretty printer doesn't care, and it simplifies our code // significantly. - Ok(Contract::from(instance_type.as_ref())) + Some(Contract::from(instance_type.as_ref())) } // a reference to a definition SchemaObject { @@ -219,9 +160,11 @@ impl TryFrom<&SchemaObject> for Contract { object: None, reference: Some(reference), extensions, - } if only_ignored_fields(extensions) => { - Ok(Contract::from(definitions::reference(reference).contract)) - } + } if only_ignored_fields(extensions) => Some(Contract::from(definitions::resolve_ref( + reference, + refs_usage, + RefUsage::Contract, + ))), // a freeform record SchemaObject { metadata: _, @@ -237,7 +180,7 @@ impl TryFrom<&SchemaObject> for Contract { reference: None, extensions, } if **instance_type == InstanceType::Object && only_ignored_fields(extensions) => { - Ok(Contract::from(Term::Record(RecordData { + Some(Contract::from(Term::Record(RecordData { attrs: RecordAttrs { open: true }, ..Default::default() }))) @@ -257,7 +200,7 @@ impl TryFrom<&SchemaObject> for Contract { reference: None, extensions, } if **instance_type == InstanceType::Object && only_ignored_fields(extensions) => { - ov.as_ref().try_into() + ov.try_as_contract(refs_usage) } // Enum contract with all strings // => | std.enum.TagOrString | [| 'foo, 'bar, 'baz |] @@ -281,14 +224,14 @@ impl TryFrom<&SchemaObject> for Contract { .try_fold(EnumRows(EnumRowsF::Empty), |acc, value| { let id = match value { Value::String(s) => s.into(), - _ => return Err(()), + _ => return None, }; - Ok(EnumRows(EnumRowsF::Extend { + Some(EnumRows(EnumRowsF::Extend { row: id, tail: Box::new(acc), })) })?; - Ok(Contract(vec![ + Some(Contract(vec![ static_access("std", ["enum", "TagOrString"]), Term::Type(TypeF::Enum(enum_rows).into()).into(), ])) @@ -306,20 +249,131 @@ impl TryFrom<&SchemaObject> for Contract { object: None, reference: None, extensions: _, - } if **instance_type == InstanceType::Array => av.as_ref().try_into(), - _ => Err(()), + } if **instance_type == InstanceType::Array => av.try_as_contract(refs_usage), + _ => None, } } } -impl TryFrom<&Schema> for Contract { - type Error = (); +impl TryAsContract for ObjectValidation { + fn try_as_contract(&self, refs_usage: &mut RefsUsage) -> Option { + fn is_open_record(additional: Option<&Schema>) -> bool { + match additional { + Some(Schema::Bool(open)) => *open, + None => true, + _ => unreachable!("additional_properties must be checked beforehand"), + } + } - fn try_from(value: &Schema) -> Result { + // box / deref patterns aren't stabilized, so we have to separate out + // `additional_properties` as a separate pattern + // SEE: https://github.com/rust-lang/rust/issues/29641 + // SEE: https://github.com/rust-lang/rust/issues/87121 + match (self, self.additional_properties.as_deref()) { + ( + ObjectValidation { + max_properties: None, + min_properties: None, + required, + properties, + pattern_properties, + additional_properties, + property_names: None, + }, + None | Some(Schema::Bool(_)), + ) if pattern_properties.is_empty() => Some(Contract::from(generate_record_contract( + required, + properties, + is_open_record(additional_properties.as_deref()), + refs_usage, + ))), + _ => None, + } + } +} + +impl TryAsContract for ArrayValidation { + fn 
try_as_contract(&self, refs_usage: &mut RefsUsage) -> Option { + if let ArrayValidation { + items: Some(SingleOrVec::Single(s)), + additional_items: None, + max_items: None, + min_items: None, + unique_items: None, + contains: None, + } = self + { + let elt = s + .try_as_contract(refs_usage) + .unwrap_or_else(|| s.as_predicate_contract(refs_usage)); + if let [elt] = elt.0.as_slice() { + Some(Contract::from(TypeF::Array(Box::new( + TypeF::Flat(elt.clone()).into(), + )))) + } else { + None + } + } else { + None + } + } +} + +// The following conversions: +// +// 1. Are infallible +// 2. Don't do reference resolution +// +// We implement `From` directly instead of `TryConvert`. + +impl From for Contract { + fn from(rt: RichTerm) -> Self { + Contract(vec![rt]) + } +} + +impl From for RichTerm { + fn from(Contract(c): Contract) -> Self { + match c.as_slice() { + [] => static_access(PREDICATES_LIBRARY_ID, ["always"]), + // TODO: shouldn't need to clone here + [rt] => rt.clone(), + _ => { + let arr = Term::Array(Array::new(c.into_iter().collect()), Default::default()); + mk_app!(static_access("std", ["contract", "Sequence"]), arr) + } + } + } +} + +impl From for Contract { + fn from(value: Term) -> Self { + Contract::from(RichTerm::from(value)) + } +} + +impl From, RecordRows, EnumRows>> for Contract { + fn from(value: TypeF, RecordRows, EnumRows>) -> Self { + Contract::from(Term::Type(Type::from(value))) + } +} + +impl From<&InstanceType> for Contract { + fn from(value: &InstanceType) -> Contract { match value { - Schema::Bool(true) => Ok(Contract(vec![])), - Schema::Bool(false) => Err(()), - Schema::Object(obj) => obj.try_into(), + InstanceType::Null => Contract::from(Predicate::from(mk_app!( + static_access(PREDICATES_LIBRARY_ID, ["isType"]), + Term::Enum("Null".into()) + ))), + InstanceType::Boolean => Contract::from(TypeF::Bool), + InstanceType::Object => Contract::from(Term::Record(RecordData { + attrs: RecordAttrs { open: true }, + ..Default::default() + })), + InstanceType::Array => Contract::from(TypeF::Array(Box::new(TypeF::Dyn.into()))), + InstanceType::Number => Contract::from(TypeF::Number), + InstanceType::String => Contract::from(TypeF::String), + InstanceType::Integer => Contract::from(static_access("std", ["number", "Integer"])), } } } @@ -377,12 +431,14 @@ fn generate_record_contract( required: &BTreeSet, properties: &BTreeMap, open: bool, + refs_usage: &mut RefsUsage, ) -> RichTerm { let fields = properties.iter().map(|(name, schema)| { // try to convert to a contract, otherwise convert the predicate version // to a contract - let contract = Contract::try_from(schema) - .unwrap_or_else(|()| contract_from_predicate(Predicate::from(schema))); + let contract = schema + .try_as_contract(refs_usage) + .unwrap_or_else(|| schema.as_predicate_contract(refs_usage)); let doc = Documentation::try_from(schema).ok(); ( name.into(), @@ -405,12 +461,15 @@ fn generate_record_contract( .into() } -/// Convert `predicate` into a contract, suitable for use in a contract -/// assertion `term | Contract`. 
-pub fn contract_from_predicate(predicate: Predicate) -> Contract {
-    mk_app!(
-        static_access("predicates", ["contract_from_predicate"]),
-        predicate
-    )
-    .into()
+impl From<Predicate> for Contract {
+    // Convert a predicate to a contract by calling a function similar to
+    // `std.contract.from_predicate` (but which does a bit more work to propagate meaningful
+    // error messages).
+    fn from(pred: Predicate) -> Self {
+        mk_app!(
+            static_access(PREDICATES_LIBRARY_ID, ["contract_from_predicate"]),
+            pred
+        )
+        .into()
+    }
 }
diff --git a/src/definitions.rs b/src/definitions.rs
index 0aa887c..5cfd9bd 100644
--- a/src/definitions.rs
+++ b/src/definitions.rs
@@ -1,16 +1,38 @@
-//! # Reference handling for JSON schema
+//! Reference handling for JSON schema
 //!
-//! JSON schemas can contain a set of definitions at the top level and
-//! references to other schemas at essentially arbitrary points. The general
-//! format of JSON schema references is quite general. For example, it would be
-//! possible to reference fields in a schema hosted at a remote URI. We don't
-//! want to support the general case but we need a way of dealing with
-//! references to top-level definitions in a schema.
-//! This module handles an [`Environment`] data structure that keeps track of
-//! top-level definitions in a JSON schema and their translations into Nickel
-//! predicates and contracts.
-
-use std::collections::{BTreeMap, HashMap};
+//! JSON schemas can reference other schemas at essentially arbitrary points through the special
+//! [`$ref`](https://json-schema.org/draft/2020-12/json-schema-core#name-direct-references-with-ref)
+//! attribute. Those references are very general: they are URIs, which can point to a remote
+//! resource on the network. The fragment of the URI is a [JSON
+//! pointer](https://datatracker.ietf.org/doc/html/rfc6901), which is a path within the JSON
+//! document to a specific attribute (note that JSON pointers are not JSON-schema specific, but
+//! rather a general mechanism to index into a JSON value).
+//!
+//! We don't want to support the general case, at least for now, as it comes with its share of
+//! complexity. However, we want to at least be capable of resolving all local references (i.e.
+//! references within the current file).
+//!
+//! There are two different kinds of references:
+//!
+//! - references to top-level definitions in a schema. JSON schemas can contain a set of
+//!   definitions at the top level and reference them from other parts of the schema.
+//! - references to other properties of the schema.
+//!
+//! In both cases, in order to resolve references, we might need either the contract or the
+//! predicate version of the converted schema for each definition and property, as we don't know
+//! in advance whether a given usage will require the predicate form or the contract form. During
+//! conversion, we simply assume that they are accessible through special values introduced at the
+//! top level by json-schema-to-nickel, e.g. `___nickel_defs` or `___nickel_props_preds`. Thus, we
+//! can refer to them with a statically known field path. We record along the way which properties
+//! and definitions are used, and whether they are used as a contract or as a predicate.
+//!
+//! At the end, we elaborate the required special values like `___nickel_defs` and only include
+//! the definitions and properties that are actually used, to avoid bloating the final contract.
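To make the supported reference shapes concrete, here is a small, hypothetical sketch of how `resolve_ref` (defined further down in this module) treats the three situations described above; `Foo`, `foo` and `bar` are invented names.

```rust
let mut usage = RefsUsage::new();

// A top-level definition used as a contract: resolves to an access into the
// definitions part of the global environment and records `Foo` in
// `usage.defs_contracts`.
let _def = resolve_ref("#/definitions/Foo", &mut usage, RefUsage::Contract);

// A nested property used as a predicate: the intermediate `properties`
// segments are stripped, the path ["foo", "bar"] is recorded in
// `usage.props_predicates`, and the result accesses the flat "foo/bar" entry
// of the property predicates.
let _prop = resolve_ref(
    "#/properties/foo/properties/bar",
    &mut usage,
    RefUsage::Predicate,
);

// Anything else (external or otherwise unsupported) degrades to an
// always-succeeding contract or predicate, with a warning printed on stderr.
let _other = resolve_ref(
    "https://example.com/schema.json#/definitions/Bar",
    &mut usage,
    RefUsage::Contract,
);
```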
+ +use crate::{contracts::TryAsContract, predicates::AsPredicate}; +use std::collections::hash_map::Entry; +use std::collections::{HashMap, HashSet}; use nickel_lang_core::{ identifier::Ident, @@ -18,59 +40,294 @@ use nickel_lang_core::{ record::{Field, FieldMetadata, RecordData}, LetAttrs, RichTerm, Term, }, - typ::TypeF, }; -use schemars::schema::Schema; +use schemars::schema::{RootSchema, Schema, SchemaObject}; use crate::{ - contracts::{contract_from_predicate, Contract, Documentation}, + contracts::{Contract, Documentation}, predicates::Predicate, - utils::static_access, + utils::{decode_json_ptr_part, static_access}, + DEFINITIONS_ID, ENVIRONMENT_ID, PROPS_PREDICATES_ID, }; -/// The nickel predicate and contract generated for a schema. +/// Specify if a reference is used in a context which requires a contract or a predicate. +#[derive(Clone, Debug, Copy)] +pub enum RefUsage { + Contract, + Predicate, +} + +/// A representation of a field path in the final generated contract. +/// +/// # Invariants +/// +/// The path is guaranteed to be non-empty by construction. Do not directly mutate the underlying +/// path with the risk of making it empty. +#[derive(Hash, Clone, Debug, Default)] +pub struct FieldPath { + path: Vec, +} + +pub struct EmptyFieldPath; + +impl TryFrom> for FieldPath { + type Error = EmptyFieldPath; + + fn try_from(path: Vec) -> Result { + if path.is_empty() { + return Err(EmptyFieldPath); + } + + Ok(Self { path }) + } +} + +impl From for RichTerm { + fn from(field_path: FieldPath) -> Self { + // unwrap(): the `FieldPath` struct guarantees that the path is non-empty by construction. + static_access( + field_path.path.first().unwrap(), + field_path.path.iter().skip(1), + ) + } +} + +/// A representation of JSON pointer, which is mostly a path within a JSON document toward a +/// specific value. See [JSON pointer](https://datatracker.ietf.org/doc/html/rfc6901). +#[derive(Hash, Clone, Debug, Default)] +pub struct JsonPointer { + pub path: Vec, +} + +impl JsonPointer { + /// Create a new JSON pointer from a string representation (valid according to RFC6901). + pub fn new(ptr: &str) -> Self { + Self { + path: ptr.split('/').map(decode_json_ptr_part).collect(), + } + } + + /// Take a JSON pointer to a property and return the corresponding path in the final + /// generated contract, that is, with all the intermediate `properties` stripped. + /// + /// For example, running [Self::try_as_field_path] on a JSON pointer + /// `/properties/foo/properties/bar` will return the field path `["foo", "bar"]`. + fn try_as_field_path(&self) -> Option { + let mut it = self.path.iter(); + let mut result = Vec::with_capacity(self.path.len() / 2); + + // We expect that the path can be grouped as a sequence of two elements, where the first + // one is always `properties`, and the second one corresponds to the property name. + while let Some(part) = it.next() { + if part != "properties" { + return None; + } + + if let Some(name) = it.next() { + result.push(name.clone()); + } else { + return None; + } + } + + FieldPath::try_from(result).ok() + } + + /// Tries to interpret `self` as pointing to a top-level definition. A JSON pointer points to a + /// top-level definition if the path has exactly two elements and the first one is + /// `definitions`. + fn try_as_def(&self) -> Option { + if self.path.len() == 2 && self.path[0] == "definitions" { + Some(self.path[1].clone()) + } else { + None + } + } +} + +/// The conversion of a JSON schema definition into a Nickel predicate and contract. 
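A quick, hypothetical illustration of the two pointer interpretations above (`try_as_field_path` and `try_as_def` are private helpers, so read this as a sketch sitting next to them in this module; the names are made up):

```rust
// `#/properties/foo/properties/bar`: after stripping the leading `#/`, the
// intermediate `properties` segments are dropped, leaving the field path
// ["foo", "bar"].
let prop = JsonPointer::new("properties/foo/properties/bar");
assert!(prop.try_as_field_path().is_some());
assert!(prop.try_as_def().is_none());

// `#/definitions/MyDef`: exactly two segments, the first being `definitions`,
// so this is a top-level definition.
let def = JsonPointer::new("definitions/MyDef");
assert_eq!(def.try_as_def().as_deref(), Some("MyDef"));

// Anything else (e.g. `#/definitions/MyDef/properties/x`) matches neither
// interpretation and is reported as an unsupported reference by `resolve_ref`.
let other = JsonPointer::new("definitions/MyDef/properties/x");
assert!(other.try_as_field_path().is_none());
assert!(other.try_as_def().is_none());
```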
We don't +/// always use both, so we only store the part which is actually used. #[derive(Clone)] -pub struct ConvertedSchema { +pub struct ConvertedDef { doc: Option, - predicate: Predicate, - contract: Contract, + predicate: Option, + contract: Option, } -/// The field access for referencing the predicate or contract generated from a -/// schema in other Nickel code. +impl ConvertedDef { + /// Take the contract part out of this definition and convert it to a record field with the + /// appropriate definition. This method returns `None` if `self.contract` is `None`. + /// + /// After calling this method, `self.contract` will be `None`. + pub fn contract_as_field(&mut self) -> Option { + Self::as_field(self.contract.take(), self.doc.clone()) + } + + /// Take the predicate part out of this definition and convert it to a record field with the + /// appropriate definition. This method returns `None` if `self.contract` is `None`. + /// + /// After calling this method, `self.predicate` will be `None`. + pub fn predicate_as_field(&mut self) -> Option { + Self::as_field(self.predicate.take(), self.doc.clone()) + } + + /// Helper including the logic common to `contract_as_field` and `predicate_as_field`. + fn as_field(value: Option, doc: Option) -> Option + where + V: Clone + Into, + { + let value = value?.into(); + + Some(Field { + value: Some(value), + metadata: FieldMetadata { + doc: doc.map(String::from), + ..Default::default() + }, + ..Default::default() + }) + } +} + +/// The conversion of a JSON schema property into a Nickel predicate. #[derive(Clone)] -pub struct Access { - pub predicate: RichTerm, - pub contract: RichTerm, +pub struct ConvertedProp { + doc: Option, + predicate: Predicate, } -/// An environment of top level schema definitions and their conversions into -/// Nickel predicates and contracts. +impl From for Field { + fn from(value: ConvertedProp) -> Self { + Field { + value: Some(value.predicate.into()), + metadata: FieldMetadata { + doc: value.doc.map(String::from), + ..Default::default() + }, + ..Default::default() + } + } +} + +/// State recording which properties and definitions are actually used and how (as predicates or as +/// contracts). #[derive(Clone, Default)] -pub struct Environment(HashMap); +pub struct RefsUsage { + /// The definitions referenced as predicates somewhere in the schema. + pub defs_predicates: HashSet, + /// The definitions referenced as contracts somewhere in the schema. + pub defs_contracts: HashSet, + /// The properties referenced as predicates somewhere in the schema (stored as path). + /// + /// We don't need to keep track of the contracts, as they will unconditionally be constituents + /// of the final schema. + pub props_predicates: HashSet>, +} -pub fn access(name: impl AsRef) -> Access { - Access { - contract: static_access("definitions", ["contract", name.as_ref()]), - predicate: static_access("definitions", ["predicate", name.as_ref()]), +impl RefsUsage { + /// The empty state + pub fn new() -> Self { + Self::default() + } + + /// Return the set difference between all the definitions (either predicate or contract) + /// referenced in `self` and all the definitions referenced in `other`. + /// + /// That is, [Self::defs_diff] returns `(self.defs_predicates | self.defs_contracts) - + /// (other.defs_predicates | other.defs_contracts)`. 
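A small, hypothetical example of the set expression above (`Foo` and `Bar` are invented definition names); this is the operation the fixpoint loop in `Environment::new` below relies on to discover definitions that only become reachable while converting other definitions.

```rust
use std::collections::HashSet;

let mut seen = RefsUsage::new();
seen.defs_contracts.insert("Foo".to_owned());

let mut current = RefsUsage::new();
current.defs_predicates.insert("Foo".to_owned()); // already known from `seen`
current.defs_contracts.insert("Bar".to_owned()); // newly discovered

// Only `Bar` is new: `Foo` was already referenced (as a contract) in `seen`.
let new_defs: HashSet<String> = current.defs_diff(&seen);
assert_eq!(new_defs, HashSet::from(["Bar".to_owned()]));
```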
+ pub fn defs_diff(&self, other: &RefsUsage) -> HashSet { + &(&self.defs_predicates | &self.defs_contracts) + - &(&other.defs_predicates | &other.defs_contracts) + } + + /// Extend the usages of `self` with the usages of `other`. + pub fn extend(&mut self, other: RefsUsage) { + self.defs_predicates.extend(other.defs_predicates); + self.defs_contracts.extend(other.defs_contracts); + self.props_predicates.extend(other.props_predicates); } } -pub fn reference(reference: &str) -> Access { - if let Some(remainder) = reference.strip_prefix("#/definitions/") { - access(remainder) - } else { +/// An environment of top level schema definitions and nested properties and their conversions into +/// Nickel predicates and contracts. +#[derive(Clone, Default)] +pub struct Environment { + /// The top-level definition of the schema. + definitions: HashMap, + /// The predicates of the properties of the schema. We only need to store the predicates, and + /// not the contracts, as the contracts are simply accessible recursively in the resulting + /// schema. For example, the contract for the reference `#/properties/foo/properties/bar` is + /// simply `foo.bar`. + /// + /// The key is the path to the property. In our previous example, the key would be `["foo", + /// "bar"]`. + property_preds: HashMap, ConvertedProp>, +} + +/// Resolve a JSON schema reference to a Nickel term. The resulting Nickel expression will have a +/// different shape depending on the usage context and the type of reference (definition vs +/// property). +/// +/// # Arguments +/// +/// - `reference`: the JSON schema reference to resolve. It must be a valid URI +/// - `state`: the state used to record which properties and definitions are actually used, and +/// how. `resolve_ref` will update the state accordingly +/// - `usage`: the context in which the reference is used. Some contexts requires a predicate, +/// while other can do with a contract. +pub fn resolve_ref(reference: &str, state: &mut RefsUsage, usage: RefUsage) -> RichTerm { + let unsupported_reference = || -> RichTerm { eprintln!( " - Warning: skipping reference {reference} (replaced by an always succeeding `Dyn` \ - contract). The current version of `json-schema-to-nickel` doesn't support external \ - references" + Warning: skipping reference {reference} (replaced by an always succeeding \ + `Dyn` contract). The current version of `json-schema-to-nickel` only supports \ + internal references to top-level definitions or nested properties" ); - Access { - contract: Term::Type(TypeF::Dyn.into()).into(), - predicate: static_access("predicates", ["always"]), + match usage { + RefUsage::Contract => Contract::dynamic().into(), + RefUsage::Predicate => Predicate::always().into(), + } + }; + + if let Some(fragment) = reference.strip_prefix("#/") { + let json_ptr = JsonPointer::new(fragment); + + if let Some(field_path) = json_ptr.try_as_field_path() { + match usage { + RefUsage::Contract => RichTerm::from(field_path), + RefUsage::Predicate => { + // If we are referring to a property as a predicate, we need to keep track of it. + state.props_predicates.insert(field_path.path.clone()); + // We don't index the properties element by element, as in + // `.foo.bar.baz`, but we use the whole path with `/` + // as a separator as a key. See the documentation of `PROPS_PREDICATES_MANGLED` + // for more information. 
+ static_access( + ENVIRONMENT_ID, + [PROPS_PREDICATES_ID, field_path.path.join("/").as_str()], + ) + } + } + } else if let Some(name) = json_ptr.try_as_def() { + match usage { + RefUsage::Contract => { + state.defs_contracts.insert(name.clone()); + static_access(ENVIRONMENT_ID, [DEFINITIONS_ID, "contracts", name.as_ref()]) + } + RefUsage::Predicate => { + state.defs_predicates.insert(name.clone()); + static_access( + ENVIRONMENT_ID, + [DEFINITIONS_ID, "predicates", name.as_ref()], + ) + } + } + } else { + unsupported_reference() } + } else { + unsupported_reference() } } @@ -80,69 +337,175 @@ impl Environment { Self::default() } + /// Create an environment from the top-level JSON schema and the record usage of refs during + /// the conversion of this schema to a Nickel contract or predicate. + /// + /// Note that we have to repeat the creation process: when converting the referenced + /// definitions, those definitions might themselves reference other definitions that were not + /// used until now. We record those usages as well, and iterate until no new definition is ever + /// referenced. + pub fn new(root_schema: &RootSchema, mut refs_usage: RefsUsage) -> Self { + let mut definitions = HashMap::new(); + let mut property_preds = HashMap::new(); + + // The stack of definition to process. We might grow this stack as converting some + // definitions references new ones. + let mut def_stack: Vec<_> = refs_usage + .defs_predicates + .iter() + .chain(&refs_usage.defs_contracts) + .cloned() + .collect(); + + while let Some(def) = def_stack.pop() { + let Some(schema) = root_schema.definitions.get(&def) else { + eprintln!( + "Warning: definition `{def}` is referenced in the schema but couldn't be found" + ); + continue; + }; + + let mut cur_usage = RefsUsage::new(); + + let doc = Documentation::try_from(schema).ok(); + + let predicate = refs_usage + .defs_predicates + .contains(&def) + .then(|| schema.as_predicate(&mut cur_usage)); + + let contract = refs_usage.defs_contracts.contains(&def).then(|| { + schema.try_as_contract(&mut cur_usage).unwrap_or_else(|| { + Contract::from( + predicate + .clone() + .unwrap_or_else(|| schema.as_predicate(&mut cur_usage)), + ) + }) + }); + + // Because of the iterative nature of the process, the definition might already be + // present in `definitions` (for example, if it was referenced as a predicate, and then + // later as a contract during the definition conversion phase). In this case, we simply + // merge the entries. + match definitions.entry(def) { + Entry::Occupied(mut entry) => { + let entry: &mut ConvertedDef = entry.get_mut(); + entry.contract = entry.contract.take().or(contract); + entry.predicate = entry.predicate.take().or(predicate); + } + Entry::Vacant(entry) => { + entry.insert(ConvertedDef { + doc, + predicate, + contract, + }); + } + } + + // Adding the new usages to the stack + let new_usages = cur_usage.defs_diff(&refs_usage); + def_stack.extend(new_usages); + + // Update refs_usage with the usages from this iteration + refs_usage.extend(cur_usage); + } + + // We need to pass a ref usage object when converting properties and definitions to put + // them in the environment. However, we don't care about properties, because they've been + // converted at least once already (all properties unconditionally appear in the final + // contract). Thus, converting those properties again shouldn't add new usage, and we can + // ignore their usage. 
+ let mut usage_placeholder = RefsUsage::new(); + + for path in refs_usage.props_predicates.iter() { + let Some(schema) = get_property(&root_schema.schema, path) else { + eprintln!( + "Warning: property `{}` is referenced in the schema but couldn't be found", + path.join("/") + ); + continue; + }; + + let predicate = schema.as_predicate(&mut usage_placeholder); + let doc = Documentation::try_from(schema).ok(); + + property_preds.insert(path.clone(), ConvertedProp { doc, predicate }); + } + + Environment { + definitions, + property_preds, + } + } + /// Wrap a Nickel [`RichTerm`] in a let binding containing the definitions /// from the environment. This is necessary for the Nickel access terms /// tracked in the environment to actually work. - pub fn wrap(self, inner: RichTerm) -> RichTerm { + pub fn wrap(mut self, inner: RichTerm) -> RichTerm { let contracts = self - .0 - .iter() - .map(|(k, v)| { - ( - Ident::from(k), - Field { - value: Some(v.contract.clone().into()), - metadata: FieldMetadata { - doc: v.doc.clone().map(String::from), - ..Default::default() - }, - ..Default::default() - }, - ) - }) + .definitions + .iter_mut() + .filter_map(|(k, v)| Some((Ident::from(k), v.contract_as_field()?))) .collect(); + let predicates = self - .0 + .definitions .into_iter() - .map(|(k, v)| { + .filter_map(|(k, mut v)| Some((Ident::from(k), v.predicate_as_field()?))) + .collect(); + + let prop_preds = self + .property_preds + .into_iter() + .map(|(k, v)| (Ident::from(k.join("/")), Field::from(v))) + .collect(); + + // All the definitions as a Nickel record + let defs = Term::Record(RecordData::with_field_values( + [ ( - Ident::from(k), - Field { - value: Some(v.predicate.into()), - metadata: FieldMetadata { - doc: v.doc.map(String::from), - ..Default::default() - }, + Ident::from("contracts"), + Term::Record(RecordData { + fields: contracts, ..Default::default() - }, - ) - }) - .collect(); + }) + .into(), + ), + ( + Ident::from("predicates"), + Term::Record(RecordData { + fields: predicates, + ..Default::default() + }) + .into(), + ), + ] + .into_iter() + .collect(), + )) + .into(); + + // All the properties (predicates) as a Nickel record + let props = Term::Record(RecordData { + fields: prop_preds, + ..Default::default() + }) + .into(); + + // The enclosing record, with one field for the definitions and one for the properties + let global_env = Term::Record(RecordData::with_field_values( + [ + (Ident::from(DEFINITIONS_ID), defs), + (Ident::from(PROPS_PREDICATES_ID), props), + ] + .into_iter() + .collect(), + )); + Term::Let( - "definitions".into(), - Term::Record(RecordData::with_field_values( - [ - ( - Ident::from("contract"), - Term::Record(RecordData { - fields: contracts, - ..Default::default() - }) - .into(), - ), - ( - Ident::from("predicate"), - Term::Record(RecordData { - fields: predicates, - ..Default::default() - }) - .into(), - ), - ] - .into_iter() - .collect(), - )) - .into(), + Ident::from(ENVIRONMENT_ID), + global_env.into(), inner, LetAttrs { rec: true, @@ -153,27 +516,42 @@ impl Environment { } } -/// Convert the `definitions` field of a json schema mapping identifiers to -/// Schemas to an [`Environment`] struct mapping identifiers to Nickel terms -// FIXME: Definitions can have their own definitions. Does this handle that -// correctly? Does schema.rs even handle it correctly? 
-impl From<&BTreeMap> for Environment { - fn from(defs: &BTreeMap) -> Self { - let terms = defs - .iter() - .map(|(name, schema)| { - ( - name.clone(), - ConvertedSchema { - doc: Documentation::try_from(schema).ok(), - contract: Contract::try_from(schema).unwrap_or_else(|()| { - contract_from_predicate(Predicate::from(access(name).predicate)) - }), - predicate: Predicate::from(schema), - }, - ) - }) - .collect(); - Environment(terms) +/// Get the property located at a path in a schema. +/// +/// # Example +/// +/// For a path `["foo", "bar"]`, this function will extract (if it exists) the schema corresponding +/// to the JSON pointer `properties/foo/properties/bar`. +/// +/// # Return values +/// +/// - Returns `Some(subschema)` if the path exists in the schema and points to `subschema`. +/// - Returns `None` if the path does not exist in the schema or the path is empty. +/// +/// Note: it looks like we could return the original value upon empty path, but there's a mismatch: +/// we get a `SchemaObject` reference, and we must return a `Schema` reference. We can't convert +/// between the two (we can convert between the owned variants easily, but not for references). +/// Since we can special case empty paths before calling to `get_property` if really needed, it's +/// simpler to just return `None` here. +pub fn get_property<'a>(schema_obj: &'a SchemaObject, path: &[String]) -> Option<&'a Schema> { + let mut current: Option<&Schema> = None; + + for prop in path { + // We start from a schema object, but then always go from schemas to schemas, which requires + // this bit of juggling. + let current_obj = match current { + // We had at least one iteration before and the current schema is an object, which means we + // can indeed index into it. + Some(Schema::Object(next)) => next, + // We had at least one iteration before but the current schema isn't an object, we + // can't index into it. + Some(_) => return None, + // This is the first iteration, so we start from the initial schema object + None => schema_obj, + }; + + current = Some(current_obj.object.as_ref()?.properties.get(prop)?); } + + current } diff --git a/src/lib.rs b/src/lib.rs index fe445f2..46e12c4 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -24,7 +24,7 @@ pub mod definitions; pub mod predicates; pub(crate) mod utils; -use contracts::{contract_from_predicate, Contract}; +use contracts::Contract; use definitions::Environment; use nickel_lang_core::{ cache::{Cache, ErrorTolerance}, @@ -34,17 +34,52 @@ use nickel_lang_core::{ use predicates::Predicate; use schemars::schema::RootSchema; +/// The top-level variable storing the json-schema-to-nickel predicate library included by default +/// in any generated contract. +pub const PREDICATES_LIBRARY_ID: &str = "_js2n___nickel_preds_lib"; + +/// The top-level variable storing the environment, that is the definitions and the properties +/// referenced in the JSON schema (through the `$ref`) attribute. This variable stores +/// [DEFINITIONS_ID] and [PROPS_PREDICATES_ID], each in their own field. +/// +/// We put both under the same variable so that definitions and properties are accessible from +/// everywhere, including from other definitions and properties (in fact, we would like to have +/// mutual recursive let definitions for [DEFINITIONS_ID] and [PROPS_PREDICATES_ID], but +/// Nickel doesn't have mutually recursive lets, so we put both in a recursive record instead). 
+pub const ENVIRONMENT_ID: &str = "_js2n___nickel_global_env"; + +/// The name of the special variable introduced by json-schema-to-nickel in the final contract +/// which holds the predicates and the contracts corresponding to the definitions of the schema. +/// The name is long and specific on purpose as it could clash with existing variable in the +/// schema. +/// +/// This Nickel variable is expected to have the type +/// `{_ : {predicate: _, contract: _}}` where field names correspond to the top-level +/// definitions in the schema. +pub const DEFINITIONS_ID: &str = "_js2n___nickel_defs"; + +/// Same as [DEFINITIONS_ID] but for the predicates corresponding to properties of the schema. +/// +/// This Nickel variable is expected to have the type `{_ : Dyn -> Bool}` where predicates are +/// directly stored without further indirection, as opposed to [DEFINITIONS_ID]. Indeed, we don't +/// need the contract part, which can be accessed directly from within the final schema. +/// +/// Properties can be nested, so we might need to store both a predicate for `foo` and for +/// `foo.bar.baz`. To make this work, we store the predicates in a flat dictionary, where the keys +/// are complete paths using `/` as a separator (to avoid confusion with Nickel field path). +pub const PROPS_PREDICATES_ID: &str = "_js2n___nickel_prop_preds"; + /// Convert a [`RootSchema`] into a Nickel contract. If the JSON schema is /// representable as a lazy record contract, this conversion is preferred. /// Otherwise, we fall back to generating a predicate. -pub fn root_schema(root: &RootSchema) -> RichTerm { - let env = Environment::from(&root.definitions); - if let Ok(contract) = Contract::try_from(&root.schema) { - wrap_contract(env, contract) - } else { - let predicate = Predicate::from(&root.schema); - wrap_predicate(env, predicate) - } +pub fn root_schema(root_schema: &RootSchema) -> RichTerm { + let (contract, refs_usage) = Contract::from_root_schema(root_schema).unwrap_or_else(|| { + let (predicate, refs_usage) = Predicate::from_root_schema(root_schema); + (Contract::from(predicate), refs_usage) + }); + + let env = Environment::new(root_schema, refs_usage); + wrap_contract(env, contract) } /// Wrap a Nickel contract making use of the predicates support library and @@ -52,21 +87,18 @@ pub fn root_schema(root: &RootSchema) -> RichTerm { pub fn wrap_contract(env: Environment, contract: Contract) -> RichTerm { let lib_ncl = include_bytes!(concat!(env!("OUT_DIR"), "/predicates.ncl")); let lib_ncl = String::from_utf8_lossy(lib_ncl); + let mut cache = Cache::new(ErrorTolerance::Strict); let parser = TermParser::new(); let file_id = cache.add_string("predicates.ncl", lib_ncl.to_string()); let lexer = Lexer::new(cache.source(file_id)); let lib_rt = parser.parse_strict(file_id, lexer).unwrap(); + Term::Let( - "predicates".into(), + PREDICATES_LIBRARY_ID.into(), lib_rt, env.wrap(contract.into()), Default::default(), ) .into() } - -/// Convert a predicate into a contract and then wrap it using `wrap_contract`. -pub fn wrap_predicate(env: Environment, predicate: Predicate) -> RichTerm { - wrap_contract(env, contract_from_predicate(predicate)) -} diff --git a/src/predicates.rs b/src/predicates.rs index 37318b7..66502d8 100644 --- a/src/predicates.rs +++ b/src/predicates.rs @@ -1,3 +1,12 @@ +//! # Nickel eager contract generation for JSON schemas +//! +//! [crate::contracts] implements a translation from JSON schemas to Nickel which tries to preserve +//! lazyness. This isn't always possible. 
This module provides a fallback converion that is based +//! on boolean predicates instead, and which can handle general JSON schemas. +//! +//! The drawback is that the resulting Nickel contracts are eager (they don't preserve lazyness) +//! and are less LSP-friendly. +use crate::definitions::RefUsage; use std::{collections::BTreeMap, iter}; use nickel_lang_core::{ @@ -6,16 +15,52 @@ use nickel_lang_core::{ term::{array::Array, make, record::RecordData, Number, RichTerm, Term}, }; use schemars::schema::{ - ArrayValidation, InstanceType, NumberValidation, ObjectValidation, Schema, SchemaObject, - SingleOrVec, StringValidation, SubschemaValidation, + ArrayValidation, InstanceType, NumberValidation, ObjectValidation, RootSchema, Schema, + SchemaObject, SingleOrVec, StringValidation, SubschemaValidation, }; use serde_json::Value; -use crate::{definitions, utils::static_access}; +use crate::{ + definitions::{self, RefsUsage}, + utils::static_access, + PREDICATES_LIBRARY_ID, +}; #[derive(Clone)] pub struct Predicate(RichTerm); +impl Predicate { + /// Convert a full JSON schema to a predicate. Returns the predicate and the `$refs` that were + /// referenced during the conversion. + pub fn from_root_schema(root: &RootSchema) -> (Self, RefsUsage) { + let mut refs_usage = RefsUsage::default(); + let predicate = root.schema.as_predicate(&mut refs_usage); + + (predicate, refs_usage) + } + + /// Return an always succeeding predicate. + pub fn always() -> Self { + static_access(PREDICATES_LIBRARY_ID, ["always"]).into() + } +} + +/// [AsPredicate] is essentially like `Into` but passes additional state around used for +/// effective reference resolution. Similar to [crate::contracts::TryAsContract] for [Predicate], but +/// infallible. +pub trait AsPredicate { + /// Convert a JSON schema component `Self` to a predicate. [Self::as_predicate] carries + /// additional state related to reference resolution. + fn as_predicate(&self, refs_usage: &mut RefsUsage) -> Predicate; +} + +/// [AsPredicates] is a variant of [AsPredicate] returning a sequence of predicates. +pub trait AsPredicates { + /// Convert a JSON schema component `Self` to a list of predicates. [Self::as_predicates] + /// carries additional state related to reference resolution. 
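A hypothetical usage sketch of `AsPredicate` (the schema shape is made up, and `serde_json`/`schemars` are assumed available as in the rest of the crate): a `oneOf` cannot be kept lazy, so it goes through the predicate path.

```rust
let schema: Schema = serde_json::from_value(serde_json::json!({
    "oneOf": [ { "type": "string" }, { "type": "integer" } ]
}))
.unwrap();

// No `$ref` occurs in this schema, so `refs_usage` stays empty.
let mut refs_usage = RefsUsage::new();
let predicate = schema.as_predicate(&mut refs_usage);

// A predicate is ultimately just a Nickel term (a function `Dyn -> Bool`).
let _term: RichTerm = predicate.into();
```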
+ fn as_predicates(&self, refs_usage: &mut RefsUsage) -> Predicates; +} + impl From for Predicate { fn from(rt: RichTerm) -> Self { Predicate(rt) @@ -35,7 +80,7 @@ impl From for RichTerm { } // Orphan rule means we have to wrap in a newtype in order to impl From -struct Predicates(Vec); +pub struct Predicates(Vec); impl From for Vec { fn from(Predicates(preds): Predicates) -> Self { @@ -51,7 +96,6 @@ impl From> for Predicates { impl IntoIterator for Predicates { type Item = as IntoIterator>::Item; - type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { @@ -59,9 +103,9 @@ impl IntoIterator for Predicates { } } -fn or_always(s: Option<&Schema>) -> Predicate { - s.map(Predicate::from) - .unwrap_or(Predicate::from(static_access("predicates", ["always"]))) +fn or_always(s: Option<&Schema>, refs_usage: &mut RefsUsage) -> Predicate { + s.map(|s| s.as_predicate(refs_usage)) + .unwrap_or(Predicate::always()) } impl From<&InstanceType> for Predicate { @@ -75,7 +119,7 @@ impl From<&InstanceType> for Predicate { InstanceType::String => Term::Enum("String".into()), InstanceType::Integer => Term::Enum("Integer".into()), }; - mk_app!(static_access("predicates", ["isType"]), type_tag).into() + mk_app!(static_access(PREDICATES_LIBRARY_ID, ["isType"]), type_tag).into() } } @@ -84,7 +128,7 @@ impl From<&SingleOrVec> for Predicate { match value { SingleOrVec::Single(t) => t.as_ref().into(), SingleOrVec::Vec(ts) => mk_app!( - static_access("predicates", ["anyOf"]), + static_access(PREDICATES_LIBRARY_ID, ["anyOf"]), Term::Array( Array::new(ts.iter().map(|t| Predicate::from(t).into()).collect()), Default::default() @@ -100,7 +144,7 @@ impl From<&SingleOrVec> for Predicate { impl From<&[Value]> for Predicate { fn from(value: &[Value]) -> Self { mk_app!( - static_access("predicates", ["enum"]), + static_access(PREDICATES_LIBRARY_ID, ["enum"]), Term::Array( Array::new( value @@ -120,7 +164,7 @@ impl From<&[Value]> for Predicate { impl From<&Value> for Predicate { fn from(value: &Value) -> Self { Term::App( - static_access("predicates", ["const"]), + static_access(PREDICATES_LIBRARY_ID, ["const"]), serde_json::from_value(value.clone()).unwrap(), ) .into() @@ -131,7 +175,7 @@ fn mk_all_of(preds: impl IntoIterator) -> Predicate { let mut ps = preds.into_iter(); match (ps.next(), ps.next()) { // [] - (None, _) => static_access("predicates", ["always"]).into(), + (None, _) => Predicate::always(), // [p] (Some(p), None) => p, // ps @@ -139,7 +183,7 @@ fn mk_all_of(preds: impl IntoIterator) -> Predicate { // reconstruct the full iterator let ps = iter::once(p1).chain(iter::once(p2)).chain(ps); mk_app!( - static_access("predicates", ["allOf"]), + static_access(PREDICATES_LIBRARY_ID, ["allOf"]), Term::Array(Array::from_iter(ps.map(RichTerm::from)), Default::default()) ) .into() @@ -151,7 +195,7 @@ fn mk_any_of(preds: impl IntoIterator) -> Predicate { let mut ps = preds.into_iter(); match (ps.next(), ps.next()) { // [] - (None, _) => static_access("predicates", ["always"]).into(), + (None, _) => Predicate::always(), // [p] (Some(p), None) => p, // ps @@ -159,7 +203,7 @@ fn mk_any_of(preds: impl IntoIterator) -> Predicate { // reconstruct the full iterator let ps = iter::once(p1).chain(iter::once(p2)).chain(ps); mk_app!( - static_access("predicates", ["anyOf"]), + static_access(PREDICATES_LIBRARY_ID, ["anyOf"]), Term::Array(Array::from_iter(ps.map(RichTerm::from)), Default::default()) ) .into() @@ -167,9 +211,9 @@ fn mk_any_of(preds: impl IntoIterator) -> Predicate { } } -impl 
From<&SubschemaValidation> for Predicates { - fn from( - SubschemaValidation { +impl AsPredicates for SubschemaValidation { + fn as_predicates(&self, refs_usage: &mut RefsUsage) -> Predicates { + let SubschemaValidation { all_of, any_of, one_of, @@ -177,25 +221,30 @@ impl From<&SubschemaValidation> for Predicates { if_schema, then_schema, else_schema, - }: &SubschemaValidation, - ) -> Self { + } = self; + let all_of = all_of .as_deref() - .map(|schemas| mk_all_of(schemas.iter().map(Predicate::from))) + .map(|schemas| mk_all_of(schemas.iter().map(|res| res.as_predicate(refs_usage)))) .into_iter(); let any_of = any_of .as_deref() - .map(|schemas| mk_any_of(schemas.iter().map(Predicate::from))) + .map(|schemas| mk_any_of(schemas.iter().map(|res| res.as_predicate(refs_usage)))) .into_iter(); let one_of = one_of .as_deref() .map(|schemas| { mk_app!( - static_access("predicates", ["oneOf"]), + static_access(PREDICATES_LIBRARY_ID, ["oneOf"]), Term::Array( - Array::new(schemas.iter().map(|s| Predicate::from(s).into()).collect()), + Array::new( + schemas + .iter() + .map(|s| s.as_predicate(refs_usage).into()) + .collect() + ), Default::default() ) ) @@ -205,17 +254,23 @@ impl From<&SubschemaValidation> for Predicates { let not = not .as_deref() - .map(|s| mk_app!(static_access("predicates", ["not"]), Predicate::from(s)).into()) + .map(|s| { + mk_app!( + static_access(PREDICATES_LIBRARY_ID, ["not"]), + s.as_predicate(refs_usage) + ) + .into() + }) .into_iter(); let ite = if_schema .as_deref() .map(move |if_schema| { mk_app!( - static_access("predicates", ["ifThenElse"]), - Predicate::from(if_schema), - or_always(then_schema.as_deref()), - or_always(else_schema.as_deref()) + static_access(PREDICATES_LIBRARY_ID, ["ifThenElse"]), + if_schema.as_predicate(refs_usage), + or_always(then_schema.as_deref(), refs_usage), + or_always(else_schema.as_deref(), refs_usage) ) .into() }) @@ -245,7 +300,7 @@ impl From<&NumberValidation> for Predicates { fn predicate(s: &str) -> impl '_ + FnOnce(f64) -> Predicate { move |n| { mk_app!( - static_access("predicates", ["numbers", s]), + static_access(PREDICATES_LIBRARY_ID, ["numbers", s]), Term::Num(Number::try_from_float_simplest(n).unwrap()) ) .into() @@ -280,11 +335,11 @@ impl From<&StringValidation> for Predicates { min_length, pattern, }: &StringValidation, - ) -> Self { + ) -> Predicates { let max_length = max_length .map(|n| { mk_app!( - static_access("predicates", ["strings", "maxLength"]), + static_access(PREDICATES_LIBRARY_ID, ["strings", "maxLength"]), Term::Num(n.into()) ) .into() @@ -294,7 +349,7 @@ impl From<&StringValidation> for Predicates { let min_length = min_length .map(|n| { mk_app!( - static_access("predicates", ["strings", "minLength"]), + static_access(PREDICATES_LIBRARY_ID, ["strings", "minLength"]), Term::Num(n.into()) ) .into() @@ -305,7 +360,7 @@ impl From<&StringValidation> for Predicates { .as_deref() .map(|s| { mk_app!( - static_access("predicates", ["strings", "pattern"]), + static_access(PREDICATES_LIBRARY_ID, ["strings", "pattern"]), make::string(s) ) .into() @@ -316,30 +371,35 @@ impl From<&StringValidation> for Predicates { } } -impl From<&ArrayValidation> for Predicates { - fn from( - ArrayValidation { +impl AsPredicates for ArrayValidation { + fn as_predicates(&self, refs_usage: &mut RefsUsage) -> Predicates { + let ArrayValidation { items, additional_items, max_items, min_items, unique_items, contains, - }: &ArrayValidation, - ) -> Self { + } = self; + let items = match items { None => vec![], Some(SingleOrVec::Single(s)) => 
vec![mk_app!( - static_access("predicates", ["arrays", "arrayOf"]), - Predicate::from(s.as_ref()) + static_access(PREDICATES_LIBRARY_ID, ["arrays", "arrayOf"]), + s.as_predicate(refs_usage) ) .into()], Some(SingleOrVec::Vec(schemas)) => { let len = schemas.len(); [mk_app!( - static_access("predicates", ["arrays", "items"]), + static_access(PREDICATES_LIBRARY_ID, ["arrays", "items"]), Term::Array( - Array::new(schemas.iter().map(|x| Predicate::from(x).into()).collect()), + Array::new( + schemas + .iter() + .map(|x| x.as_predicate(refs_usage).into()) + .collect() + ), Default::default() ) ) @@ -347,8 +407,8 @@ impl From<&ArrayValidation> for Predicates { .into_iter() .chain(additional_items.as_deref().map(|s| { mk_app!( - static_access("predicates", ["arrays", "additionalItems"]), - Predicate::from(s), + static_access(PREDICATES_LIBRARY_ID, ["arrays", "additionalItems"]), + s.as_predicate(refs_usage), Term::Num(len.into()) ) .into() @@ -361,7 +421,7 @@ impl From<&ArrayValidation> for Predicates { let max_items = max_items .map(|n| { mk_app!( - static_access("predicates", ["arrays", "maxItems"]), + static_access(PREDICATES_LIBRARY_ID, ["arrays", "maxItems"]), Term::Num(n.into()) ) .into() @@ -371,7 +431,7 @@ impl From<&ArrayValidation> for Predicates { let min_items = min_items .map(|n| { mk_app!( - static_access("predicates", ["arrays", "minItems"]), + static_access(PREDICATES_LIBRARY_ID, ["arrays", "minItems"]), Term::Num(n.into()) ) .into() @@ -380,7 +440,9 @@ impl From<&ArrayValidation> for Predicates { let unique_items = unique_items .and_then(|unique| { - unique.then_some(static_access("predicates", ["arrays", "uniqueItems"]).into()) + unique.then_some( + static_access(PREDICATES_LIBRARY_ID, ["arrays", "uniqueItems"]).into(), + ) }) .into_iter(); @@ -388,8 +450,8 @@ impl From<&ArrayValidation> for Predicates { .as_deref() .map(|s| { mk_app!( - static_access("predicates", ["arrays", "contains"]), - Predicate::from(s) + static_access(PREDICATES_LIBRARY_ID, ["arrays", "contains"]), + s.as_predicate(refs_usage) ) .into() }) @@ -406,9 +468,9 @@ impl From<&ArrayValidation> for Predicates { } } -impl From<&ObjectValidation> for Predicates { - fn from( - ObjectValidation { +impl AsPredicates for ObjectValidation { + fn as_predicates(&self, refs_usage: &mut RefsUsage) -> Predicates { + let ObjectValidation { max_properties, min_properties, required, @@ -416,12 +478,12 @@ impl From<&ObjectValidation> for Predicates { pattern_properties, additional_properties, property_names, - }: &ObjectValidation, - ) -> Self { + } = self; + let max_properties = max_properties .map(|n| { mk_app!( - static_access("predicates", ["records", "maxProperties"]), + static_access(PREDICATES_LIBRARY_ID, ["records", "maxProperties"]), Term::Num(n.into()) ) .into() @@ -431,7 +493,7 @@ impl From<&ObjectValidation> for Predicates { let min_properties = min_properties .map(|n| { mk_app!( - static_access("predicates", ["records", "minProperties"]), + static_access(PREDICATES_LIBRARY_ID, ["records", "minProperties"]), Term::Num(n.into()) ) .into() @@ -442,8 +504,8 @@ impl From<&ObjectValidation> for Predicates { .as_deref() .map(|s| { mk_app!( - static_access("predicates", ["records", "propertyNames"]), - Predicate::from(s) + static_access(PREDICATES_LIBRARY_ID, ["records", "propertyNames"]), + s.as_predicate(refs_usage) ) .into() }) @@ -455,7 +517,7 @@ impl From<&ObjectValidation> for Predicates { } else { Some( mk_app!( - static_access("predicates", ["records", "required"]), + static_access(PREDICATES_LIBRARY_ID, ["records", 
"required"]), Term::Array( Array::new(required.iter().map(make::string).collect()), Default::default() @@ -468,24 +530,24 @@ impl From<&ObjectValidation> for Predicates { .into_iter(); let record = [mk_app!( - static_access("predicates", ["records", "record"]), + static_access(PREDICATES_LIBRARY_ID, ["records", "record"]), Term::Record(RecordData::with_field_values( properties .iter() - .map(|(k, v)| (k.into(), Predicate::from(v).into())) + .map(|(k, v)| (k.into(), v.as_predicate(refs_usage).into())) .collect() )), Term::Record(RecordData::with_field_values( pattern_properties .iter() - .map(|(k, v)| (k.into(), Predicate::from(v).into())) + .map(|(k, v)| (k.into(), v.as_predicate(refs_usage).into())) .collect() )), Term::Bool(!matches!( additional_properties.as_deref(), Some(Schema::Bool(false)) )), - or_always(additional_properties.as_deref()) + or_always(additional_properties.as_deref(), refs_usage) ) .into()] .into_iter(); @@ -501,13 +563,16 @@ impl From<&ObjectValidation> for Predicates { } } -fn dependencies(extensions: &BTreeMap) -> impl IntoIterator { +fn dependencies( + extensions: &BTreeMap, + refs_usage: &mut RefsUsage, +) -> impl IntoIterator { extensions .get("dependencies") .and_then(|v| v.as_object()) .map(|deps| { mk_app!( - static_access("predicates", ["records", "dependencies"]), + static_access(PREDICATES_LIBRARY_ID, ["records", "dependencies"]), Term::Record(RecordData::with_field_values( deps.into_iter() .map(|(key, value)| ( @@ -524,7 +589,7 @@ fn dependencies(extensions: &BTreeMap) -> impl IntoIterator(value.clone()) - .map(|s| Predicate::from(&s).into()) + .map(|s| s.as_predicate(refs_usage).into()) .unwrap() } )) @@ -536,9 +601,9 @@ fn dependencies(extensions: &BTreeMap) -> impl IntoIterator for Predicate { - fn from( - SchemaObject { +impl AsPredicate for SchemaObject { + fn as_predicate(&self, refs_usage: &mut RefsUsage) -> Predicate { + let SchemaObject { metadata: _, instance_type, format: _, // TODO(vkleen): deal with string formats @@ -551,38 +616,55 @@ impl From<&SchemaObject> for Predicate { object, reference, extensions, - }: &SchemaObject, - ) -> Self { - mk_all_of( - instance_type + } = self; + + // Because we can't share the mutable reference to refs_usage, we need to build the + // arguments to `mk_all_of` in a separate vector pieces by pieces instead of chaining + // directly everything in one big iterator, so that we never share `ref_usages` between two + // live closures. 
+ let mut args: Vec<_> = instance_type + .iter() + .map(Predicate::from) + .chain(enum_values.as_deref().map(Predicate::from)) + .chain(const_value.as_ref().map(Predicate::from)) + .chain(subschemas.iter().flat_map(|x| x.as_predicates(refs_usage))) + .chain( + number + .iter() + .flat_map(|x| Predicates::from(x.as_ref())) + .chain(string.iter().flat_map(|x| Predicates::from(x.as_ref()))), + ) + .collect(); + + args.extend(array.iter().flat_map(|x| x.as_predicates(refs_usage))); + + args.extend( + object .iter() - .map(Predicate::from) - .chain(enum_values.as_deref().map(Predicate::from)) - .chain(const_value.as_ref().map(Predicate::from)) - .chain(subschemas.iter().flat_map(|x| Predicates::from(x.as_ref()))) - .chain(number.iter().flat_map(|x| Predicates::from(x.as_ref()))) - .chain(string.iter().flat_map(|x| Predicates::from(x.as_ref()))) - .chain(array.iter().flat_map(|x| Predicates::from(x.as_ref()))) - .chain(object.iter().flat_map(|x| Predicates::from(x.as_ref()))) - .chain( - reference - .as_deref() - .map(|r| Predicate::from(definitions::reference(r).predicate)), - ) - // schema.rs parses dependencies incorrectly. It should really be - // part of object validation (object_predicates()) but it gets put - // in extensions instead. - .chain(dependencies(extensions)), - ) + .flat_map(|x| x.as_ref().as_predicates(refs_usage)), + ); + + args.extend(reference.as_deref().map(|r| { + Predicate::from(definitions::resolve_ref(r, refs_usage, RefUsage::Predicate)) + })); + + args.extend( + // schema.rs parses dependencies incorrectly. It should really be + // part of object validation (object_predicates()) but it gets put + // in extensions instead. + dependencies(extensions, refs_usage), + ); + + mk_all_of(args) } } -impl From<&Schema> for Predicate { - fn from(value: &Schema) -> Self { - match value { - Schema::Bool(true) => static_access("predicates", ["always"]).into(), - Schema::Bool(false) => static_access("predicates", ["never"]).into(), - Schema::Object(o) => o.into(), +impl AsPredicate for Schema { + fn as_predicate(&self, refs_usage: &mut RefsUsage) -> Predicate { + match self { + Schema::Bool(true) => Predicate::always(), + Schema::Bool(false) => static_access(PREDICATES_LIBRARY_ID, ["never"]).into(), + Schema::Object(o) => o.as_predicate(refs_usage), } } } diff --git a/src/utils.rs b/src/utils.rs index 5590636..4a6352b 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -11,3 +11,10 @@ where { make::static_access(make::var(record), fields) } + +/// Replace special escaping sequences by the actual character within one element of a JSON pointer +/// path. See the [JSON pointer syntax](https://datatracker.ietf.org/doc/html/rfc6901#section-3). +/// Currently, this just amounts to replace `~0` by `~` and `~1` by `/`. 
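A few hypothetical sanity checks for the helper defined just below. Note that RFC 6901 requires `~1` to be rewritten before `~0`; otherwise an input like `~01` would decode to `/` instead of the intended `~1`.

```rust
assert_eq!(decode_json_ptr_part("a~1b"), "a/b"); // `~1` encodes `/`
assert_eq!(decode_json_ptr_part("m~0n"), "m~n"); // `~0` encodes `~`
assert_eq!(decode_json_ptr_part("~01"), "~1"); // the order-sensitive corner case
assert_eq!(decode_json_ptr_part("plain"), "plain");
```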
+pub fn decode_json_ptr_part(part: &str) -> String {
+    part.replace("~1", "/").replace("~0", "~")
+}
diff --git a/tests/json_schema_test_suite_test.rs b/tests/json_schema_test_suite_test.rs
index 9f83391..d108336 100644
--- a/tests/json_schema_test_suite_test.rs
+++ b/tests/json_schema_test_suite_test.rs
@@ -1,11 +1,11 @@
+use schemars::schema::Schema;
 use std::io::stderr;
 
 use json_schema_test_suite::{json_schema_test_suite, TestCase};
 use json_schema_to_nickel::{
-    definitions::Environment, predicates::Predicate, root_schema, wrap_predicate,
+    definitions::Environment, predicates::AsPredicate, root_schema, wrap_contract,
 };
 use nickel_lang_core::{eval::cache::lazy::CBNCache, program::Program, term::RichTerm};
-use schemars::schema::Schema;
 use stringreader::StringReader;
 
 #[json_schema_test_suite("vendor/JSON-Schema-Test-Suite", "draft7", {
@@ -34,11 +34,11 @@ fn translation_typecheck_test(
     let contract = if test_case.schema.is_object() {
         root_schema(&dbg!(serde_json::from_value(test_case.schema).unwrap()))
     } else {
-        wrap_predicate(
+        let schema: Schema = dbg!(serde_json::from_value(test_case.schema).unwrap());
+
+        wrap_contract(
             Environment::empty(),
-            Predicate::from(dbg!(
-                &serde_json::from_value::<Schema>(test_case.schema).unwrap()
-            )),
+            schema.as_predicate(&mut Default::default()).into(),
         )
     };