From 52b033c4d1c5d1fafd91b35bfa620c9d3bd6e492 Mon Sep 17 00:00:00 2001 From: Samuele Maci Date: Fri, 22 May 2020 18:55:20 +0100 Subject: [PATCH] perf: Use bitmap to validate multiple types Bitmaps allow lower memory footprint and faster checks as we remove vec iterations during validation --- src/error.rs | 12 ++-- src/keywords/legacy/type_draft_4.rs | 35 +++++----- src/keywords/type_.rs | 34 +++++----- src/primitive_type.rs | 99 ++++++++++++++++++++++++++++- 4 files changed, 135 insertions(+), 45 deletions(-) diff --git a/src/error.rs b/src/error.rs index 8539f772..26f39c0a 100644 --- a/src/error.rs +++ b/src/error.rs @@ -1,4 +1,4 @@ -use crate::primitive_type::PrimitiveType; +use crate::primitive_type::{PrimitiveType, PrimitiveTypesBitMap}; use serde_json::Value; use std::{ borrow::Cow, @@ -131,7 +131,7 @@ pub enum ValidationErrorKind { #[derive(Debug)] pub enum TypeKind { Single(PrimitiveType), - Multiple(Vec), + Multiple(PrimitiveTypesBitMap), } /// Shortcuts for creation of specific error kinds. @@ -340,7 +340,7 @@ impl<'a> ValidationError<'a> { } pub(crate) fn multiple_type_error( instance: &'a Value, - types: Vec, + types: PrimitiveTypesBitMap, ) -> ValidationError<'a> { ValidationError { instance: Cow::Borrowed(instance), @@ -571,7 +571,7 @@ impl<'a> fmt::Display for ValidationError<'a> { "'{}' is not of types {}", self.instance, types - .iter() + .into_iter() .map(|t| format!("'{}'", t)) .collect::>() .join(", ") @@ -598,9 +598,9 @@ mod tests { let instance = json!(42); let err = ValidationError::multiple_type_error( &instance, - vec![PrimitiveType::String, PrimitiveType::Number], + vec![PrimitiveType::String, PrimitiveType::Number].into(), ); let repr = format!("{}", err); - assert_eq!(repr, "'42' is not of types 'string', 'number'") + assert_eq!(repr, "'42' is not of types 'number', 'string'") } } diff --git a/src/keywords/legacy/type_draft_4.rs b/src/keywords/legacy/type_draft_4.rs index 3da46312..d0627ab6 100644 --- a/src/keywords/legacy/type_draft_4.rs +++ b/src/keywords/legacy/type_draft_4.rs @@ -2,24 +2,24 @@ use super::super::{type_, CompilationResult, Validate}; use crate::{ compilation::{CompilationContext, JSONSchema}, error::{error, no_error, CompilationError, ErrorIterator, ValidationError}, - primitive_type::PrimitiveType, + primitive_type::{PrimitiveType, PrimitiveTypesBitMap}, }; use serde_json::{Map, Number, Value}; use std::convert::TryFrom; pub struct MultipleTypesValidator { - types: Vec, + types: PrimitiveTypesBitMap, } impl MultipleTypesValidator { #[inline] pub(crate) fn compile(items: &[Value]) -> CompilationResult { - let mut types = Vec::with_capacity(items.len()); + let mut types = PrimitiveTypesBitMap::new(); for item in items { match item { Value::String(string) => { if let Ok(primitive_type) = PrimitiveType::try_from(string.as_str()) { - types.push(primitive_type) + types |= primitive_type; } else { return Err(CompilationError::SchemaError); } @@ -38,32 +38,29 @@ impl Validate for MultipleTypesValidator { } else { error(ValidationError::multiple_type_error( instance, - self.types.clone(), + self.types, )) } } fn is_valid(&self, _: &JSONSchema, instance: &Value) -> bool { - for type_ in &self.types { - match (type_, instance) { - (PrimitiveType::Integer, Value::Number(num)) if is_integer(num) => return true, - (PrimitiveType::Null, Value::Null) - | (PrimitiveType::Boolean, Value::Bool(_)) - | (PrimitiveType::String, Value::String(_)) - | (PrimitiveType::Array, Value::Array(_)) - | (PrimitiveType::Object, Value::Object(_)) - | (PrimitiveType::Number, Value::Number(_)) => return true, - (_, _) => continue, - }; + match instance { + Value::Array(_) => self.types.contains_type(&PrimitiveType::Array), + Value::Bool(_) => self.types.contains_type(&PrimitiveType::Boolean), + Value::Null => self.types.contains_type(&PrimitiveType::Null), + Value::Number(num) => { + self.types.contains_type(&PrimitiveType::Number) + || (self.types.contains_type(&PrimitiveType::Integer) && is_integer(num)) + } + Value::Object(_) => self.types.contains_type(&PrimitiveType::Object), + Value::String(_) => self.types.contains_type(&PrimitiveType::String), } - false } fn name(&self) -> String { format!( "type: [{}]", self.types - .iter() - .map(|type_| format!("{}", type_)) + .into_iter().map(|type_| format!("{}", type_)) .collect::>() .join(", ") ) diff --git a/src/keywords/type_.rs b/src/keywords/type_.rs index fc6b0c3a..5867f844 100644 --- a/src/keywords/type_.rs +++ b/src/keywords/type_.rs @@ -2,24 +2,24 @@ use super::{CompilationResult, Validate}; use crate::{ compilation::{CompilationContext, JSONSchema}, error::{error, no_error, CompilationError, ErrorIterator, ValidationError}, - primitive_type::PrimitiveType, + primitive_type::{PrimitiveType, PrimitiveTypesBitMap}, }; use serde_json::{Map, Number, Value}; use std::convert::TryFrom; pub struct MultipleTypesValidator { - types: Vec, + types: PrimitiveTypesBitMap, } impl MultipleTypesValidator { #[inline] pub(crate) fn compile(items: &[Value]) -> CompilationResult { - let mut types = Vec::with_capacity(items.len()); + let mut types = PrimitiveTypesBitMap::new(); for item in items { match item { Value::String(string) => { if let Ok(primitive_type) = PrimitiveType::try_from(string.as_str()) { - types.push(primitive_type) + types |= primitive_type; } else { return Err(CompilationError::SchemaError); } @@ -38,31 +38,29 @@ impl Validate for MultipleTypesValidator { } else { error(ValidationError::multiple_type_error( instance, - self.types.clone(), + self.types, )) } } fn is_valid(&self, _: &JSONSchema, instance: &Value) -> bool { - for type_ in &self.types { - match (type_, instance) { - (PrimitiveType::Integer, Value::Number(num)) if is_integer(num) => return true, - (PrimitiveType::Null, Value::Null) - | (PrimitiveType::Boolean, Value::Bool(_)) - | (PrimitiveType::String, Value::String(_)) - | (PrimitiveType::Array, Value::Array(_)) - | (PrimitiveType::Object, Value::Object(_)) - | (PrimitiveType::Number, Value::Number(_)) => return true, - (_, _) => continue, - }; + match instance { + Value::Array(_) => self.types.contains_type(&PrimitiveType::Array), + Value::Bool(_) => self.types.contains_type(&PrimitiveType::Boolean), + Value::Null => self.types.contains_type(&PrimitiveType::Null), + Value::Number(num) => { + self.types.contains_type(&PrimitiveType::Number) + || (self.types.contains_type(&PrimitiveType::Integer) && is_integer(num)) + } + Value::Object(_) => self.types.contains_type(&PrimitiveType::Object), + Value::String(_) => self.types.contains_type(&PrimitiveType::String), } - false } fn name(&self) -> String { format!( "type: [{}]", self.types - .iter() + .into_iter() .map(|type_| format!("{}", type_)) .collect::>() .join(", ") diff --git a/src/primitive_type.rs b/src/primitive_type.rs index 090cdb83..3ecbbe75 100644 --- a/src/primitive_type.rs +++ b/src/primitive_type.rs @@ -1,8 +1,8 @@ -use std::{convert::TryFrom, fmt}; +use std::{convert::TryFrom, fmt, ops::BitOrAssign}; /// For faster error handling in "type" keyword validator we have this enum, to match /// with it instead of a string. -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Copy)] pub enum PrimitiveType { Array, Boolean, @@ -30,6 +30,7 @@ impl fmt::Display for PrimitiveType { impl TryFrom<&str> for PrimitiveType { type Error = (); + #[inline] fn try_from(value: &str) -> Result { match value { "array" => Ok(PrimitiveType::Array), @@ -43,3 +44,97 @@ impl TryFrom<&str> for PrimitiveType { } } } + +#[inline(always)] +fn primitive_type_to_bit_map_representation(primitive_type: &PrimitiveType) -> u8 { + match primitive_type { + PrimitiveType::Array => 1, + PrimitiveType::Boolean => 2, + PrimitiveType::Integer => 4, + PrimitiveType::Null => 8, + PrimitiveType::Number => 16, + PrimitiveType::Object => 32, + PrimitiveType::String => 64, + } +} + +#[inline(always)] +fn bit_map_representation_primitive_type(bit_representation: u8) -> PrimitiveType { + match bit_representation { + 1 => PrimitiveType::Array, + 2 => PrimitiveType::Boolean, + 4 => PrimitiveType::Integer, + 8 => PrimitiveType::Null, + 16 => PrimitiveType::Number, + 32 => PrimitiveType::Object, + 64 => PrimitiveType::String, + _ => unreachable!("This shoud never be possible") + } +} + +#[derive(Clone, Copy, Debug)] +pub struct PrimitiveTypesBitMap { + inner: u8, +} +impl PrimitiveTypesBitMap { + pub(crate) const fn new() -> Self { + Self { inner: 0 } + } + + #[inline] + pub(crate) fn add_type(mut self, primitive_type: &PrimitiveType) -> Self { + self.inner |= primitive_type_to_bit_map_representation(primitive_type); + self + } + + #[inline(always)] + pub(crate) fn contains_type(&self, primitive_type: &PrimitiveType) -> bool { + primitive_type_to_bit_map_representation(primitive_type) & self.inner != 0 + } +} +impl BitOrAssign for PrimitiveTypesBitMap { + #[inline] + fn bitor_assign(&mut self, rhs: PrimitiveType) { + *self = self.add_type(&rhs); + } +} +impl IntoIterator for PrimitiveTypesBitMap { + type Item = PrimitiveType; + type IntoIter = PrimitiveTypesBitMapIterator; + fn into_iter(self) -> Self::IntoIter { + PrimitiveTypesBitMapIterator { + range: 1..7, + bit_map: self + } + } +} +#[cfg(test)] +impl From> for PrimitiveTypesBitMap { + fn from(value: Vec) -> Self { + let mut result = Self::new(); + for primitive_type in value { + result |= primitive_type; + } + result + } +} + +pub struct PrimitiveTypesBitMapIterator { + range: std::ops::Range, + bit_map: PrimitiveTypesBitMap, +} +impl Iterator for PrimitiveTypesBitMapIterator { + type Item = PrimitiveType; + fn next(&mut self) -> Option { + loop { + if let Some(value) = self.range.next() { + let bit_value = 1<