Skip to content

Commit

Permalink
perf: Use bitmap to validate multiple types
Browse files Browse the repository at this point in the history
Bitmaps allow lower memory footprint and faster checks as we remove vec iterations during validation
  • Loading branch information
macisamuele authored and Stranger6667 committed May 22, 2020
1 parent 97b45d8 commit 52b033c
Show file tree
Hide file tree
Showing 4 changed files with 135 additions and 45 deletions.
12 changes: 6 additions & 6 deletions src/error.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use crate::primitive_type::PrimitiveType;
use crate::primitive_type::{PrimitiveType, PrimitiveTypesBitMap};
use serde_json::Value;
use std::{
borrow::Cow,
Expand Down Expand Up @@ -131,7 +131,7 @@ pub enum ValidationErrorKind {
#[derive(Debug)]
pub enum TypeKind {
Single(PrimitiveType),
Multiple(Vec<PrimitiveType>),
Multiple(PrimitiveTypesBitMap),
}

/// Shortcuts for creation of specific error kinds.
Expand Down Expand Up @@ -340,7 +340,7 @@ impl<'a> ValidationError<'a> {
}
pub(crate) fn multiple_type_error(
instance: &'a Value,
types: Vec<PrimitiveType>,
types: PrimitiveTypesBitMap,
) -> ValidationError<'a> {
ValidationError {
instance: Cow::Borrowed(instance),
Expand Down Expand Up @@ -571,7 +571,7 @@ impl<'a> fmt::Display for ValidationError<'a> {
"'{}' is not of types {}",
self.instance,
types
.iter()
.into_iter()
.map(|t| format!("'{}'", t))
.collect::<Vec<String>>()
.join(", ")
Expand All @@ -598,9 +598,9 @@ mod tests {
let instance = json!(42);
let err = ValidationError::multiple_type_error(
&instance,
vec![PrimitiveType::String, PrimitiveType::Number],
vec![PrimitiveType::String, PrimitiveType::Number].into(),
);
let repr = format!("{}", err);
assert_eq!(repr, "'42' is not of types 'string', 'number'")
assert_eq!(repr, "'42' is not of types 'number', 'string'")
}
}
35 changes: 16 additions & 19 deletions src/keywords/legacy/type_draft_4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@ use super::super::{type_, CompilationResult, Validate};
use crate::{
compilation::{CompilationContext, JSONSchema},
error::{error, no_error, CompilationError, ErrorIterator, ValidationError},
primitive_type::PrimitiveType,
primitive_type::{PrimitiveType, PrimitiveTypesBitMap},
};
use serde_json::{Map, Number, Value};
use std::convert::TryFrom;

pub struct MultipleTypesValidator {
types: Vec<PrimitiveType>,
types: PrimitiveTypesBitMap,
}

impl MultipleTypesValidator {
#[inline]
pub(crate) fn compile(items: &[Value]) -> CompilationResult {
let mut types = Vec::with_capacity(items.len());
let mut types = PrimitiveTypesBitMap::new();
for item in items {
match item {
Value::String(string) => {
if let Ok(primitive_type) = PrimitiveType::try_from(string.as_str()) {
types.push(primitive_type)
types |= primitive_type;
} else {
return Err(CompilationError::SchemaError);
}
Expand All @@ -38,32 +38,29 @@ impl Validate for MultipleTypesValidator {
} else {
error(ValidationError::multiple_type_error(
instance,
self.types.clone(),
self.types,
))
}
}
fn is_valid(&self, _: &JSONSchema, instance: &Value) -> bool {
for type_ in &self.types {
match (type_, instance) {
(PrimitiveType::Integer, Value::Number(num)) if is_integer(num) => return true,
(PrimitiveType::Null, Value::Null)
| (PrimitiveType::Boolean, Value::Bool(_))
| (PrimitiveType::String, Value::String(_))
| (PrimitiveType::Array, Value::Array(_))
| (PrimitiveType::Object, Value::Object(_))
| (PrimitiveType::Number, Value::Number(_)) => return true,
(_, _) => continue,
};
match instance {
Value::Array(_) => self.types.contains_type(&PrimitiveType::Array),
Value::Bool(_) => self.types.contains_type(&PrimitiveType::Boolean),
Value::Null => self.types.contains_type(&PrimitiveType::Null),
Value::Number(num) => {
self.types.contains_type(&PrimitiveType::Number)
|| (self.types.contains_type(&PrimitiveType::Integer) && is_integer(num))
}
Value::Object(_) => self.types.contains_type(&PrimitiveType::Object),
Value::String(_) => self.types.contains_type(&PrimitiveType::String),
}
false
}

fn name(&self) -> String {
format!(
"type: [{}]",
self.types
.iter()
.map(|type_| format!("{}", type_))
.into_iter().map(|type_| format!("{}", type_))
.collect::<Vec<String>>()
.join(", ")
)
Expand Down
34 changes: 16 additions & 18 deletions src/keywords/type_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@ use super::{CompilationResult, Validate};
use crate::{
compilation::{CompilationContext, JSONSchema},
error::{error, no_error, CompilationError, ErrorIterator, ValidationError},
primitive_type::PrimitiveType,
primitive_type::{PrimitiveType, PrimitiveTypesBitMap},
};
use serde_json::{Map, Number, Value};
use std::convert::TryFrom;

pub struct MultipleTypesValidator {
types: Vec<PrimitiveType>,
types: PrimitiveTypesBitMap,
}

impl MultipleTypesValidator {
#[inline]
pub(crate) fn compile(items: &[Value]) -> CompilationResult {
let mut types = Vec::with_capacity(items.len());
let mut types = PrimitiveTypesBitMap::new();
for item in items {
match item {
Value::String(string) => {
if let Ok(primitive_type) = PrimitiveType::try_from(string.as_str()) {
types.push(primitive_type)
types |= primitive_type;
} else {
return Err(CompilationError::SchemaError);
}
Expand All @@ -38,31 +38,29 @@ impl Validate for MultipleTypesValidator {
} else {
error(ValidationError::multiple_type_error(
instance,
self.types.clone(),
self.types,
))
}
}
fn is_valid(&self, _: &JSONSchema, instance: &Value) -> bool {
for type_ in &self.types {
match (type_, instance) {
(PrimitiveType::Integer, Value::Number(num)) if is_integer(num) => return true,
(PrimitiveType::Null, Value::Null)
| (PrimitiveType::Boolean, Value::Bool(_))
| (PrimitiveType::String, Value::String(_))
| (PrimitiveType::Array, Value::Array(_))
| (PrimitiveType::Object, Value::Object(_))
| (PrimitiveType::Number, Value::Number(_)) => return true,
(_, _) => continue,
};
match instance {
Value::Array(_) => self.types.contains_type(&PrimitiveType::Array),
Value::Bool(_) => self.types.contains_type(&PrimitiveType::Boolean),
Value::Null => self.types.contains_type(&PrimitiveType::Null),
Value::Number(num) => {
self.types.contains_type(&PrimitiveType::Number)
|| (self.types.contains_type(&PrimitiveType::Integer) && is_integer(num))
}
Value::Object(_) => self.types.contains_type(&PrimitiveType::Object),
Value::String(_) => self.types.contains_type(&PrimitiveType::String),
}
false
}

fn name(&self) -> String {
format!(
"type: [{}]",
self.types
.iter()
.into_iter()
.map(|type_| format!("{}", type_))
.collect::<Vec<String>>()
.join(", ")
Expand Down
99 changes: 97 additions & 2 deletions src/primitive_type.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::{convert::TryFrom, fmt};
use std::{convert::TryFrom, fmt, ops::BitOrAssign};

/// For faster error handling in "type" keyword validator we have this enum, to match
/// with it instead of a string.
#[derive(Debug, Clone)]
#[derive(Debug, Clone, Copy)]
pub enum PrimitiveType {
Array,
Boolean,
Expand Down Expand Up @@ -30,6 +30,7 @@ impl fmt::Display for PrimitiveType {
impl TryFrom<&str> for PrimitiveType {
type Error = ();

#[inline]
fn try_from(value: &str) -> Result<Self, Self::Error> {
match value {
"array" => Ok(PrimitiveType::Array),
Expand All @@ -43,3 +44,97 @@ impl TryFrom<&str> for PrimitiveType {
}
}
}

/// Returns the single-bit mask that stands for `primitive_type` inside a
/// `PrimitiveTypesBitMap`.
///
/// Each variant owns its own power of two, so any combination of the seven
/// primitive types can be OR-ed together into one `u8`.
#[inline(always)]
fn primitive_type_to_bit_map_representation(primitive_type: &PrimitiveType) -> u8 {
    match *primitive_type {
        PrimitiveType::Array => 0b0000_0001,
        PrimitiveType::Boolean => 0b0000_0010,
        PrimitiveType::Integer => 0b0000_0100,
        PrimitiveType::Null => 0b0000_1000,
        PrimitiveType::Number => 0b0001_0000,
        PrimitiveType::Object => 0b0010_0000,
        PrimitiveType::String => 0b0100_0000,
    }
}

/// Translates a single-bit mask back into the `PrimitiveType` it stands for.
///
/// This is the inverse of `primitive_type_to_bit_map_representation`.
///
/// # Panics
///
/// Panics via `unreachable!` when `bit_representation` is not exactly one of
/// the seven single-bit masks handled below (for example `0`, or a value with
/// several bits set).
#[inline(always)]
fn bit_map_representation_primitive_type(bit_representation: u8) -> PrimitiveType {
    match bit_representation {
        0b0000_0001 => PrimitiveType::Array,
        0b0000_0010 => PrimitiveType::Boolean,
        0b0000_0100 => PrimitiveType::Integer,
        0b0000_1000 => PrimitiveType::Null,
        0b0001_0000 => PrimitiveType::Number,
        0b0010_0000 => PrimitiveType::Object,
        0b0100_0000 => PrimitiveType::String,
        _ => unreachable!("This shoud never be possible"),
    }
}

/// A set of `PrimitiveType` values packed into a single byte.
///
/// The value is `Copy`, so it can be handed around by value without cloning.
#[derive(Debug, Copy, Clone)]
pub struct PrimitiveTypesBitMap {
    /// One bit per primitive type; a set bit means the type is a member.
    inner: u8,
}
impl PrimitiveTypesBitMap {
    /// Creates an empty bitmap that contains no primitive type.
    pub(crate) const fn new() -> Self {
        Self { inner: 0 }
    }

    /// Returns a copy of this bitmap with `primitive_type` added to it.
    ///
    /// Adding a type that is already present leaves the bitmap unchanged.
    #[inline]
    pub(crate) fn add_type(self, primitive_type: &PrimitiveType) -> Self {
        let mask = primitive_type_to_bit_map_representation(primitive_type);
        Self {
            inner: self.inner | mask,
        }
    }

    /// Reports whether `primitive_type` is a member of this bitmap.
    #[inline(always)]
    pub(crate) fn contains_type(&self, primitive_type: &PrimitiveType) -> bool {
        let mask = primitive_type_to_bit_map_representation(primitive_type);
        (self.inner & mask) != 0
    }
}
impl BitOrAssign<PrimitiveType> for PrimitiveTypesBitMap {
#[inline]
fn bitor_assign(&mut self, rhs: PrimitiveType) {
*self = self.add_type(&rhs);
}
}
/// Iterating a bitmap yields every `PrimitiveType` it contains, lowest bit first.
impl IntoIterator for PrimitiveTypesBitMap {
    type Item = PrimitiveType;
    type IntoIter = PrimitiveTypesBitMapIterator;

    /// Builds an iterator that inspects all seven type bits of this bitmap.
    fn into_iter(self) -> Self::IntoIter {
        PrimitiveTypesBitMapIterator {
            // The iterator turns each value of this range into the mask `1 << value`.
            // The range must start at 0 so that bit 0 (`PrimitiveType::Array`, mask 1)
            // is visited; starting at 1 would silently drop `Array` from the output.
            // The exclusive upper bound 7 stops after bit 6 (`PrimitiveType::String`).
            range: 0..7,
            bit_map: self,
        }
    }
}
/// Test-only convenience: collects a list of primitive types into a bitmap.
///
/// Duplicates in `value` are harmless because adding a type twice is a no-op.
#[cfg(test)]
impl From<Vec<PrimitiveType>> for PrimitiveTypesBitMap {
    fn from(value: Vec<PrimitiveType>) -> Self {
        value
            .into_iter()
            .fold(Self::new(), |bit_map, primitive_type| {
                bit_map.add_type(&primitive_type)
            })
    }
}

/// Iterator over the primitive types stored in a `PrimitiveTypesBitMap`.
///
/// It is created by `PrimitiveTypesBitMap::into_iter`.
pub struct PrimitiveTypesBitMapIterator {
// Bit positions that still have to be inspected; each value is used as a shift amount.
range: std::ops::Range<u8>,
// The bitmap whose set bits are being reported.
bit_map: PrimitiveTypesBitMap,
}
impl Iterator for PrimitiveTypesBitMapIterator {
type Item = PrimitiveType;
fn next(&mut self) -> Option<Self::Item> {
loop {
if let Some(value) = self.range.next() {
let bit_value = 1<<value;
if self.bit_map.inner & bit_value != 0 {
return Some(bit_map_representation_primitive_type(bit_value))
}
} else {
return None;
}
}
}
}

0 comments on commit 52b033c

Please sign in to comment.