From 992b58f2baf4cfe8210d228f236f1cf6f0569ece Mon Sep 17 00:00:00 2001 From: David Lutterkort Date: Tue, 14 Apr 2020 09:01:48 -0700 Subject: [PATCH] store: Change the way we determine the type for 'id' fields We now take `ID` to be synonymous with `String` and allow any type as a primary key --- store/postgres/examples/layout.rs | 10 +- store/postgres/src/entities.rs | 12 +- store/postgres/src/relational.rs | 225 ++++++++++++++++------- store/postgres/tests/relational.rs | 11 +- store/postgres/tests/relational_bytes.rs | 13 +- 5 files changed, 171 insertions(+), 100 deletions(-) diff --git a/store/postgres/examples/layout.rs b/store/postgres/examples/layout.rs index 107e84f6b8b..0d52239a077 100644 --- a/store/postgres/examples/layout.rs +++ b/store/postgres/examples/layout.rs @@ -2,11 +2,10 @@ extern crate clap; extern crate graph_store_postgres; use clap::App; -use graphql_parser::parse_schema; use std::fs; use std::process::exit; -use graph::prelude::SubgraphDeploymentId; +use graph::prelude::{Schema, SubgraphDeploymentId}; use graph_store_postgres::relational::{ColumnType, Layout}; pub fn usage(msg: &str) -> ! { @@ -136,11 +135,12 @@ pub fn main() { let db_schema = args.value_of("db_schema").unwrap_or("rel"); let kind = args.value_of("generate").unwrap_or("ddl"); - let schema = ensure(fs::read_to_string(schema), "Can not read schema file"); - let schema = ensure(parse_schema(&schema), "Failed to parse schema"); let subgraph = SubgraphDeploymentId::new("Qmasubgraph").unwrap(); + let schema = ensure(fs::read_to_string(schema), "Can not read schema file"); + let schema = ensure(Schema::parse(&schema, subgraph), "Failed to parse schema"); + let layout = ensure( - Layout::new(&schema, subgraph, db_schema), + Layout::new(&schema, db_schema), "Failed to construct Mapping", ); match kind { diff --git a/store/postgres/src/entities.rs b/store/postgres/src/entities.rs index 83c80b26654..75ae80ae00a 100644 --- a/store/postgres/src/entities.rs +++ b/store/postgres/src/entities.rs @@ -792,13 +792,9 @@ impl Connection { self.conn.batch_execute(&*query)?; match *GRAPH_STORAGE_SCHEME { - v::Relational => Layout::create_relational_schema( - &self.conn, - &schema_name, - schema.id.clone(), - &schema.document, - ) - .map(|_| ()), + v::Relational => { + Layout::create_relational_schema(&self.conn, schema, schema_name).map(|_| ()) + } v::Split => create_split_schema(&self.conn, &schema_name), } } @@ -1291,7 +1287,7 @@ impl Storage { } V::Relational => { let subgraph_schema = store.input_schema(subgraph)?; - let layout = Layout::new(&subgraph_schema.document, subgraph.clone(), schema.name)?; + let layout = Layout::new(subgraph_schema.as_ref(), &schema.name)?; Storage::Relational(layout) } }; diff --git a/store/postgres/src/relational.rs b/store/postgres/src/relational.rs index 63f8f78863f..0a175e0e723 100644 --- a/store/postgres/src/relational.rs +++ b/store/postgres/src/relational.rs @@ -12,7 +12,7 @@ use graphql_parser::query as q; use graphql_parser::schema as s; use inflector::Inflector; use std::collections::{BTreeMap, HashMap, HashSet}; -use std::convert::From; +use std::convert::{From, TryFrom}; use std::fmt::{self, Write}; use std::str::FromStr; use std::sync::Arc; @@ -23,6 +23,7 @@ use crate::relational_queries::{ DeleteQuery, EntityData, FilterCollection, FilterQuery, FindManyQuery, FindQuery, InsertQuery, RevertClampQuery, RevertRemoveQuery, UpdateQuery, }; +use graph::data::graphql::ext::ObjectTypeExt; use graph::data::schema::{FulltextConfig, FulltextDefinition, Schema, SCHEMA_TYPE_NAME}; use graph::prelude::{ format_err, info, BlockNumber, Entity, EntityChange, EntityChangeOperation, EntityCollection, @@ -119,12 +120,43 @@ impl fmt::Display for SqlName { /// The SQL type to use for GraphQL ID properties. We support /// strings and byte arrays -#[derive(Copy, Clone, Debug, Eq, PartialEq)] +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] pub(crate) enum IdType { String, Bytes, } +impl TryFrom<&s::ObjectType> for IdType { + type Error = StoreError; + + fn try_from(obj_type: &s::ObjectType) -> Result { + let pk = obj_type + .field(&PRIMARY_KEY_COLUMN.to_owned()) + .expect("Each ObjectType has an `id` field"); + Self::try_from(&pk.field_type) + } +} + +impl TryFrom<&s::Type> for IdType { + type Error = StoreError; + + fn try_from(field_type: &s::Type) -> Result { + let name = named_type(field_type); + + match ValueType::from_str(name)? { + ValueType::String | ValueType::ID => Ok(IdType::String), + ValueType::Bytes => Ok(IdType::Bytes), + _ => Err(format_err!( + "The `id` field has type `{}` but only `String`, `Bytes`, and `ID` are allowed", + &name + ) + .into()), + } + } +} + +type IdTypeMap = HashMap; + type EnumMap = BTreeMap>; #[derive(Debug, Clone)] @@ -144,61 +176,101 @@ pub struct Layout { impl Layout { /// Generate a layout for a relational schema for entities in the - /// GraphQL schema `document`. Attributes of type `ID` will use the - /// SQL type `id_type`. The subgraph ID is passed in `subgraph`, and - /// the name of the database schema in which the subgraph's tables live - /// is in `schema`. - pub fn new( - document: &s::Document, - subgraph: SubgraphDeploymentId, - schema: V, - ) -> Result - where - V: Into, - { + /// GraphQL schema `schema`. The name of the database schema in which + /// the subgraph's tables live is in `schema`. + pub fn new(schema: &Schema, schema_name: &str) -> Result { use s::Definition::*; use s::TypeDefinition::*; - let schema = schema.into(); - SqlName::check_valid_identifier(&schema, "database schema")?; + SqlName::check_valid_identifier(schema_name, "database schema")?; - // Extract interfaces and tables - let mut tables = Vec::new(); - let mut enums = EnumMap::new(); - - for defn in &document.definitions { - match defn { - // Do not create a table for the _Schema_ type - TypeDefinition(Object(obj_type)) if obj_type.name.eq(SCHEMA_TYPE_NAME) => {} - TypeDefinition(Object(obj_type)) => { - tables.push(Table::new( - obj_type, - &schema, - Schema::entity_fulltext_definitions(&obj_type.name, document), - &enums, - tables.len() as u32, - )?); - } - TypeDefinition(Interface(_)) => { /* we do not care about interfaces */ } - TypeDefinition(Enum(enum_type)) => { - SqlName::check_valid_identifier(&enum_type.name, "enum")?; - enums.insert( - enum_type.name.clone(), - enum_type - .values - .iter() - .map(|value| value.name.to_owned()) - .collect(), - ); + // Extract enum types + let enums: EnumMap = schema + .document + .definitions + .iter() + .filter_map(|defn| { + if let TypeDefinition(Enum(enum_type)) = defn { + Some(enum_type) + } else { + None } - other => { - return Err(StoreError::Unknown(format_err!( - "can not handle {:?}", - other - ))) + }) + .map(|enum_type| -> Result<(String, Vec), StoreError> { + SqlName::check_valid_identifier(&enum_type.name, "enum")?; + Ok(( + enum_type.name.clone(), + enum_type + .values + .iter() + .map(|value| value.name.to_owned()) + .collect::>(), + )) + }) + .collect::>()?; + + // List of all object types that are not __SCHEMA__ + let object_types = schema + .document + .definitions + .iter() + .filter_map(|defn| { + if let TypeDefinition(Object(obj_type)) = defn { + Some(obj_type) + } else { + None } - } - } + }) + .filter(|obj_type| obj_type.name != SCHEMA_TYPE_NAME) + .collect::>(); + + // For interfaces, check that all implementors use the same IdType + // and build a list of name/IdType pairs + let id_types_for_interface = schema.types_for_interface.iter().map(|(interface, types)| { + types + .iter() + .map(|obj_type| IdType::try_from(obj_type)) + .collect::, _>>() + .and_then(move |types| { + if types.len() > 1 { + Err(format_err!( + "The implementations of interface \ + {} use different types for the `id` field", + interface + ) + .into()) + } else { + // For interfaces that are not implemented at all, pretend + // they have a String `id` field + let id_type = types.iter().next().cloned().unwrap_or(IdType::String); + Ok((interface.to_owned(), id_type)) + } + }) + }); + + // Map of type name to the type of the ID column for the object_types + // and interfaces in the schema + let id_types = object_types + .iter() + .map(|obj_type| IdType::try_from(*obj_type).map(|t| (obj_type.name.to_owned(), t))) + .chain(id_types_for_interface) + .collect::>()?; + + // Construct a Table struct for each ObjectType + let tables = object_types + .iter() + .enumerate() + .map(|(i, obj_type)| { + Table::new( + obj_type, + &schema_name, + Schema::entity_fulltext_definitions(&obj_type.name, &schema.document), + &enums, + &id_types, + i as u32, + ) + }) + .collect::, _>>()?; let tables: Vec<_> = tables.into_iter().map(|table| Arc::new(table)).collect(); @@ -207,7 +279,7 @@ impl Layout { .map(|table| { format!( "select count(*) from \"{}\".\"{}\" where upper_inf(block_range)", - schema, table.name + schema_name, table.name ) }) .collect::>() @@ -222,8 +294,8 @@ impl Layout { }); Ok(Layout { - subgraph, - schema, + subgraph: schema.id.clone(), + schema: schema_name.to_owned(), tables, enums, count_query, @@ -232,11 +304,10 @@ impl Layout { pub fn create_relational_schema( conn: &PgConnection, + schema: &Schema, schema_name: &str, - subgraph: SubgraphDeploymentId, - document: &s::Document, ) -> Result { - let layout = Self::new(document, subgraph, schema_name)?; + let layout = Self::new(schema, schema_name)?; let sql = layout .as_ddl() .map_err(|_| StoreError::Unknown(format_err!("failed to generate DDL for layout")))?; @@ -660,9 +731,15 @@ impl ColumnType { field_type: &q::Type, schema: &str, enums: &EnumMap, + id_types: &IdTypeMap, ) -> Result { let name = named_type(field_type); + // See if its an object type defined in the schema + if let Some(id_type) = id_types.get(name) { + return Ok(id_type.clone().into()); + } + // Check if it's an enum, and if it is, return an appropriate // ColumnType::Enum if enums.contains_key(&*name) { @@ -674,10 +751,9 @@ impl ColumnType { ))); } - // It is not an enum, and therefore one of our builtin primitive types - // or a reference to another type. For the latter, we use `ValueType::ID` - // as the underlying type - match ValueType::from_str(name).unwrap_or(ValueType::ID) { + // It is not an object type or an enum, and therefore one of our + // builtin primitive types + match ValueType::from_str(name)? { ValueType::Boolean => Ok(ColumnType::Boolean), ValueType::BigDecimal => Ok(ColumnType::BigDecimal), ValueType::BigInt => Ok(ColumnType::BigInt), @@ -710,7 +786,10 @@ impl ColumnType { match self { ColumnType::String => IdType::String, ColumnType::BytesId => IdType::Bytes, - _ => unreachable!("only String and Bytes are allowed as primary keys"), + _ => unreachable!( + "only String and BytesId are allowed as primary keys but not {:?}", + self + ), } } } @@ -725,14 +804,24 @@ pub struct Column { } impl Column { - fn new(field: &s::Field, schema: &str, enums: &EnumMap) -> Result { + fn new( + field: &s::Field, + schema: &str, + enums: &EnumMap, + id_types: &IdTypeMap, + ) -> Result { SqlName::check_valid_identifier(&*field.name, "attribute")?; let sql_name = SqlName::from(&*field.name); + let column_type = if sql_name.as_str() == PRIMARY_KEY_COLUMN { + IdType::try_from(&field.field_type)?.into() + } else { + ColumnType::from_field_type(&field.field_type, schema, enums, id_types)? + }; Ok(Column { name: sql_name, field: field.name.clone(), - column_type: ColumnType::from_field_type(&field.field_type, schema, enums)?, + column_type, field_type: field.field_type.clone(), fulltext_fields: None, }) @@ -851,6 +940,7 @@ impl Table { schema: &str, fulltexts: Vec, enums: &EnumMap, + id_types: &IdTypeMap, position: u32, ) -> Result { SqlName::check_valid_identifier(&*defn.name, "object")?; @@ -860,7 +950,7 @@ impl Table { .fields .iter() .filter(|field| !derived_column(field)) - .map(|field| Column::new(field, schema, enums)) + .map(|field| Column::new(field, schema, enums, id_types)) .chain(fulltexts.iter().map(|def| Column::new_fulltext(def))) .collect::, StoreError>>()?; @@ -991,14 +1081,13 @@ fn derived_column(field: &s::Field) -> bool { #[cfg(test)] mod tests { use super::*; - use graphql_parser::parse_schema; const ID_TYPE: ColumnType = ColumnType::String; fn test_layout(gql: &str) -> Layout { - let schema = parse_schema(gql).expect("Test schema invalid"); let subgraph = SubgraphDeploymentId::new("subgraph").unwrap(); - Layout::new(&schema, subgraph, "rel").expect("Failed to construct Layout") + let schema = Schema::parse(gql, subgraph).expect("Test schema invalid"); + Layout::new(&schema, "rel").expect("Failed to construct Layout") } #[test] diff --git a/store/postgres/tests/relational.rs b/store/postgres/tests/relational.rs index 6587c297700..1b3f78e7c7a 100644 --- a/store/postgres/tests/relational.rs +++ b/store/postgres/tests/relational.rs @@ -300,15 +300,8 @@ fn insert_test_data(conn: &PgConnection) -> Layout { let query = format!("create schema {}", SCHEMA_NAME); conn.batch_execute(&*query).unwrap(); - let layout = Layout::create_relational_schema( - &conn, - SCHEMA_NAME, - THINGS_SUBGRAPH_ID.clone(), - &schema.document, - ) - .expect("Failed to create relational schema"); - - layout + Layout::create_relational_schema(&conn, &schema, SCHEMA_NAME) + .expect("Failed to create relational schema") } fn scrub(entity: &Entity) -> Entity { diff --git a/store/postgres/tests/relational_bytes.rs b/store/postgres/tests/relational_bytes.rs index 9e5cf582d42..80e20255d05 100644 --- a/store/postgres/tests/relational_bytes.rs +++ b/store/postgres/tests/relational_bytes.rs @@ -82,15 +82,8 @@ fn create_schema(conn: &PgConnection) -> Layout { let query = format!("create schema {}", SCHEMA_NAME); conn.batch_execute(&*query).unwrap(); - let layout = Layout::create_relational_schema( - &conn, - SCHEMA_NAME, - THINGS_SUBGRAPH_ID.clone(), - &schema.document, - ) - .expect("Failed to create relational schema"); - - layout + Layout::create_relational_schema(&conn, &schema, SCHEMA_NAME) + .expect("Failed to create relational schema") } fn scrub(entity: &Entity) -> Entity { @@ -181,7 +174,7 @@ fn bad_id() { let res = layout.find(conn, "Thing", "\\xbadd", BLOCK_NUMBER_MAX); assert!(res.is_err()); assert_eq!( - "store error: Invalid character \'\\\' at position 0", + "store error: Invalid character \'\\\\\' at position 0", res.err().unwrap().to_string() );