diff --git a/chronicle-etl.gemspec b/chronicle-etl.gemspec index 13b7444..5aa3588 100644 --- a/chronicle-etl.gemspec +++ b/chronicle-etl.gemspec @@ -38,6 +38,7 @@ Gem::Specification.new do |spec| spec.required_ruby_version = ">= 2.7" spec.add_dependency "activesupport", "~> 7.0" + spec.add_dependency "chronicle-core", "~> 0.2.1" spec.add_dependency "chronic_duration", "~> 0.10.6" spec.add_dependency "colorize", "~> 0.8.1" spec.add_dependency "gems", ">= 1" diff --git a/lib/chronicle/etl.rb b/lib/chronicle/etl.rb index 155f1c5..8592edc 100644 --- a/lib/chronicle/etl.rb +++ b/lib/chronicle/etl.rb @@ -1,3 +1,5 @@ +require 'chronicle/schema' + require_relative 'etl/registry/registry' require_relative 'etl/authorizer' require_relative 'etl/config' @@ -9,14 +11,8 @@ require_relative 'etl/job_logger' require_relative 'etl/job' require_relative 'etl/logger' -require_relative 'etl/models/activity' -require_relative 'etl/models/attachment' -require_relative 'etl/models/base' -require_relative 'etl/models/raw' -require_relative 'etl/models/entity' require_relative 'etl/runner' require_relative 'etl/secrets' -require_relative 'etl/serializers/serializer' require_relative 'etl/utils/binary_attachments' require_relative 'etl/utils/hash_utilities' require_relative 'etl/utils/text_recognition' diff --git a/lib/chronicle/etl/exceptions.rb b/lib/chronicle/etl/exceptions.rb index 63601ce..52507de 100644 --- a/lib/chronicle/etl/exceptions.rb +++ b/lib/chronicle/etl/exceptions.rb @@ -51,8 +51,6 @@ class ProviderConnectorNotAvailableError < ConnectorNotAvailableError; end class ExtractionError < Error; end - class SerializationError < Error; end - class TransformationError < Error; end class UntransformableRecordError < TransformationError; end diff --git a/lib/chronicle/etl/loaders/json_loader.rb b/lib/chronicle/etl/loaders/json_loader.rb index 5d3e013..50b96cf 100644 --- a/lib/chronicle/etl/loaders/json_loader.rb +++ b/lib/chronicle/etl/loaders/json_loader.rb @@ -72,7 +72,8 @@ def finish # TODO: implement this def serializer - @config.serializer || Chronicle::ETL::RawSerializer + require 'chronicle/serialization' + @config.serializer || Chronicle::Serialization::HashSerializer end end end diff --git a/lib/chronicle/etl/loaders/rest_loader.rb b/lib/chronicle/etl/loaders/rest_loader.rb index ff5d984..011902a 100644 --- a/lib/chronicle/etl/loaders/rest_loader.rb +++ b/lib/chronicle/etl/loaders/rest_loader.rb @@ -1,6 +1,7 @@ require 'net/http' require 'uri' require 'json' +require 'chronicle/serialization' module Chronicle module ETL @@ -14,7 +15,7 @@ class RestLoader < Chronicle::ETL::Loader setting :access_token def load(record) - payload = Chronicle::ETL::JSONAPISerializer.serialize(record) + payload = Chronicle::Serialization::JSONAPISerializer.serialize(record) # have the outer data key that json-api expects payload = { data: payload } unless payload[:data] diff --git a/lib/chronicle/etl/loaders/table_loader.rb b/lib/chronicle/etl/loaders/table_loader.rb index d09a060..8097d4d 100644 --- a/lib/chronicle/etl/loaders/table_loader.rb +++ b/lib/chronicle/etl/loaders/table_loader.rb @@ -11,7 +11,7 @@ class TableLoader < Chronicle::ETL::Loader setting :truncate_values_at, default: 40 setting :table_renderer, default: :basic - setting :fields_exclude, default: ['lids', 'type'] + setting :fields_exclude, default: ['type'] setting :header_row, default: true def load(record) diff --git a/lib/chronicle/etl/models/activity.rb b/lib/chronicle/etl/models/activity.rb deleted file mode 100644 index dacbf94..0000000 --- a/lib/chronicle/etl/models/activity.rb +++ /dev/null @@ -1,15 +0,0 @@ -require 'chronicle/etl/models/base' - -module Chronicle - module ETL - module Models - class Activity < Chronicle::ETL::Models::Base - TYPE = 'activities'.freeze - ATTRIBUTES = [:verb, :start_at, :end_at].freeze - ASSOCIATIONS = [:involved, :actor].freeze - - attr_accessor(*ATTRIBUTES, *ASSOCIATIONS) - end - end - end -end diff --git a/lib/chronicle/etl/models/attachment.rb b/lib/chronicle/etl/models/attachment.rb deleted file mode 100644 index 07fbf40..0000000 --- a/lib/chronicle/etl/models/attachment.rb +++ /dev/null @@ -1,14 +0,0 @@ -require 'chronicle/etl/models/base' - -module Chronicle - module ETL - module Models - class Attachment < Chronicle::ETL::Models::Base - TYPE = 'attachments'.freeze - ATTRIBUTES = [:url_original, :data].freeze - - attr_accessor(*ATTRIBUTES) - end - end - end -end diff --git a/lib/chronicle/etl/models/base.rb b/lib/chronicle/etl/models/base.rb deleted file mode 100644 index fe0ca05..0000000 --- a/lib/chronicle/etl/models/base.rb +++ /dev/null @@ -1,122 +0,0 @@ -require 'digest' - -module Chronicle - module ETL - module Models - # Represents a record that's been transformed by a Transformer and - # ready to be loaded. Loosely based on ActiveModel. - # - # @todo Experiment with just mixing in ActiveModel instead of this - # this reimplementation - class Base - ATTRIBUTES = [:provider, :provider_id, :provider_namespace, :lat, :lng, :metadata].freeze - ASSOCIATIONS = [].freeze - - attr_accessor(:id, :dedupe_on, *ATTRIBUTES) - - def initialize(attributes = {}) - assign_attributes(attributes) if attributes - @dedupe_on = [] - @metadata = {} - end - - # A unique identifier for this model is formed from a type - # and either an id or lids. - def identifier_hash - { - type: self.class::TYPE, - id: @id, - lids: lids - }.compact - end - - # Array of local ids that uniquely identify this record - def lids - @dedupe_on.map do |fields| - generate_lid(fields) - end.compact.uniq - end - - # For a given set of fields of this model, generate a - # unique local id by hashing the field values - def generate_lid fields - raise ArgumentError.new("Must provide an array of symbolized fields") unless fields.is_a?(Array) - - values = fields.sort.map do |field| - instance_variable = "@#{field.to_s}" - self.instance_variable_get(instance_variable) - end - - return if values.any? { |e| e.nil? } - - Digest::SHA256.hexdigest(values.join(",")) - end - - # Set of attribute names that this model has is Base's shared - # attributes combined with the child class's - def attribute_list - (ATTRIBUTES + self.class::ATTRIBUTES).uniq - end - - # All of this record's attributes - def attributes - attributes = {} - attribute_list.each do |attribute| - instance_variable = "@#{attribute.to_s}" - attributes[attribute] = self.instance_variable_get(instance_variable) - end - attributes.compact - end - - # All of this record's associations - def associations - association_list = ASSOCIATIONS + self.class::ASSOCIATIONS - attributes = {} - association_list.each do |attribute| - instance_variable = "@#{attribute.to_s}" - association = self.instance_variable_get(instance_variable) - attributes[attribute] = association if association - end - attributes.compact - end - - def associations_hash - associations.map do |k, v| - if v.is_a?(Array) - [k, v.map(&:to_h)] - else - [k, v.to_h] - end - end.to_h - end - - def meta_hash - { - meta: { - dedupe_on: @dedupe_on.map{|d| d.map(&:to_s).join(",")} - } - } - end - - # FIXME: move this to a Utils module - def to_h_flattened - Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h) - end - - def to_h - identifier_hash - .merge(attributes) - .merge(associations_hash) - .merge(meta_hash) - end - - def assign_attributes attributes - attributes.each do |k, v| - setter = :"#{k}=" - public_send(setter, v) if respond_to? setter - end - end - end - end - end -end diff --git a/lib/chronicle/etl/models/entity.rb b/lib/chronicle/etl/models/entity.rb deleted file mode 100644 index 3b6efa4..0000000 --- a/lib/chronicle/etl/models/entity.rb +++ /dev/null @@ -1,29 +0,0 @@ -require 'chronicle/etl/models/base' - -module Chronicle - module ETL - module Models - class Entity < Chronicle::ETL::Models::Base - TYPE = 'entities'.freeze - ATTRIBUTES = [:title, :body, :provider_url, :represents, :slug, :myself, :metadata].freeze - - # TODO: This desperately needs a validation system - ASSOCIATIONS = [ - :involvements, # inverse of activity's `involved` - :analogous, - :attachments, - :abouts, - :aboutables, # inverse of above - :depicts, - :consumers, - :creators, - :creations, - :contains, - :containers # inverse of above - ].freeze # TODO: add these to reflect Chronicle Schema - - attr_accessor(*ATTRIBUTES, *ASSOCIATIONS) - end - end - end -end diff --git a/lib/chronicle/etl/models/raw.rb b/lib/chronicle/etl/models/raw.rb deleted file mode 100644 index 54f4659..0000000 --- a/lib/chronicle/etl/models/raw.rb +++ /dev/null @@ -1,26 +0,0 @@ -require 'chronicle/etl/models/base' - -module Chronicle - module ETL - module Models - # A record from an extraction with no processing or normalization applied - class Raw - TYPE = 'raw' - - attr_accessor :raw_data - - def initialize(raw_data) - @raw_data = raw_data - end - - def to_h - @raw_data.to_h - end - - def to_h_flattened - Chronicle::ETL::Utils::HashUtilities.flatten_hash(to_h) - end - end - end - end -end diff --git a/lib/chronicle/etl/registry/plugins.rb b/lib/chronicle/etl/registry/plugins.rb index f527e20..da81bc2 100644 --- a/lib/chronicle/etl/registry/plugins.rb +++ b/lib/chronicle/etl/registry/plugins.rb @@ -107,7 +107,7 @@ def self.exists?(name) # All versions of all plugins currently installed def self.installed_gemspecs # TODO: add check for chronicle-etl dependency - Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" } + Gem::Specification.filter { |s| s.name.match(/^chronicle-/) && s.name != "chronicle-etl" && s.name != "chronicle-core" } end # Latest version of each installed plugin diff --git a/lib/chronicle/etl/runner.rb b/lib/chronicle/etl/runner.rb index 0bc148e..8740259 100644 --- a/lib/chronicle/etl/runner.rb +++ b/lib/chronicle/etl/runner.rb @@ -88,8 +88,8 @@ def process_extraction(extraction) new_objects = [transformer.transform].flatten # raise an error unless all new_objects are a Base - unless new_objects.all? { |r| r.is_a?(Chronicle::ETL::Models::Base) || r.is_a?(Chronicle::ETL::Models::Raw) } - raise(Chronicle::ETL::RunnerError, "Expected transformer to output a Chronicle ETL Model") + unless new_objects.all? { |r| r.is_a?(Chronicle::Schema::Base) } + raise(Chronicle::ETL::RunnerError, "Expected transformer to output a Chronicle Schema model") end Chronicle::ETL::Logger.debug(tty_log_transformation(transformer)) diff --git a/lib/chronicle/etl/serializers/jsonapi_serializer.rb b/lib/chronicle/etl/serializers/jsonapi_serializer.rb deleted file mode 100644 index afac9c7..0000000 --- a/lib/chronicle/etl/serializers/jsonapi_serializer.rb +++ /dev/null @@ -1,31 +0,0 @@ -module Chronicle - module ETL - class JSONAPISerializer < Chronicle::ETL::Serializer - def initialize(*args) - super - - raise(SerializationError, "Record must be a subclass of Chronicle::ETL::Model::Base") unless @record.is_a?(Chronicle::ETL::Models::Base) - end - - def serializable_hash - @record - .identifier_hash - .merge({ attributes: @record.attributes }) - .merge({ relationships: build_associations }) - .merge(@record.meta_hash) - end - - def build_associations - @record.associations.transform_values do |value| - association_data = - if value.is_a?(Array) - value.map { |record| JSONAPISerializer.new(record).serializable_hash } - else - JSONAPISerializer.new(value).serializable_hash - end - { data: association_data } - end - end - end - end -end diff --git a/lib/chronicle/etl/serializers/raw_serializer.rb b/lib/chronicle/etl/serializers/raw_serializer.rb deleted file mode 100644 index 60504cf..0000000 --- a/lib/chronicle/etl/serializers/raw_serializer.rb +++ /dev/null @@ -1,10 +0,0 @@ -module Chronicle - module ETL - # Take a Raw model and output `raw_data` as a hash - class RawSerializer < Chronicle::ETL::Serializer - def serializable_hash - @record.to_h - end - end - end -end diff --git a/lib/chronicle/etl/serializers/serializer.rb b/lib/chronicle/etl/serializers/serializer.rb deleted file mode 100644 index 08906fb..0000000 --- a/lib/chronicle/etl/serializers/serializer.rb +++ /dev/null @@ -1,28 +0,0 @@ -module Chronicle - module ETL - # Abstract class representing a Serializer for an ETL record - class Serializer - # Construct a new instance of this serializer. - # == Parameters: - # options:: - # Options for configuring this Serializers - def initialize(record, options = {}) - @record = record - @options = options - end - - # Serialize a record as a hash - def serializable_hash - raise NotImplementedError - end - - def self.serialize(record) - serializer = self.new(record) - serializer.serializable_hash - end - end - end -end - -require_relative 'jsonapi_serializer' -require_relative 'raw_serializer' \ No newline at end of file diff --git a/lib/chronicle/etl/transformers/image_file_transformer.rb b/lib/chronicle/etl/transformers/image_file_transformer.rb index 478258f..f194464 100644 --- a/lib/chronicle/etl/transformers/image_file_transformer.rb +++ b/lib/chronicle/etl/transformers/image_file_transformer.rb @@ -62,7 +62,7 @@ def timestamp private def build_created(file) - record = ::Chronicle::ETL::Models::Activity.new + record = ::Chronicle::Schema::Activity.new record.verb = @config.verb record.provider = @config.provider record.provider_id = id @@ -77,7 +77,7 @@ def build_created(file) end def build_actor - actor = ::Chronicle::ETL::Models::Entity.new + actor = ::Chronicle::Schema::Entity.new actor.represents = 'identity' actor.provider = @config.actor[:provider] actor.slug = @config.actor[:slug] @@ -86,7 +86,7 @@ def build_actor end def build_image - image = ::Chronicle::ETL::Models::Entity.new + image = ::Chronicle::Schema::Entity.new image.represents = @config.involved[:represents] image.title = build_title image.body = exif['Description'] @@ -107,7 +107,7 @@ def build_image image.abouts = build_keywords(tags) if @config.include_image_data - attachment = ::Chronicle::ETL::Models::Attachment.new + attachment = ::Chronicle::Schema::Attachment.new attachment.data = build_image_data image.attachments = [attachment] end @@ -117,7 +117,7 @@ def build_image def build_keywords(topics) topics.map do |topic| - t = ::Chronicle::ETL::Models::Entity.new + t = ::Chronicle::Schema::Entity.new t.represents = 'topic' t.provider = @config.involved[:provider] t.title = topic @@ -129,7 +129,7 @@ def build_keywords(topics) def build_people_depicted(names) names.map do |name| - identity = ::Chronicle::ETL::Models::Entity.new + identity = ::Chronicle::Schema::Entity.new identity.represents = 'identity' identity.provider = @config.involved[:provider] identity.slug = name.parameterize diff --git a/lib/chronicle/etl/transformers/null_transformer.rb b/lib/chronicle/etl/transformers/null_transformer.rb index be1ee8b..bfe8772 100644 --- a/lib/chronicle/etl/transformers/null_transformer.rb +++ b/lib/chronicle/etl/transformers/null_transformer.rb @@ -7,7 +7,7 @@ class NullTransformer < Chronicle::ETL::Transformer end def transform - Chronicle::ETL::Models::Raw.new(@extraction.data) + Chronicle::Schema::Raw.new(@extraction.data) end def timestamp; end diff --git a/spec/chronicle/etl/loaders/csv_loader_spec.rb b/spec/chronicle/etl/loaders/csv_loader_spec.rb index 03ce163..c489941 100644 --- a/spec/chronicle/etl/loaders/csv_loader_spec.rb +++ b/spec/chronicle/etl/loaders/csv_loader_spec.rb @@ -4,10 +4,10 @@ RSpec.describe Chronicle::ETL::CSVLoader do # TODO: consolidate this with other specs let(:record) do - Chronicle::ETL::Models::Activity.new( + Chronicle::Schema::Activity.new( provider: 'foo', verb: 'tested', - actor: Chronicle::ETL::Models::Entity.new( + actor: Chronicle::Schema::Entity.new( represent: 'identity', provider: 'bar' ) diff --git a/spec/chronicle/etl/loaders/json_loader_spec.rb b/spec/chronicle/etl/loaders/json_loader_spec.rb index 057607c..38c02b0 100644 --- a/spec/chronicle/etl/loaders/json_loader_spec.rb +++ b/spec/chronicle/etl/loaders/json_loader_spec.rb @@ -3,7 +3,7 @@ RSpec.describe Chronicle::ETL::JSONLoader do let(:record) do - Chronicle::ETL::Models::Raw.new({ foo: 'bar' }) + Chronicle::Schema::Raw.new({ foo: 'bar' }) end context "when using stdout as destination" do diff --git a/spec/chronicle/etl/loaders/table_loader_spec.rb b/spec/chronicle/etl/loaders/table_loader_spec.rb index 8b73632..a335284 100644 --- a/spec/chronicle/etl/loaders/table_loader_spec.rb +++ b/spec/chronicle/etl/loaders/table_loader_spec.rb @@ -2,10 +2,10 @@ RSpec.describe Chronicle::ETL::TableLoader do let(:record) do - Chronicle::ETL::Models::Activity.new( + Chronicle::Schema::Activity.new( provider: 'foo', verb: 'tested', - actor: Chronicle::ETL::Models::Entity.new( + actor: Chronicle::Schema::Entity.new( represent: 'identity', provider: 'bar' ) diff --git a/spec/chronicle/etl/serializers/jsonapi_serializer_spec.rb b/spec/chronicle/etl/serializers/jsonapi_serializer_spec.rb deleted file mode 100644 index 1912c3e..0000000 --- a/spec/chronicle/etl/serializers/jsonapi_serializer_spec.rb +++ /dev/null @@ -1,34 +0,0 @@ -require 'spec_helper' - -RSpec.describe Chronicle::ETL::JSONAPISerializer do - let(:record) do - Chronicle::ETL::Models::Activity.new( - provider: 'foo', - verb: 'tested', - actor: Chronicle::ETL::Models::Entity.new( - represent: 'identity', - provider: 'bar' - ) - ) - end - - let(:record_raw) do - Chronicle::ETL::Models::Raw.new({ foo: 'bar' }) - end - - it "can build a JSONAPI object from a model" do - expected = { - type: "activities", - lids: [], - attributes: { provider: "foo", metadata: {}, verb: "tested" }, - relationships: { actor: { data: { type: "entities", lids: [], attributes: { provider: "bar", metadata: {} }, relationships: {}, meta: { dedupe_on: [] } } } }, - meta: { dedupe_on: [] } - } - expect(Chronicle::ETL::JSONAPISerializer.serialize(record).to_json).to eql(expected.to_json) - end - - it "only works on subclasses of Chronicle::ETL::Models::Base" do - expect { Chronicle::ETL::JSONAPISerializer.serialize(record_raw) } - .to raise_exception(Chronicle::ETL::SerializationError) - end -end diff --git a/spec/chronicle/etl/serializers/raw_serializer_spec.rb b/spec/chronicle/etl/serializers/raw_serializer_spec.rb deleted file mode 100644 index 626c413..0000000 --- a/spec/chronicle/etl/serializers/raw_serializer_spec.rb +++ /dev/null @@ -1,11 +0,0 @@ -require 'spec_helper' - -RSpec.describe Chronicle::ETL::RawSerializer do - let(:record_raw) do - Chronicle::ETL::Models::Raw.new({ foo: 'bar', num: 4 }) - end - - it "outputs the raw fields of a RawModel" do - expect(Chronicle::ETL::RawSerializer.serialize(record_raw)).to eql({foo: 'bar', num: 4}) - end -end diff --git a/spec/chronicle/etl/transformers.rb/null_transformer_spec.rb b/spec/chronicle/etl/transformers.rb/null_transformer_spec.rb index a3f9105..7f1f56d 100644 --- a/spec/chronicle/etl/transformers.rb/null_transformer_spec.rb +++ b/spec/chronicle/etl/transformers.rb/null_transformer_spec.rb @@ -6,7 +6,7 @@ describe "#transform" do it "does nothing" do t = Chronicle::ETL::NullTransformer.new(extraction) - expect(t.transform.raw_data[:foo]).to eq('bar') + expect(t.transform.foo).to eq('bar') end end end