Skip to content

Commit

Permalink
Merge pull request #20 from josacar/support-mariadb-dumps-with-newline
Browse files Browse the repository at this point in the history
Support mariadb dumps with newlines
  • Loading branch information
josacar authored Dec 28, 2023
2 parents c393161 + 352b265 commit 8e0eb12
Show file tree
Hide file tree
Showing 13 changed files with 108 additions and 85 deletions.
3 changes: 0 additions & 3 deletions CHANGES

This file was deleted.

2 changes: 1 addition & 1 deletion shard.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
name: triki
version: 0.2.5
version: 0.3.0

dependencies:
walker_method:
Expand Down
36 changes: 32 additions & 4 deletions spec/triki_spec.cr
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,8 @@ describe Triki do
},
"another_table" => :truncate,
"some_table_to_keep" => :keep,
}).tap do |o|
o.database_type = :postgres
}).tap do |my_obfuscator|
my_obfuscator.database_type = :postgres
end

output = IO::Memory.new
Expand Down Expand Up @@ -179,8 +179,8 @@ describe Triki do
},
"another_table" => :truncate,
"some_table_to_keep" => :keep,
}).tap do |o|
o.database_type = :postgres
}).tap do |my_obfuscator|
my_obfuscator.database_type = :postgres
end

output = IO::Memory.new
Expand Down Expand Up @@ -371,6 +371,34 @@ describe Triki do
output_string.should_not contain("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
end

# Covers dumps in which each row tuple of a multi-row INSERT sits on its own
# line (the layout shown in the fixture below), so a single statement spans
# several lines and ends only at the `;`.
context "with MariaDB >= 10.7.1 dump" do
it "should obfuscate the tables and remove newlines" do
# Fixture: five multi-line INSERT statements, one per table.
# NOTE(review): `output_string` is not defined inside this block; presumably
# the surrounding spec derives it from `string` — confirm.
string = <<-SQL
INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),
('joe@joe.com','joe', 'somethingelse2', 54),
('dontmurderme@direwolf.com','direwolf', 'somethingelse3', 44);
INSERT INTO `another_table` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4),
(5,6,7,8);
INSERT INTO `some_table_to_keep` (`a`, `b`, `c`, `d`) VALUES (1,2,3,4),
(5,6,7,8);
INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh', 'aawefjkafe', 'wadus'),
('hello1','kjhj!', 892938, 'tradus'),
('hello2','moose!!', NULL, NULL);
INSERT INTO `an_ignored_table` (`col`, `col2`) VALUES ('hello','kjhjd^&dkjh'),
('hello1','kjhj!'),
('hello2','moose!!');
SQL
# The INSERT headers must still be present in the output.
output_string.should contain("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES (")
output_string.should contain("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES (")
# A value containing an escaped quote, a comma and parentheses must survive intact.
output_string.should contain("'some\\'thin,ge())lse1'")
# Expected result for `one_more_table`: all rows on one line, every
# `password` replaced by 'monkey' and every `c` replaced by NULL.
output_string.should contain("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','monkey',NULL,'wadus'),('hello1','monkey',NULL,'tradus'),('hello2','monkey',NULL,NULL);")
# The original passwords must not appear, with or without spaces after the commas.
output_string.should_not contain("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh',NULL, 'wadus'),('hello1','kjhj!',NULL, 'tradus'),('hello2','moose!!',NULL, NULL);")
output_string.should_not contain("INSERT INTO `one_more_table` (`a`, `password`, `c`, `d,d`) VALUES ('hello','kjhjd^&dkjh',NULL,'wadus'),('hello1','kjhj!',NULL,'tradus'),('hello2','moose!!',NULL,NULL);")
# The un-obfuscated `some_table` rows must not appear either.
output_string.should_not contain("INSERT INTO `some_table` (`email`, `name`, `something`, `age`) VALUES ('bob@honk.com','bob', 'some\\'thin,ge())lse1', 25),('joe@joe.com','joe', 'somethingelse2', 54);")
end
end
it "honors a special case: on the people table, rows with skip_regexes that match are skipped" do
output_string.should contain("('bob@honk.com',")
output_string.should contain("('dontmurderme@direwolf.com',")
Expand Down
3 changes: 1 addition & 2 deletions src/triki.cr
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ class Triki
alias ColumnAction = Symbol
alias Between = Range(Int32, Int32)
alias ColumnList = Array(String)
alias Columns = Array(String)
alias RowContent = String | Int32 | Nil

alias RowAsHash = Hash(ColumnName, RowContent)
Expand Down Expand Up @@ -127,7 +126,7 @@ class Triki
end
end

def obfuscate_bulk_insert_line(line, table_name : String, columns : ColumnList, ignore = false)
def obfuscate_bulk_insert_statement(line, table_name : String, columns : ColumnList, ignore = false)
table_config = config[table_name]

case table_config
Expand Down
5 changes: 5 additions & 0 deletions src/triki/base.cr
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Declares the shared parser base type inside the Triki namespace.
class Triki
# Abstract parent struct for the database-specific dump handlers. It carries
# no behaviour of its own; it only fixes the `parse` entry point that every
# concrete subtype has to provide.
abstract struct Base
# Entry point each subtype must implement.
#
# Takes the obfuscator, its config, an input IO and an output IO.
# NOTE(review): presumably reads a dump from `input_io` and writes the
# processed dump to `output_io` — confirm against the implementers.
abstract def parse(obfuscator, config, input_io, output_io)
end
end
6 changes: 3 additions & 3 deletions src/triki/config_applicator.cr
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ class Triki
alias RowContent = Triki::RowContent

# ameba:disable Metrics/CyclomaticComplexity
def self.apply_table_config(row : Array(String?), table_config : Triki::ConfigTableHash, columns : Columns, faker = Faker, dictionary = EnglishDictionary)
def self.apply_table_config(row : Array(String?), table_config : Triki::ConfigTableHash, columns : ColumnList, faker = Faker, dictionary = EnglishDictionary)
return row unless table_config.is_a?(Hash)

row_hash = row_as_hash(row, columns)
Expand Down Expand Up @@ -115,8 +115,8 @@ class Triki
end

def self.row_as_hash(row : Array, columns : Array) : RowAsHash
columns.zip(row).each_with_object(RowAsHash.new) do |(name, value), m|
m[name] = value
columns.zip(row).each_with_object(RowAsHash.new) do |(name, value), row_as_hash|
row_as_hash[name] = value
end
end

Expand Down
30 changes: 14 additions & 16 deletions src/triki/config_scaffold_generator.cr
Original file line number Diff line number Diff line change
@@ -1,28 +1,26 @@
class Triki
module ConfigScaffoldGenerator
macro included
def table_data(line)
{% if @type.name == "Triki::Postgres" %}
parse_copy_statement(line)
{% else %}
parse_insert_statement(line)
{% end %}
end
end

def generate_config(obfuscator, config, input_io, output_io)
buffer = IO::Memory.new

input_io.each_line(chomp: false) do |line|
if obfuscator.database_type == :postgres
parse_copy_statement = ->(statement_line : String) do
if regex_match = /^\s*COPY (.*?) \((.*?)\) FROM\s*/i.match(statement_line)
{
"table_name" => regex_match[1],
"column_names" => regex_match[2].split(/\s*,\s*/),
}
end
end
table_data = parse_copy_statement.call(line)
else
table_data = parse_insert_statement(line)
end
while statement = input_io.gets(';')
table_data = table_data(statement)
next unless table_data

table_name = table_data["table_name"].as(String)
table_name = table_data["table_name"].as(TableName)
next if obfuscator.scaffolded_tables[table_name]? # only process each table_name once

columns = table_data["column_names"].as(Array(String))
columns = table_data["column_names"].as(ColumnList)
table_config = config[table_name]?
next if table_config == :truncate || table_config == :keep

Expand Down
45 changes: 0 additions & 45 deletions src/triki/copy_statement_parser.cr

This file was deleted.

10 changes: 5 additions & 5 deletions src/triki/insert_statement_parser.cr
Original file line number Diff line number Diff line change
@@ -1,20 +1,20 @@
class Triki
module InsertStatementParser
def parse(obfuscator, config, input_io, output_io)
input_io.each_line(chomp: false) do |line|
if table_data = parse_insert_statement(line)
while statement = input_io.gets(';')
if table_data = parse_insert_statement(statement)
table_name = table_data["table_name"].as(String)
columns = table_data["column_names"].as(Array(String))
ignore = table_data["ignore"]?

if config[table_name]?
output_io.puts obfuscator.obfuscate_bulk_insert_line(line, table_name, columns, ignore)
output_io.puts obfuscator.obfuscate_bulk_insert_statement(statement, table_name, columns, ignore)
else
Triki::Log.warn { "Deprecated: #{table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep." }
output_io.print(line)
output_io.print(statement)
end
else
output_io.print(line)
output_io.print(statement)
end
end
end
Expand Down
4 changes: 2 additions & 2 deletions src/triki/mysql.cr
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
require "string_scanner"

class Triki
struct Mysql
struct Mysql < Base
alias Field = String?
alias Fields = Array(Field)
alias Rows = Array(Fields)
Expand All @@ -22,7 +22,7 @@ class Triki

def make_insert_statement(table_name, column_names, rows, ignore = nil)
String.build do |buffer|
buffer << %{INSERT #{ignore ? "IGNORE " : "" }INTO `#{table_name}` (`#{column_names.join("`, `")}`) VALUES }
buffer << %{INSERT #{ignore ? "IGNORE " : ""}INTO `#{table_name}` (`#{column_names.join("`, `")}`) VALUES }
write_rows(buffer, rows)
buffer << ";"
end
Expand Down
45 changes: 43 additions & 2 deletions src/triki/postgres.cr
Original file line number Diff line number Diff line change
@@ -1,8 +1,49 @@
class Triki
struct Postgres
include Triki::CopyStatementParser
struct Postgres < Base
include Triki::ConfigScaffoldGenerator

# Postgres dumps use COPY statements instead of INSERT; they look like:
#
#   COPY some_table (a, b, c, d) FROM stdin;
#   1 2 3 4
#   5 6 7 8
#   \.
#
# The data rows follow the COPY header on separate lines, so the table name
# and column names are remembered across lines until the terminating `\.`.
#
# Reads `input_io` line by line and writes to `output_io`:
# * COPY headers, the `\.` terminator and lines outside a COPY block are
#   echoed unchanged.
# * Data rows of a table present in `config` are passed to
#   `obfuscator.obfuscate_bulk_insert_statement`; empty results are dropped.
# * Data rows of a table missing from `config` are echoed unchanged after a
#   deprecation warning, matching what the INSERT-based parser does.
#
# Raises RuntimeError if the dump contains an INSERT statement.
def parse(obfuscator, config, input_io, output_io)
  current_table_name = String.new
  current_columns = ColumnList.new
  current_table_configured = false
  inside_copy_statement = false

  input_io.each_line(chomp: false) do |line|
    if parse_insert_statement(line)
      raise RuntimeError.new("Cannot obfuscate Postgres dumps containing INSERT statements. Please use COPY statements.")
    elsif table_data = parse_copy_statement(line)
      inside_copy_statement = true

      current_table_name = table_data["table_name"].as(String)
      current_columns = table_data["column_names"].as(ColumnList)

      # Use the nil-returning lookup: assuming `config` is Hash-like, plain
      # `[]` raises for an unknown table and the warning could never fire.
      if config[current_table_name]?
        current_table_configured = true
      else
        current_table_configured = false
        Log.warn { "Deprecated: #{current_table_name} was not specified in the config. A future release will cause this to be an error. Please specify the table definition or set it to :keep." }
      end

      output_io.print(line)
    elsif line.match /^\\\.$/
      # `\.` on its own line terminates the COPY data section.
      inside_copy_statement = false

      output_io.print(line)
    elsif inside_copy_statement
      if current_table_configured
        obfuscated_line = obfuscator.obfuscate_bulk_insert_statement(line, current_table_name, current_columns)
        # An empty result means the row is dropped (e.g. a truncated table).
        output_io.puts(obfuscated_line) unless obfuscated_line.empty?
      else
        # Unknown table: keep its rows as they are instead of asking the
        # obfuscator for a config entry that does not exist.
        output_io.print(line)
      end
    else
      output_io.print(line)
    end
  end
end

# Copy statements contain the column values tab separated like so:
# blah blah blah blah
# which we want to turn into:
Expand Down
2 changes: 1 addition & 1 deletion src/triki/sql_server.cr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
class Triki
struct SqlServer
struct SqlServer < Base
include Triki::InsertStatementParser
include Triki::ConfigScaffoldGenerator

Expand Down
2 changes: 1 addition & 1 deletion src/triki/version.cr
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
class Triki
VERSION = "0.2.5"
VERSION = "0.3.0"
end

0 comments on commit 8e0eb12

Please sign in to comment.