Skip to content

Commit

Permalink
Add simple in-memory Schema data structure. Restore nullable bit to type
Browse files Browse the repository at this point in the history
metadata only. Add "?" to nullable type formatting.
  • Loading branch information
wesm committed Mar 3, 2016
1 parent b88b69e commit c770f7d
Show file tree
Hide file tree
Showing 28 changed files with 434 additions and 191 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -467,6 +467,8 @@ set(LINK_LIBS
set(ARROW_SRCS
src/arrow/array.cc
src/arrow/builder.cc
src/arrow/field.cc
src/arrow/schema.cc
src/arrow/type.cc
)

Expand Down
2 changes: 1 addition & 1 deletion cpp/src/arrow/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,4 @@ install(FILES
set(ARROW_TEST_LINK_LIBS arrow_test_util ${ARROW_MIN_TEST_LIBS})

ADD_ARROW_TEST(array-test)
ADD_ARROW_TEST(field-test)
ADD_ARROW_TEST(schema-test)
4 changes: 2 additions & 2 deletions cpp/src/arrow/array.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,8 @@ class Array {
// Returns the array's length as stored in length_.
int32_t length() const { return length_;}
// Returns the null count as stored in null_count_.
int32_t null_count() const { return null_count_;}

const TypePtr& type() const { return type_;}
TypeEnum type_enum() const { return type_->type;}
// Returns the shared handle to this array's DataType (no copy is made).
const std::shared_ptr<DataType>& type() const { return type_;}
// Returns the logical type tag read from the DataType's `type` member.
LogicalType::type logical_type() const { return type_->type;}

const std::shared_ptr<Buffer>& nulls() const {
return nulls_;
Expand Down
19 changes: 6 additions & 13 deletions cpp/src/arrow/field-test.cc → cpp/src/arrow/field.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,24 +15,17 @@
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>
#include <memory>
#include <string>

#include "arrow/field.h"
#include "arrow/type.h"
#include "arrow/types/integer.h"

using std::string;
#include <sstream>
#include <string>

namespace arrow {

TEST(TestField, Basics) {
TypePtr ftype = TypePtr(new Int32Type());
Field f0("f0", ftype);

ASSERT_EQ(f0.name, "f0");
ASSERT_EQ(f0.type->ToString(), ftype->ToString());
// Renders the field as "<name> <type>", where the type portion comes from the
// type object's own ToString().
std::string Field::ToString() const {
  std::string rendered = this->name;
  rendered += " ";
  rendered += this->type->ToString();
  return rendered;
}

} // namespace arrow
17 changes: 16 additions & 1 deletion cpp/src/arrow/field.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,27 @@ struct Field {
TypePtr type;

// Constructs a field from a name and a type handle; both arguments are copied
// into the corresponding members.
Field(const std::string& name, const TypePtr& type) :
name(name), type(type) {}
name(name),
type(type) {}

// Equality operator; forwards to Equals() so both spellings always agree.
bool operator==(const Field& other) const {
  return Equals(other);
}

// Inequality operator; the exact negation of Equals().
bool operator!=(const Field& other) const {
  const bool same = Equals(other);
  return !same;
}

// Two fields are equal when they are the same object, or when their names
// match and this field's type reports equality with the other field's type.
bool Equals(const Field& other) const {
  // Identity short-circuit: an object always equals itself.
  if (this == &other) {
    return true;
  }
  // Names are compared first; the type comparison only runs when they match.
  if (!(this->name == other.name)) {
    return false;
  }
  return this->type->Equals(other.type.get());
}

bool nullable() const {
return this->type->nullable;
}

std::string ToString() const;
};

} // namespace arrow
Expand Down
110 changes: 110 additions & 0 deletions cpp/src/arrow/schema-test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include <gtest/gtest.h>
#include <memory>
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/schema.h"
#include "arrow/type.h"
#include "arrow/types/string.h"

using std::shared_ptr;
using std::vector;

namespace arrow {

// Checks that a Field exposes the name and type it was built with, and that
// nullable() reflects the flag of the underlying type.
TEST(TestField, Basics) {
  // Default-constructed Int32Type is expected to be nullable; passing false
  // is expected to yield a non-nullable type.
  const shared_ptr<DataType> nullable_int32 = std::make_shared<Int32Type>();
  const shared_ptr<DataType> required_int32 = std::make_shared<Int32Type>(false);

  Field nullable_field("f0", nullable_int32);
  Field required_field("f0", required_int32);

  ASSERT_EQ(nullable_field.name, "f0");
  ASSERT_EQ(nullable_field.type->ToString(), nullable_int32->ToString());

  ASSERT_TRUE(nullable_field.nullable());
  ASSERT_FALSE(required_field.nullable());
}

// Checks Field equality: same name and same type compare equal, while a type
// that differs only in nullability makes the fields unequal.
TEST(TestField, Equals) {
  const shared_ptr<DataType> nullable_int32 = std::make_shared<Int32Type>();
  const shared_ptr<DataType> required_int32 = std::make_shared<Int32Type>(false);

  Field reference("f0", nullable_int32);
  Field required("f0", required_int32);
  Field duplicate("f0", nullable_int32);

  ASSERT_EQ(reference, duplicate);
  ASSERT_NE(reference, required);
}

// Test fixture used by the TEST_F(TestSchema, ...) cases below.
// SetUp() is empty, so no shared state is prepared before each test.
class TestSchema : public ::testing::Test {
public:
void SetUp() {}
};

// Checks field count, positional field access, and both Equals() overloads.
TEST_F(TestSchema, Basics) {
  typedef shared_ptr<Field> FieldPtr;

  const FieldPtr int32_field =
      std::make_shared<Field>("f0", std::make_shared<Int32Type>());
  // Same name "f1", differing only in the flag passed to UInt8Type.
  const FieldPtr uint8_required =
      std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
  const FieldPtr uint8_optional =
      std::make_shared<Field>("f1", std::make_shared<UInt8Type>());
  const FieldPtr string_field =
      std::make_shared<Field>("f2", std::make_shared<StringType>());

  const vector<FieldPtr> base_fields = {int32_field, uint8_required, string_field};
  const vector<FieldPtr> altered_fields = {int32_field, uint8_optional, string_field};

  const auto schema = std::make_shared<Schema>(base_fields);
  const auto identical_schema = std::make_shared<Schema>(base_fields);
  const auto different_schema = std::make_shared<Schema>(altered_fields);

  // The schema hands back the very same field handles it was given.
  ASSERT_EQ(3, schema->num_fields());
  ASSERT_EQ(int32_field, schema->field(0));
  ASSERT_EQ(uint8_required, schema->field(1));
  ASSERT_EQ(string_field, schema->field(2));

  // Overload taking a shared_ptr<Schema>.
  ASSERT_TRUE(schema->Equals(identical_schema));
  ASSERT_FALSE(schema->Equals(different_schema));

  // Overload taking a const Schema&.
  ASSERT_TRUE(schema->Equals(*identical_schema));
  ASSERT_FALSE(schema->Equals(*different_schema));
}

// Verifies the text produced by Schema::ToString(): one "<name> <type>" line
// per field, in the order the fields were supplied. The expected output marks
// types with a leading "?" except for f1, whose type is constructed with
// `false`.
TEST_F(TestSchema, ToString) {
auto f0 = std::make_shared<Field>("f0", std::make_shared<Int32Type>());
auto f1 = std::make_shared<Field>("f1", std::make_shared<UInt8Type>(false));
auto f2 = std::make_shared<Field>("f2", std::make_shared<StringType>());
// A ListType wrapping an Int16Type; expected to render as "?list<?int16>".
auto f3 = std::make_shared<Field>("f3",
std::make_shared<ListType>(std::make_shared<Int16Type>()));

vector<shared_ptr<Field> > fields = {f0, f1, f2, f3};
auto schema = std::make_shared<Schema>(fields);

std::string result = schema->ToString();
// Raw string literal: every line, including the last one, ends in a newline,
// and the text is compared byte-for-byte.
std::string expected = R"(f0 ?int32
f1 uint8
f2 ?string
f3 ?list<?int16>
)";

ASSERT_EQ(expected, result);
}

} // namespace arrow
58 changes: 58 additions & 0 deletions cpp/src/arrow/schema.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "arrow/schema.h"

#include <memory>
#include <string>
#include <sstream>
#include <vector>

#include "arrow/field.h"

namespace arrow {

// Builds a schema that stores its own copy of the given vector of field
// handles (the Field objects themselves are shared, not duplicated).
Schema::Schema(const std::vector<std::shared_ptr<Field> >& fields) :
fields_(fields) {}

bool Schema::Equals(const Schema& other) const {
if (this == &other) return true;
if (num_fields() != other.num_fields()) {
return false;
}
for (int i = 0; i < num_fields(); ++i) {
if (!field(i)->Equals(*other.field(i).get())) {
return false;
}
}
return true;
}

bool Schema::Equals(const std::shared_ptr<Schema>& other) const {
return Equals(*other.get());
}

// Renders the schema as text: one line per field, in schema order, each line
// being the field's ToString() output followed by a newline character.
std::string Schema::ToString() const {
  std::stringstream buffer;

  // Iterate by const reference: copying a shared_ptr on every iteration would
  // only add reference-count traffic.
  for (const auto& field : fields_) {
    // '\n' instead of std::endl: the characters written are the same, but the
    // in-memory stream is not flushed after every line.
    buffer << field->ToString() << '\n';
  }
  return buffer.str();
}

} // namespace arrow
56 changes: 56 additions & 0 deletions cpp/src/arrow/schema.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#ifndef ARROW_SCHEMA_H
#define ARROW_SCHEMA_H

#include <memory>
#include <string>
#include <vector>

#include "arrow/field.h"
#include "arrow/type.h"

namespace arrow {

// An ordered collection of shared Field handles.
class Schema {
 public:
  // Stores a copy of the supplied vector of field handles.
  explicit Schema(const std::vector<std::shared_ptr<Field> >& fields);

  // Number of fields held by this schema
  int num_fields() const {
    return static_cast<int>(fields_.size());
  }

  // Access the field at index i. No bounds checking is performed, so i must
  // be a valid index
  const std::shared_ptr<Field>& field(int i) const {
    return fields_[i];
  }

  // True when every field of this schema equals its counterpart in other
  bool Equals(const Schema& other) const;
  bool Equals(const std::shared_ptr<Schema>& other) const;

  // Produce a textual rendering of the schema, meant for debugging
  std::string ToString() const;

 private:
  std::vector<std::shared_ptr<Field> > fields_;
};

} // namespace arrow

#endif // ARROW_SCHEMA_H
Loading

0 comments on commit c770f7d

Please sign in to comment.