Skip to content

Commit eb1af90

Browse files
authored
feat(c++): support build multi-properties in high-level API (#722)
1 parent a3a580e commit eb1af90

File tree

7 files changed

+338
-52
lines changed

7 files changed

+338
-52
lines changed

cpp/examples/high_level_writer_example.cc

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
void vertices_builder() {
3030
// construct vertices builder
3131
std::string vertex_meta_file =
32-
GetTestingResourceRoot() + "/ldbc_sample/parquet/" + "person.vertex.yml";
32+
GetTestingResourceRoot() + "/ldbc/parquet/" + "person.vertex.yml";
3333
auto vertex_meta = graphar::Yaml::LoadFile(vertex_meta_file).value();
3434
auto vertex_info = graphar::VertexInfo::Load(vertex_meta).value();
3535
graphar::IdType start_index = 0;
@@ -45,11 +45,16 @@ void vertices_builder() {
4545
// prepare vertex data
4646
int vertex_count = 3;
4747
std::vector<std::string> property_names = {"id", "firstName", "lastName",
48-
"gender"};
48+
"gender", "emails"};
4949
std::vector<int64_t> id = {0, 1, 2};
5050
std::vector<std::string> firstName = {"John", "Jane", "Alice"};
5151
std::vector<std::string> lastName = {"Smith", "Doe", "Wonderland"};
5252
std::vector<std::string> gender = {"male", "famale", "famale"};
53+
std::vector<std::vector<std::string>> emails = {
54+
55+
56+
57+
5358

5459
// add vertices
5560
for (int i = 0; i < vertex_count; i++) {
@@ -58,6 +63,10 @@ void vertices_builder() {
5863
v.AddProperty(property_names[1], firstName[i]);
5964
v.AddProperty(property_names[2], lastName[i]);
6065
v.AddProperty(property_names[3], gender[i]);
66+
for (const auto& email : emails[i]) {
67+
v.AddProperty(graphar::Cardinality::LIST, property_names[4],
68+
email); // Multi-property
69+
}
6170
ASSERT(builder.AddVertex(v).ok());
6271
}
6372

cpp/src/graphar/arrow/chunk_writer.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
#include "arrow/api.h"
2525
#include "arrow/compute/api.h"
26+
#include "graphar/fwd.h"
2627
#include "graphar/writer_util.h"
2728
#if defined(ARROW_VERSION) && ARROW_VERSION >= 12000000
2829
#include "arrow/acero/exec_plan.h"
@@ -193,10 +194,16 @@ Status VertexPropertyWriter::validate(
193194
" does not exist in the input table.");
194195
}
195196
auto field = schema->field(indice);
196-
if (DataType::ArrowDataTypeToDataType(field->type()) != property.type) {
197+
auto schema_data_type = DataType::DataTypeToArrowDataType(property.type);
198+
if (property.cardinality != Cardinality::SINGLE) {
199+
schema_data_type = arrow::list(schema_data_type);
200+
}
201+
if (!DataType::ArrowDataTypeToDataType(field->type())
202+
->Equals(DataType::ArrowDataTypeToDataType(schema_data_type))) {
197203
return Status::TypeError(
198204
"The data type of property: ", property.name, " is ",
199-
property.type->ToTypeName(), ", but got ",
205+
DataType::ArrowDataTypeToDataType(schema_data_type)->ToTypeName(),
206+
", but got ",
200207
DataType::ArrowDataTypeToDataType(field->type())->ToTypeName(),
201208
".");
202209
}

cpp/src/graphar/high-level/vertices_builder.cc

Lines changed: 85 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,14 @@
1818
*/
1919

2020
#include "graphar/high-level/vertices_builder.h"
21+
#include <any>
22+
#include <iterator>
23+
#include <vector>
2124
#include "graphar/convert_to_arrow_type.h"
25+
#include "graphar/fwd.h"
2226
#include "graphar/graph_info.h"
27+
#include "graphar/label.h"
28+
#include "graphar/status.h"
2329

2430
namespace graphar::builder {
2531

@@ -66,59 +72,62 @@ Status VerticesBuilder::validate(const Vertex& v, IdType index,
6672
bool invalid_type = false;
6773
switch (type->id()) {
6874
case Type::BOOL:
69-
if (property.second.type() !=
70-
typeid(typename TypeToArrowType<Type::BOOL>::CType)) {
71-
invalid_type = true;
72-
}
75+
GAR_RETURN_NOT_OK(
76+
v.ValidatePropertyType<typename TypeToArrowType<Type::BOOL>::CType>(
77+
property.first,
78+
vertex_info_->GetPropertyCardinality(property.first).value()));
7379
break;
7480
case Type::INT32:
75-
if (property.second.type() !=
76-
typeid(typename TypeToArrowType<Type::INT32>::CType)) {
77-
invalid_type = true;
78-
}
81+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
82+
typename TypeToArrowType<Type::INT32>::CType>(
83+
property.first,
84+
vertex_info_->GetPropertyCardinality(property.first).value()));
7985
break;
8086
case Type::INT64:
81-
if (property.second.type() !=
82-
typeid(typename TypeToArrowType<Type::INT64>::CType)) {
83-
invalid_type = true;
84-
}
87+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
88+
typename TypeToArrowType<Type::INT64>::CType>(
89+
property.first,
90+
vertex_info_->GetPropertyCardinality(property.first).value()));
8591
break;
8692
case Type::FLOAT:
87-
if (property.second.type() !=
88-
typeid(typename TypeToArrowType<Type::FLOAT>::CType)) {
89-
invalid_type = true;
90-
}
93+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
94+
typename TypeToArrowType<Type::FLOAT>::CType>(
95+
property.first,
96+
vertex_info_->GetPropertyCardinality(property.first).value()));
9197
break;
9298
case Type::DOUBLE:
93-
if (property.second.type() !=
94-
typeid(typename TypeToArrowType<Type::DOUBLE>::CType)) {
95-
invalid_type = true;
96-
}
99+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
100+
typename TypeToArrowType<Type::DOUBLE>::CType>(
101+
property.first,
102+
vertex_info_->GetPropertyCardinality(property.first).value()));
97103
break;
98104
case Type::STRING:
99-
if (property.second.type() !=
100-
typeid(typename TypeToArrowType<Type::STRING>::CType)) {
101-
invalid_type = true;
102-
}
105+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
106+
typename TypeToArrowType<Type::STRING>::CType>(
107+
property.first,
108+
vertex_info_->GetPropertyCardinality(property.first).value()));
103109
break;
104110
case Type::DATE:
105111
// date is stored as int32_t
106-
if (property.second.type() !=
107-
typeid(typename TypeToArrowType<Type::DATE>::CType::c_type)) {
108-
invalid_type = true;
109-
}
112+
GAR_RETURN_NOT_OK(v.ValidatePropertyType<
113+
typename TypeToArrowType<Type::DATE>::CType::c_type>(
114+
property.first,
115+
vertex_info_->GetPropertyCardinality(property.first).value()));
110116
break;
111117
case Type::TIMESTAMP:
112118
// timestamp is stored as int64_t
113-
if (property.second.type() !=
114-
typeid(typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type)) {
115-
invalid_type = true;
116-
}
119+
GAR_RETURN_NOT_OK(
120+
v.ValidatePropertyType<
121+
typename TypeToArrowType<Type::TIMESTAMP>::CType::c_type>(
122+
property.first,
123+
vertex_info_->GetPropertyCardinality(property.first).value()));
117124
break;
118125
default:
119126
return Status::TypeError("Unsupported property type.");
120127
}
121-
if (invalid_type) {
128+
if (invalid_type &&
129+
Cardinality::SINGLE ==
130+
vertex_info_->GetPropertyCardinality(property.first).value()) {
122131
return Status::TypeError(
123132
"Invalid data type for property ", property.first + ", defined as ",
124133
type->ToTypeName(), ", but got ", property.second.type().name());
@@ -134,16 +143,41 @@ Status VerticesBuilder::tryToAppend(
134143
std::shared_ptr<arrow::Array>& array) { // NOLINT
135144
using CType = typename TypeToArrowType<type>::CType;
136145
arrow::MemoryPool* pool = arrow::default_memory_pool();
137-
typename TypeToArrowType<type>::BuilderType builder(pool);
138-
for (auto& v : vertices_) {
139-
if (v.Empty() || !v.ContainProperty(property_name)) {
140-
RETURN_NOT_ARROW_OK(builder.AppendNull());
141-
} else {
142-
RETURN_NOT_ARROW_OK(
143-
builder.Append(std::any_cast<CType>(v.GetProperty(property_name))));
146+
auto builder =
147+
std::make_shared<typename TypeToArrowType<type>::BuilderType>(pool);
148+
auto cardinality =
149+
vertex_info_->GetPropertyCardinality(property_name).value();
150+
if (cardinality != Cardinality::SINGLE) {
151+
arrow::ListBuilder list_builder(pool, builder);
152+
for (auto& v : vertices_) {
153+
RETURN_NOT_ARROW_OK(list_builder.Append());
154+
if (v.Empty() || !v.ContainProperty(property_name)) {
155+
RETURN_NOT_ARROW_OK(builder->AppendNull());
156+
} else {
157+
if (!v.IsMultiProperty(property_name)) {
158+
RETURN_NOT_ARROW_OK(builder->Append(
159+
std::any_cast<CType>(v.GetProperty(property_name))));
160+
} else {
161+
auto property_value_list = std::any_cast<std::vector<std::any>>(
162+
v.GetProperty(property_name));
163+
for (auto& value : property_value_list) {
164+
RETURN_NOT_ARROW_OK(builder->Append(std::any_cast<CType>(value)));
165+
}
166+
}
167+
}
168+
}
169+
array = list_builder.Finish().ValueOrDie();
170+
} else {
171+
for (auto& v : vertices_) {
172+
if (v.Empty() || !v.ContainProperty(property_name)) {
173+
RETURN_NOT_ARROW_OK(builder->AppendNull());
174+
} else {
175+
RETURN_NOT_ARROW_OK(builder->Append(
176+
std::any_cast<CType>(v.GetProperty(property_name))));
177+
}
144178
}
179+
array = builder->Finish().ValueOrDie();
145180
}
146-
array = builder.Finish().ValueOrDie();
147181
return Status::OK();
148182
}
149183

@@ -219,11 +253,18 @@ Result<std::shared_ptr<arrow::Table>> VerticesBuilder::convertToTable() {
219253
for (auto& property_group : property_groups) {
220254
for (auto& property : property_group->GetProperties()) {
221255
// add a column to schema
222-
schema_vector.push_back(arrow::field(
223-
property.name, DataType::DataTypeToArrowDataType(property.type)));
256+
if (vertex_info_->GetPropertyCardinality(property.name).value() !=
257+
Cardinality::SINGLE) {
258+
schema_vector.push_back(arrow::field(
259+
property.name,
260+
arrow::list(DataType::DataTypeToArrowDataType(property.type))));
261+
} else {
262+
schema_vector.push_back(arrow::field(
263+
property.name, DataType::DataTypeToArrowDataType(property.type)));
264+
}
224265
// add a column to data
225266
std::shared_ptr<arrow::Array> array;
226-
appendToArray(property.type, property.name, array);
267+
GAR_RETURN_NOT_OK(appendToArray(property.type, property.name, array));
227268
arrays.push_back(array);
228269
}
229270
}

cpp/src/graphar/high-level/vertices_builder.h

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,21 @@
2020
#pragma once
2121

2222
#include <any>
23+
#include <cassert>
2324
#include <cstddef>
2425
#include <memory>
2526
#include <string>
2627
#include <unordered_map>
28+
#include <unordered_set>
2729
#include <utility>
2830
#include <vector>
2931

3032
#include "graphar/arrow/chunk_writer.h"
33+
#include "graphar/fwd.h"
3134
#include "graphar/graph_info.h"
3235
#include "graphar/result.h"
36+
#include "graphar/status.h"
37+
#include "graphar/types.h"
3338
#include "graphar/writer_util.h"
3439

3540
// forward declaration
@@ -88,6 +93,28 @@ class Vertex {
8893
properties_[name] = val;
8994
}
9095

96+
inline void AddProperty(const Cardinality cardinality,
97+
const std::string& name, const std::any& val) {
98+
if (cardinality == Cardinality::SINGLE) {
99+
cardinalities_[name] = Cardinality::SINGLE;
100+
AddProperty(name, val);
101+
return;
102+
}
103+
empty_ = false;
104+
if (cardinalities_.find(name) != cardinalities_.end()) {
105+
assert(cardinalities_[name] == cardinality);
106+
auto property_value_list =
107+
std::any_cast<std::vector<std::any>>(properties_[name]);
108+
property_value_list.push_back(val);
109+
properties_[name] = property_value_list;
110+
} else {
111+
auto property_value_list = std::vector<std::any>();
112+
property_value_list.push_back(val);
113+
properties_[name] = property_value_list;
114+
}
115+
cardinalities_[name] = cardinality;
116+
}
117+
91118
/**
92119
* @brief Get a property of the vertex.
93120
*
@@ -118,10 +145,75 @@ class Vertex {
118145
return (properties_.find(property) != properties_.end());
119146
}
120147

148+
inline bool IsMultiProperty(const std::string& property) const {
149+
return (cardinalities_.find(property) != cardinalities_.end() &&
150+
cardinalities_.at(property) != Cardinality::SINGLE);
151+
}
152+
153+
template <typename T>
154+
Status ValidatePropertyType(const std::string& property,
155+
const Cardinality cardinality) const {
156+
if (cardinality == Cardinality::SINGLE && IsMultiProperty(property)) {
157+
return Status::TypeError(
158+
"Invalid data cardinality for property ", property,
159+
", defined as SINGLE but got ",
160+
cardinalities_.at(property) == Cardinality::LIST ? "LIST" : "SET");
161+
}
162+
if (IsMultiProperty(property) &&
163+
(cardinality == Cardinality::SET ||
164+
cardinalities_.at(property) == Cardinality::SET)) {
165+
GAR_RETURN_NOT_OK(ValidateMultiPropertySet<T>(property));
166+
}
167+
if (IsMultiProperty(property)) {
168+
auto value_list =
169+
std::any_cast<std::vector<std::any>>(properties_.at(property));
170+
for (auto value : value_list) {
171+
auto& value_type = value.type();
172+
if (value_type != typeid(T)) {
173+
return Status::TypeError("Invalid data type for property ", property,
174+
", defined as ", typeid(T).name(),
175+
", but got ", value_type.name());
176+
}
177+
}
178+
} else {
179+
auto& value_type = properties_.at(property).type();
180+
if (value_type != typeid(T)) {
181+
return Status::TypeError("Invalid data type for property ", property,
182+
", defined as ", typeid(T).name(),
183+
", but got ", value_type.name());
184+
}
185+
}
186+
return Status::OK();
187+
}
188+
189+
template <typename T>
190+
Status ValidateMultiProperty(const std::string& property) const {
191+
if (IsMultiProperty(property) &&
192+
cardinalities_.at(property) == Cardinality::SET) {
193+
GAR_RETURN_NOT_OK(ValidateMultiPropertySet<T>(property));
194+
}
195+
return Status::OK();
196+
}
197+
198+
template <typename T>
199+
Status ValidateMultiPropertySet(const std::string& property) const {
200+
auto vec = std::any_cast<std::vector<std::any>>(properties_.at(property));
201+
std::unordered_set<T> seen;
202+
for (const auto& item : vec) {
203+
if (!seen.insert(std::any_cast<T>(item)).second) {
204+
return Status::KeyError(
205+
"Duplicate values exist in set type multi-property key: ", property,
206+
" value: ", std::any_cast<T>(item));
207+
}
208+
}
209+
return Status::OK();
210+
}
211+
121212
private:
122213
IdType id_;
123214
bool empty_;
124215
std::unordered_map<std::string, std::any> properties_;
216+
std::unordered_map<std::string, Cardinality> cardinalities_;
125217
};
126218

127219
/**

cpp/src/graphar/types.h

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -100,8 +100,17 @@ class DataType {
100100
inline DataType& operator=(const DataType& other) = default;
101101

102102
bool Equals(const DataType& other) const {
103-
return id_ == other.id_ &&
104-
user_defined_type_name_ == other.user_defined_type_name_;
103+
if (id_ != other.id_ ||
104+
user_defined_type_name_ != other.user_defined_type_name_) {
105+
return false;
106+
}
107+
if (child_ == nullptr && other.child_ == nullptr) {
108+
return true;
109+
}
110+
if (child_ != nullptr && other.child_ != nullptr) {
111+
return child_->Equals(other.child_);
112+
}
113+
return false;
105114
}
106115

107116
bool Equals(const std::shared_ptr<DataType>& other) const {

0 commit comments

Comments
 (0)