Skip to content

Commit 883d7eb

Browse files
authored
feat(c++): support read and write multi-property (#719)
1 parent 1cca403 commit 883d7eb

File tree

11 files changed

+368
-46
lines changed

11 files changed

+368
-46
lines changed

cpp/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ if (BUILD_TESTS)
566566
add_test(test_arrow_chunk_reader SRCS test/test_arrow_chunk_reader.cc)
567567
add_test(test_graph SRCS test/test_graph.cc)
568568
add_test(test_multi_label SRCS test/test_multi_label.cc)
569+
add_test(test_multi_property SRCS test/test_multi_property.cc)
569570

570571
# enable_testing()
571572
endif()

cpp/src/graphar/arrow/chunk_reader.cc

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ Result<std::shared_ptr<arrow::Schema>> PropertyGroupToSchema(
4949
GeneralParams::kVertexIndexCol, arrow::int64()));
5050
}
5151
for (const auto& prop : pg->GetProperties()) {
52-
fields.push_back(std::make_shared<arrow::Field>(
53-
prop.name, DataType::DataTypeToArrowDataType(prop.type)));
52+
auto dataType = DataType::DataTypeToArrowDataType(prop.type);
53+
if (prop.cardinality != Cardinality::SINGLE) {
54+
dataType = arrow::list(dataType);
55+
}
56+
fields.push_back(std::make_shared<arrow::Field>(prop.name, dataType));
5457
}
5558
return arrow::schema(fields);
5659
}

cpp/src/graphar/arrow/chunk_writer.cc

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -321,17 +321,6 @@ Status VertexPropertyWriter::WriteTable(
321321
return Status::OK();
322322
}
323323

324-
// Helper function to split a string by a delimiter
325-
std::vector<std::string> SplitString(const std::string& str, char delimiter) {
326-
std::vector<std::string> tokens;
327-
std::string token;
328-
std::istringstream tokenStream(str);
329-
while (std::getline(tokenStream, token, delimiter)) {
330-
tokens.push_back(token);
331-
}
332-
return tokens;
333-
}
334-
335324
Status VertexPropertyWriter::WriteLabelTable(
336325
const std::shared_ptr<arrow::Table>& input_table, IdType start_chunk_index,
337326
FileType file_type, ValidateLevel validate_level) const {
@@ -379,6 +368,8 @@ Result<std::shared_ptr<arrow::Table>> VertexPropertyWriter::GetLabelTable(
379368
auto label_column = std::static_pointer_cast<arrow::StringArray>(chunk);
380369

381370
// Populate the matrix based on :LABEL column values
371+
// TODO(@yangxk): store array in the label_column, split the string when
372+
// reading file
382373
for (int64_t row = 0; row < label_column->length(); ++row) {
383374
if (label_column->IsValid(row)) {
384375
std::string labels_string = label_column->GetString(row);

cpp/src/graphar/fwd.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,8 @@ class FileSystem;
7272
using IdType = int64_t;
7373
enum class Type;
7474
class DataType;
75+
/** Defines how multiple values are handled for a given property key */
76+
enum Cardinality { SINGLE, LIST, SET };
7577
/** Type of file format */
7678
enum FileType { CSV = 0, PARQUET = 1, ORC = 2, JSON = 3 };
7779
enum SelectType { PROPERTIES = 0, LABELS = 1 };

cpp/src/graphar/graph_info.cc

Lines changed: 41 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
#include <unordered_set>
2121
#include <utility>
2222

23+
#include "graphar/status.h"
2324
#include "mini-yaml/yaml/Yaml.hpp"
2425

2526
#include "graphar/filesystem.h"
@@ -86,7 +87,8 @@ std::string BuildPath(const std::vector<std::string>& paths) {
8687
/**
 * Field-wise equality of two Property descriptors.
 *
 * Two properties are equal only when name, type, primary-key flag,
 * nullable flag and cardinality all compare equal. Fields are checked in
 * that order and the comparison stops at the first mismatch.
 */
bool operator==(const Property& lhs, const Property& rhs) {
  if (!(lhs.name == rhs.name)) {
    return false;
  }
  if (!(lhs.type == rhs.type)) {
    return false;
  }
  if (!(lhs.is_primary == rhs.is_primary)) {
    return false;
  }
  if (!(lhs.is_nullable == rhs.is_nullable)) {
    return false;
  }
  return lhs.cardinality == rhs.cardinality;
}
9193

9294
PropertyGroup::PropertyGroup(const std::vector<Property>& properties,
@@ -138,6 +140,11 @@ bool PropertyGroup::IsValidated() const {
138140
// list type is not supported in csv file
139141
return false;
140142
}
143+
// TODO(@yangxk): support cardinality in csv file
144+
if (p.cardinality != Cardinality::SINGLE && file_type_ == FileType::CSV) {
145+
// non-single cardinality (list or set) is not supported in csv file
146+
return false;
147+
}
141148
}
142149
return true;
143150
}
@@ -212,6 +219,7 @@ class VertexInfo::Impl {
212219
property_name_to_primary_.emplace(p.name, p.is_primary);
213220
property_name_to_nullable_.emplace(p.name, p.is_nullable);
214221
property_name_to_type_.emplace(p.name, p.type);
222+
property_name_to_cardinality_.emplace(p.name, p.cardinality);
215223
}
216224
}
217225
}
@@ -251,6 +259,7 @@ class VertexInfo::Impl {
251259
std::unordered_map<std::string, bool> property_name_to_nullable_;
252260
std::unordered_map<std::string, std::shared_ptr<DataType>>
253261
property_name_to_type_;
262+
std::unordered_map<std::string, Cardinality> property_name_to_cardinality_;
254263
};
255264

256265
VertexInfo::VertexInfo(const std::string& type, IdType chunk_size,
@@ -363,6 +372,15 @@ Result<std::shared_ptr<DataType>> VertexInfo::GetPropertyType(
363372
return it->second;
364373
}
365374

375+
/**
 * Look up the cardinality recorded for a vertex property.
 *
 * @param property_name Name of the property to look up.
 * @return The cardinality stored for that name, or Status::Invalid when the
 *         name is not present in the cardinality map.
 */
Result<Cardinality> VertexInfo::GetPropertyCardinality(
    const std::string& property_name) const {
  const auto& cardinalities = impl_->property_name_to_cardinality_;
  const auto entry = cardinalities.find(property_name);
  if (entry != cardinalities.end()) {
    return entry->second;
  }
  return Status::Invalid("property name not found: ", property_name);
}
383+
366384
Result<std::shared_ptr<VertexInfo>> VertexInfo::AddPropertyGroup(
367385
std::shared_ptr<PropertyGroup> property_group) const {
368386
if (property_group == nullptr) {
@@ -440,8 +458,13 @@ Result<std::shared_ptr<VertexInfo>> VertexInfo::Load(
440458
bool is_primary = p_node["is_primary"].As<bool>();
441459
bool is_nullable =
442460
p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();
461+
Cardinality cardinality = Cardinality::SINGLE;
462+
if (!p_node["cardinality"].IsNone()) {
463+
cardinality =
464+
StringToCardinality(p_node["cardinality"].As<std::string>());
465+
}
443466
property_vec.emplace_back(property_name, property_type, is_primary,
444-
is_nullable);
467+
is_nullable, cardinality);
445468
}
446469
property_groups.push_back(
447470
std::make_shared<PropertyGroup>(property_vec, file_type, pg_prefix));
@@ -485,6 +508,9 @@ Result<std::string> VertexInfo::Dump() const noexcept {
485508
p_node["data_type"] = p.type->ToTypeName();
486509
p_node["is_primary"] = p.is_primary ? "true" : "false";
487510
p_node["is_nullable"] = p.is_nullable ? "true" : "false";
511+
if (p.cardinality != Cardinality::SINGLE) {
512+
p_node["cardinality"] = CardinalityToString(p.cardinality);
513+
}
488514
pg_node["properties"].PushBack();
489515
pg_node["properties"][pg_node["properties"].Size() - 1] = p_node;
490516
}
@@ -574,6 +600,13 @@ class EdgeInfo::Impl {
574600
}
575601
// check if property name is unique in all property groups
576602
for (const auto& p : pg->GetProperties()) {
603+
if (p.cardinality != Cardinality::SINGLE) {
604+
// edge property only supports single cardinality
605+
std::cout
606+
<< "Edge property only supports single cardinality, but got: "
607+
<< CardinalityToString(p.cardinality) << std::endl;
608+
return false;
609+
}
577610
if (check_property_unique_set.find(p.name) !=
578611
check_property_unique_set.end()) {
579612
return false;
@@ -910,6 +943,12 @@ Result<std::shared_ptr<EdgeInfo>> EdgeInfo::Load(std::shared_ptr<Yaml> yaml) {
910943
auto property_name = p_node["name"].As<std::string>();
911944
auto property_type =
912945
DataType::TypeNameToDataType(p_node["data_type"].As<std::string>());
946+
if (!p_node["cardinality"].IsNone() &&
947+
StringToCardinality(p_node["cardinality"].As<std::string>()) !=
948+
Cardinality::SINGLE) {
949+
return Status::YamlError(
950+
"Unsupported set cardinality for edge property.");
951+
}
913952
bool is_primary = p_node["is_primary"].As<bool>();
914953
bool is_nullable =
915954
p_node["is_nullable"].IsNone() || p_node["is_nullable"].As<bool>();

cpp/src/graphar/graph_info.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,20 @@ class Property {
3737
std::shared_ptr<DataType> type; // property data type
3838
bool is_primary; // primary key tag
3939
bool is_nullable; // nullable tag for non-primary key
40+
Cardinality
41+
cardinality; // cardinality of the property, only use in vertex info
4042

4143
Property() = default;
4244

4345
explicit Property(const std::string& name,
4446
const std::shared_ptr<DataType>& type = nullptr,
45-
bool is_primary = false, bool is_nullable = true)
47+
bool is_primary = false, bool is_nullable = true,
48+
Cardinality cardinality = Cardinality::SINGLE)
4649
: name(name),
4750
type(type),
4851
is_primary(is_primary),
49-
is_nullable(!is_primary && is_nullable) {}
52+
is_nullable(!is_primary && is_nullable),
53+
cardinality(cardinality) {}
5054
};
5155

5256
bool operator==(const Property& lhs, const Property& rhs);
@@ -276,6 +280,8 @@ class VertexInfo {
276280
Result<std::shared_ptr<DataType>> GetPropertyType(
277281
const std::string& property_name) const;
278282

283+
Result<Cardinality> GetPropertyCardinality(
284+
const std::string& property_name) const;
279285
/**
280286
* Get whether the vertex info contains the specified property.
281287
*

cpp/src/graphar/types.h

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include <memory>
2424
#include <string>
2525
#include <utility>
26+
#include <vector>
2627

2728
#include "graphar/fwd.h"
2829
#include "graphar/macros.h"
@@ -242,4 +243,42 @@ static inline const char* FileTypeToString(FileType file_type) {
242243
return file_type2string.at(file_type);
243244
}
244245

246+
/**
 * Parse the textual name of a cardinality.
 *
 * Recognised names are exactly "single", "list" and "set" (case-sensitive).
 * The whole string is compared, so input containing extra characters,
 * including embedded NUL bytes, is rejected rather than matched on a prefix.
 *
 * @param str Cardinality name to parse.
 * @return The matching Cardinality value.
 * @throws std::runtime_error with message "KeyError: <str>" when the name is
 *         not recognised.
 */
static inline Cardinality StringToCardinality(const std::string& str) {
  static const std::map<std::string, Cardinality> str2cardinality{
      {"single", Cardinality::SINGLE},
      {"list", Cardinality::LIST},
      {"set", Cardinality::SET},
  };
  // Look up the std::string itself: going through c_str() would build a
  // temporary and silently cut the key at the first NUL byte.
  const auto it = str2cardinality.find(str);
  if (it == str2cardinality.end()) {
    throw std::runtime_error("KeyError: " + str);
  }
  return it->second;
}
258+
259+
/**
 * Return the textual name of a cardinality.
 *
 * @param cardinality Value to convert.
 * @return "single", "list" or "set"; the pointer refers to a string literal.
 * @throws std::runtime_error with message "KeyError: <int value>" when the
 *         value is not one of the three enumerators.
 */
static inline const char* CardinalityToString(Cardinality cardinality) {
  switch (cardinality) {
    case Cardinality::SINGLE:
      return "single";
    case Cardinality::LIST:
      return "list";
    case Cardinality::SET:
      return "set";
    default:
      break;
  }
  throw std::runtime_error("KeyError: " +
                           std::to_string(static_cast<int>(cardinality)));
}
272+
273+
/**
 * Split a string into tokens separated by a single-character delimiter.
 *
 * Tokenisation follows std::getline semantics:
 *  - an empty input yields an empty vector;
 *  - consecutive or leading delimiters yield empty tokens;
 *  - a trailing delimiter does NOT add a final empty token ("a," -> {"a"}).
 *
 * Implemented on std::string::find so the header needs only <string> and
 * <vector>, and the input is not copied into a stream first.
 *
 * @param str Input text to split.
 * @param delimiter Separator character.
 * @return The tokens in order of appearance.
 */
inline std::vector<std::string> SplitString(const std::string& str,
                                            char delimiter) {
  std::vector<std::string> tokens;
  std::string::size_type begin = 0;
  // Stop once `begin` reaches the end, so nothing is emitted after a
  // trailing delimiter (or for an empty input).
  while (begin < str.size()) {
    const std::string::size_type end = str.find(delimiter, begin);
    if (end == std::string::npos) {
      tokens.push_back(str.substr(begin));
      break;
    }
    tokens.push_back(str.substr(begin, end - begin));
    begin = end + 1;
  }
  return tokens;
}
245284
} // namespace graphar

0 commit comments

Comments
 (0)