Skip to content

Commit 6978533

Browse files
committed
POC: sync from dfly to valkey
Signed-off-by: Abhijat Malviya <[email protected]>
1 parent 50e1fbf commit 6978533

File tree

5 files changed

+254
-1
lines changed

5 files changed

+254
-1
lines changed

src/server/conn_context.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ struct ConnectionState {
154154
std::string repl_ip_address;
155155
uint32_t repl_listening_port = 0;
156156
DflyVersion repl_version = DflyVersion::VER1;
157+
bool is_valkey = false;
157158
};
158159

159160
struct SquashingInfo {

src/server/dflycmd.cc

Lines changed: 228 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -649,6 +649,228 @@ void DflyCmd::Load(CmdArgList args, RedisReplyBuilder* rb, ConnectionContext* cn
649649
rb->SendOk();
650650
}
651651

652+
namespace {
653+
654+
struct ShardJournalChannel : journal::JournalConsumerInterface {
655+
explicit ShardJournalChannel(fb2::EventCount& e, journal::Journal* journal)
656+
: ec{e}, journal_{journal} {
657+
CHECK(journal);
658+
journal_cb_id = journal_->RegisterOnChange(this);
659+
}
660+
661+
void Stop() {
662+
journal_->UnregisterOnChange(journal_cb_id);
663+
}
664+
665+
void ConsumeJournalChange(const journal::JournalChangeItem& item) override {
666+
if (rpos == wpos) {
667+
rpos = 0;
668+
wpos = 0;
669+
buffer = {};
670+
}
671+
672+
buffer.emplace_back(item.journal_item.data);
673+
wpos++;
674+
675+
ec.notifyAll();
676+
}
677+
678+
void ThrottleIfNeeded() override {
679+
}
680+
681+
std::vector<std::string> Read() {
682+
CHECK_LT(rpos, wpos) << "Invalid read attempt";
683+
684+
auto i = rpos;
685+
std::vector<std::string> result;
686+
while (i < wpos) {
687+
result.emplace_back(std::move(buffer[i++]));
688+
}
689+
rpos = i;
690+
return result;
691+
}
692+
693+
bool HasData() const {
694+
return rpos < wpos;
695+
}
696+
697+
fb2::EventCount& ec;
698+
size_t rpos{0};
699+
size_t wpos{0};
700+
std::vector<std::string> buffer;
701+
uint32_t journal_cb_id;
702+
journal::Journal* journal_;
703+
};
704+
705+
struct Pipe final : io::Source, io::Sink {
706+
io::Result<unsigned long> ReadSome(const iovec* v, uint32_t len) override {
707+
if (done) {
708+
return 0;
709+
}
710+
711+
ec.await([&] { return rpos < wpos; });
712+
auto bytes_read = 0;
713+
714+
while (rpos < wpos && len > 0) {
715+
const auto chunk_size = min(wpos - rpos, v->iov_len);
716+
std::copy_n(buffer.begin() + rpos, chunk_size, static_cast<char*>(v->iov_base));
717+
bytes_read += chunk_size;
718+
rpos += chunk_size;
719+
++v;
720+
--len;
721+
}
722+
723+
if (rpos == wpos && wpos == cap) {
724+
rpos = 0;
725+
wpos = 0;
726+
ec.notifyAll();
727+
}
728+
729+
return bytes_read;
730+
}
731+
732+
io::Result<unsigned long> WriteSome(const iovec* v, uint32_t len) override {
733+
CHECK(!done);
734+
ec.await([&] { return wpos < cap; });
735+
int bytes_written = 0;
736+
737+
while (wpos < cap && len > 0) {
738+
const auto chunk_size = std::min(cap - wpos, v->iov_len);
739+
auto p = static_cast<const char*>(v->iov_base);
740+
std::copy_n(p, chunk_size, buffer.begin() + wpos);
741+
bytes_written += chunk_size;
742+
wpos += chunk_size;
743+
++v;
744+
--len;
745+
}
746+
747+
ec.notifyAll();
748+
return bytes_written;
749+
}
750+
751+
std::array<uint8_t, 1024> buffer;
752+
size_t rpos{0};
753+
size_t wpos{0};
754+
size_t cap{1024};
755+
std::atomic_bool done{false};
756+
fb2::EventCount ec;
757+
};
758+
759+
} // namespace
760+
761+
void DflyCmd::StartValkeySync() {
762+
auto Write = [this](auto v) {
763+
const auto buf = io::Bytes(reinterpret_cast<const unsigned char*>(v.data()), v.size());
764+
CHECK(!_valkey_replica->conn->socket()->Write(buf));
765+
};
766+
767+
CHECK(_valkey_replica.has_value()) << "There is no valkey replica to sync with";
768+
769+
// Since we do not know the size of rdb up front, use the EOF protocol, send
770+
// "$EOF:<40-random-chars>\n" first, then the same 40 chars at the end
771+
std::string eof_mark(40, 'X');
772+
std::string eof_mark_with_prefix = absl::StrCat("$EOF:", eof_mark, "\n");
773+
774+
Write(eof_mark_with_prefix);
775+
776+
for (unsigned i = 0; i < shard_set->size(); ++i) {
777+
Pipe p;
778+
auto cb = [&] {
779+
std::array<uint8_t, 128> backing;
780+
const io::MutableBytes mb{backing};
781+
while (!p.done) {
782+
if (auto n = p.Read(mb); !n.has_value() || n.value() == 0) {
783+
break;
784+
}
785+
CHECK(!_valkey_replica->conn->socket()->Write(mb));
786+
}
787+
788+
if (auto n = p.Read(mb); n.has_value() && n.value()) {
789+
CHECK(!_valkey_replica->conn->socket()->Write(mb));
790+
}
791+
};
792+
auto drain_fb = fb2::Fiber("replica-drain-fb", cb);
793+
794+
shard_set->Await(i, [&p, this, i] {
795+
auto shard = EngineShard::tlocal();
796+
auto mode = i == 0 ? SaveMode::SINGLE_SHARD_WITH_SUMMARY : SaveMode::SINGLE_SHARD;
797+
RdbSaver saver{&p, mode, false, ""};
798+
if (mode == SaveMode::SINGLE_SHARD_WITH_SUMMARY) {
799+
CHECK(!saver.SaveHeader(saver.GetGlobalData(&sf_->service())));
800+
}
801+
802+
saver.StartSnapshotInShard(false, &_valkey_replica->exec_st, shard);
803+
CHECK(!saver.WaitSnapshotInShard(shard));
804+
p.done = true;
805+
VLOG(1) << "finished writing snapshot for shard " << shard->shard_id();
806+
});
807+
808+
drain_fb.JoinIfNeeded();
809+
}
810+
811+
Write(eof_mark);
812+
813+
// Stable sync
814+
VLOG(1) << "Entering stable sync..";
815+
816+
std::vector<std::unique_ptr<ShardJournalChannel>> channels(shard_set->size());
817+
fb2::EventCount ec;
818+
JournalReader reader{nullptr, 0};
819+
820+
auto cb = [&channels, &ec, this](EngineShard* shard) {
821+
auto& channel = channels[shard->shard_id()];
822+
sf_->journal()->StartInThread();
823+
channel.reset(new ShardJournalChannel(ec, sf_->journal()));
824+
VLOG(1) << "Set channel for shard " << shard->shard_id();
825+
};
826+
shard_set->RunBlockingInParallel(cb);
827+
828+
RedisReplyBuilder rb{_valkey_replica->conn->socket()};
829+
DbIndex current_dbid = std::numeric_limits<DbIndex>::max();
830+
831+
while (true) {
832+
ec.await([&channels] {
833+
return std::any_of(channels.begin(), channels.end(),
834+
[](const auto& channel) { return channel->HasData(); });
835+
});
836+
for (const auto& channel : channels) {
837+
if (channel->HasData()) {
838+
auto data = channel->Read();
839+
auto total_size =
840+
std::accumulate(data.begin(), data.end(), 0,
841+
[](auto currsum, const auto& str) { return currsum + str.size(); });
842+
auto span = io::Bytes(reinterpret_cast<uint8_t*>(data.begin()->data()), total_size);
843+
auto src = io::BytesSource{span};
844+
reader.SetSource(&src);
845+
while (true) {
846+
auto entry = reader.ReadEntry();
847+
if (!entry.has_value()) {
848+
// We read all the commands in the buffer
849+
CHECK_EQ(entry.error().value(), EIO);
850+
break;
851+
}
852+
853+
auto& parsed = entry.value();
854+
if (parsed.dbid != current_dbid) {
855+
VLOG(1) << "Database changed from " << current_dbid << " to " << parsed.dbid;
856+
std::string parsed_dbid = std::to_string(parsed.dbid);
857+
std::vector<std::string_view> select_cmd = {"SELECT", parsed_dbid};
858+
859+
VLOG(1) << "sending command: " << select_cmd;
860+
rb.SendBulkStrArr(select_cmd);
861+
current_dbid = parsed.dbid;
862+
}
863+
864+
VLOG(1) << "sending command: " << parsed.ToString() << " of size " << parsed.cmd.cmd_len;
865+
866+
// valkey expects commands propagated as bulk array
867+
rb.SendBulkStrArr(parsed.cmd.cmd_args);
868+
}
869+
}
870+
}
871+
}
872+
}
873+
652874
OpStatus DflyCmd::StartFullSyncInThread(FlowInfo* flow, ExecutionState* exec_st,
653875
EngineShard* shard) {
654876
DCHECK(shard);
@@ -730,6 +952,12 @@ void DflyCmd::StartStableSyncInThread(FlowInfo* flow, ExecutionState* exec_st, E
730952
};
731953
}
732954

955+
// Registers `conn` as the valkey replica to sync with.
// Only one valkey replica may exist at a time; CHECK-fails if one is already
// registered. The replica's ExecutionState gets an error handler that ignores
// the error.
void DflyCmd::CreateValkeySyncSession(facade::Connection* conn) {
  // Take the lock before inspecting `_valkey_replica`, so the "not yet
  // registered" check and the emplace happen under the same critical section.
  fb2::LockGuard lk(mu_);
  CHECK(!_valkey_replica.has_value());
  _valkey_replica.emplace(conn, [](const GenericError&) {});
}
960+
733961
auto DflyCmd::CreateSyncSession(ConnectionState* state) -> std::pair<uint32_t, unsigned> {
734962
util::fb2::LockGuard lk(mu_);
735963
unsigned sync_id = next_sync_id_++;

src/server/dflycmd.h

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,13 @@ class DflyCmd {
129129
util::fb2::SharedMutex shared_mu; // See top of header for locking levels.
130130
};
131131

132+
struct ValkeyReplica {
133+
ValkeyReplica(facade::Connection* conn, ExecutionState::ErrHandler h) : conn{conn}, exec_st{h} {
134+
}
135+
facade::Connection* conn = nullptr;
136+
ExecutionState exec_st;
137+
};
138+
132139
public:
133140
DflyCmd(ServerFamily* server_family);
134141

@@ -142,6 +149,7 @@ class DflyCmd {
142149

143150
// Create new sync session. Returns (session_id, number of flows)
144151
std::pair<uint32_t, unsigned> CreateSyncSession(ConnectionState* state) ABSL_LOCKS_EXCLUDED(mu_);
152+
void CreateValkeySyncSession(facade::Connection* conn);
145153

146154
// Master side access method to replication info of that connection.
147155
std::shared_ptr<ReplicaInfo> GetReplicaInfoFromConnection(ConnectionState* state);
@@ -156,6 +164,7 @@ class DflyCmd {
156164

157165
// Tries to break those flows that stuck on socket write for too long time.
158166
void BreakStalledFlowsInShard() ABSL_NO_THREAD_SAFETY_ANALYSIS;
167+
void StartValkeySync();
159168

160169
private:
161170
using RedisReplyBuilder = facade::RedisReplyBuilder;
@@ -238,6 +247,8 @@ class DflyCmd {
238247
using ReplicaInfoMap = absl::btree_map<uint32_t, std::shared_ptr<ReplicaInfo>>;
239248
ReplicaInfoMap replica_infos_ ABSL_GUARDED_BY(mu_);
240249

250+
std::optional<ValkeyReplica> _valkey_replica = std::nullopt;
251+
241252
mutable util::fb2::Mutex mu_; // Guard global operations. See header top for locking levels.
242253
};
243254

src/server/server_family.cc

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3769,6 +3769,14 @@ void ServerFamily::ReplTakeOver(CmdArgList args, const CommandContext& cmd_cntx)
37693769
return builder->SendOk();
37703770
}
37713771

3772+
// Handler for PSYNC: replies with "FULLRESYNC <master_replid_> 0" as a simple
// string and then starts the valkey sync on the calling connection.
// `args` is not inspected.
void ServerFamily::PSync(CmdArgList args, const CommandContext& cmd_cntx) {
  const auto fullresync_reply = absl::StrFormat("FULLRESYNC %s %ld", master_replid_, 0);

  auto* reply_builder = static_cast<RedisReplyBuilder*>(cmd_cntx.rb);
  reply_builder->SendSimpleString(fullresync_reply);

  dfly_cmd_->StartValkeySync();
}
3779+
37723780
void ServerFamily::ReplConf(CmdArgList args, const CommandContext& cmd_cntx) {
37733781
auto* builder = cmd_cntx.rb;
37743782
{
@@ -3854,6 +3862,9 @@ void ServerFamily::ReplConf(CmdArgList args, const CommandContext& cmd_cntx) {
38543862
VLOG(2) << "Received client ACK=" << ack;
38553863
cntx->replication_flow->last_acked_lsn = ack;
38563864
return;
3865+
} else if (cmd == "VERSION" && args.size() == 2) {
3866+
cntx->conn_state.replication_info.is_valkey = true;
3867+
dfly_cmd_->CreateValkeySyncSession(cntx->conn());
38573868
} else {
38583869
VLOG(1) << "Error " << cmd << " " << arg << " " << args.size();
38593870
return err_cb();
@@ -4159,7 +4170,8 @@ void ServerFamily::Register(CommandRegistry* registry) {
41594170
<< CI{"SLOWLOG", CO::ADMIN | CO::FAST, -2, 0, 0, acl::kSlowLog}.HFUNC(SlowLog)
41604171
<< CI{"SCRIPT", CO::NOSCRIPT | CO::NO_KEY_TRANSACTIONAL, -2, 0, 0, acl::kScript}.HFUNC(Script)
41614172
<< CI{"DFLY", CO::ADMIN | CO::GLOBAL_TRANS | CO::HIDDEN, -2, 0, 0, acl::kDfly}.HFUNC(Dfly)
4162-
<< CI{"MODULE", CO::ADMIN, 2, 0, 0, acl::kModule}.HFUNC(Module);
4173+
<< CI{"MODULE", CO::ADMIN, 2, 0, 0, acl::kModule}.HFUNC(Module)
4174+
<< CI{"PSYNC", CO::ADMIN | CO::GLOBAL_TRANS, -2, 0, 0, acl::kDfly}.HFUNC(PSync);
41634175
}
41644176

41654177
} // namespace dfly

src/server/server_family.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -363,6 +363,7 @@ class ServerFamily {
363363
void Script(CmdArgList args, const CommandContext& cmd_cntx);
364364
void SlowLog(CmdArgList args, const CommandContext& cmd_cntx);
365365
void Module(CmdArgList args, const CommandContext& cmd_cntx);
366+
void PSync(CmdArgList args, const CommandContext& cmd_cntx);
366367

367368
void SyncGeneric(std::string_view repl_master_id, uint64_t offs, ConnectionContext* cntx);
368369

0 commit comments

Comments
 (0)