Skip to content

Commit 79a26cd

Browse files
committed
Update the init-tablet-type lookup logic to ignore the transient BACKUP/RESTORE tablet types. Add tests.
1 parent 9220355 commit 79a26cd

File tree

2 files changed

+335
-6
lines changed

2 files changed

+335
-6
lines changed

go/vt/vttablet/tabletmanager/tm_init.go

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -388,12 +388,19 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, config *tabletenv.Tabl
388388

389389
// If we found an existing tablet record, use its tablet type instead of the initial one
390390
if err == nil {
391-
log.Infof("Found existing tablet record with --init-tablet-type-lookup enabled, using tablet type %v from topology instead of init-tablet-type %v",
392-
existingTablet.Type, tablet.Type)
393-
tablet.Type = existingTablet.Type
394-
// If it was a PRIMARY, preserve the start time
395-
if existingTablet.Type == topodatapb.TabletType_PRIMARY {
396-
tablet.PrimaryTermStartTime = existingTablet.PrimaryTermStartTime
391+
// Skip transient operational types (BACKUP, RESTORE)
392+
// These are temporary states that should not be preserved across restarts
393+
if existingTablet.Type == topodatapb.TabletType_BACKUP || existingTablet.Type == topodatapb.TabletType_RESTORE {
394+
log.Infof("Found existing tablet record with transient type %v, using init-tablet-type %v instead",
395+
existingTablet.Type, tablet.Type)
396+
} else {
397+
log.Infof("Found existing tablet record with --init-tablet-type-lookup enabled, using tablet type %v from topology instead of init-tablet-type %v",
398+
existingTablet.Type, tablet.Type)
399+
tablet.Type = existingTablet.Type
400+
// If it was a PRIMARY, preserve the start time
401+
if existingTablet.Type == topodatapb.TabletType_PRIMARY {
402+
tablet.PrimaryTermStartTime = existingTablet.PrimaryTermStartTime
403+
}
397404
}
398405
} else {
399406
log.Infof("No existing tablet record found, using init-tablet-type: %v", tablet.Type)

go/vt/vttablet/tabletmanager/tm_init_test.go

Lines changed: 322 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -961,3 +961,325 @@ func grantAllPrivilegesToUser(t *testing.T, connParams mysql.ConnParams, testUse
961961
require.NoError(t, err)
962962
conn.Close()
963963
}
964+
965+
// TestInitTabletTypeLookup_PreservesRDONLY checks that a tablet whose topo
// record was switched to RDONLY keeps that type when the tablet manager is
// restarted with initTabletTypeLookup enabled.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_PreservesRDONLY(t *testing.T) {
966+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
967+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
968+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
969+
970+
ctx, cancel := context.WithCancel(context.Background())
971+
defer cancel()
972+
cell := "cell1"
973+
ts := memorytopo.NewServer(ctx, cell)
974+
alias := &topodatapb.TabletAlias{
975+
Cell: "cell1",
976+
Uid: 1,
977+
}
978+
979+
// 1. Initialize tablet as REPLICA (normal startup) with flag disabled
980+
initTabletTypeLookup = false
981+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
982+
tablet := tm.Tablet()
983+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
984+
ti, err := ts.GetTablet(ctx, alias)
985+
require.NoError(t, err)
986+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
987+
tm.Stop()
988+
989+
// 2. Simulate operator changing tablet to RDONLY in topology
990+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
991+
t.Type = topodatapb.TabletType_RDONLY
992+
return nil
993+
})
994+
require.NoError(t, err)
995+
996+
// 3. Restart with flag enabled - should preserve RDONLY
997+
initTabletTypeLookup = true
998+
err = tm.Start(tablet, nil)
999+
require.NoError(t, err)
1000+
ti, err = ts.GetTablet(ctx, alias)
1001+
require.NoError(t, err)
1002+
assert.Equal(t, topodatapb.TabletType_RDONLY, ti.Type)
1003+
tm.Stop()
1004+
}
1005+
1006+
// TestInitTabletTypeLookup_PreservesPrimaryWithTermTime checks that a tablet
// whose topo record says PRIMARY (with a primary term start time) is still
// PRIMARY, with the same term start time at one-second resolution, after the
// tablet manager is restarted with initTabletTypeLookup enabled.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_PreservesPrimaryWithTermTime(t *testing.T) {
1007+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1008+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1009+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1010+
1011+
ctx, cancel := context.WithCancel(context.Background())
1012+
defer cancel()
1013+
cell := "cell1"
1014+
ts := memorytopo.NewServer(ctx, cell)
1015+
alias := &topodatapb.TabletAlias{
1016+
Cell: "cell1",
1017+
Uid: 1,
1018+
}
1019+
1020+
// 1. Initialize tablet as REPLICA with flag disabled
1021+
initTabletTypeLookup = false
1022+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1023+
tablet := tm.Tablet()
1024+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1025+
ti, err := ts.GetTablet(ctx, alias)
1026+
require.NoError(t, err)
1027+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1028+
tm.Stop()
1029+
1030+
// 2. Simulate promotion to PRIMARY with a specific term start time
1031+
now := time.Now()
1032+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
1033+
t.Type = topodatapb.TabletType_PRIMARY
1034+
t.PrimaryTermStartTime = protoutil.TimeToProto(now)
1035+
return nil
1036+
})
1037+
require.NoError(t, err)
1038+
1039+
// 3. Restart with flag enabled - should preserve PRIMARY and term start time
1040+
initTabletTypeLookup = true
1041+
err = tm.Start(tablet, nil)
1042+
require.NoError(t, err)
1043+
ti, err = ts.GetTablet(ctx, alias)
1044+
require.NoError(t, err)
1045+
assert.Equal(t, topodatapb.TabletType_PRIMARY, ti.Type)
1046+
assert.Equal(t, now.Unix(), ti.GetPrimaryTermStartTime().Unix())
1047+
tm.Stop()
1048+
}
1049+
1050+
// TestInitTabletTypeLookup_FallbackWhenNoTopoRecord checks that, with
// initTabletTypeLookup enabled and a freshly created topo server, a new test
// tablet manager ends up with a REPLICA tablet record.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_FallbackWhenNoTopoRecord(t *testing.T) {
1051+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1052+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1053+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1054+
1055+
ctx, cancel := context.WithCancel(context.Background())
1056+
defer cancel()
1057+
cell := "cell1"
1058+
ts := memorytopo.NewServer(ctx, cell)
1059+
alias := &topodatapb.TabletAlias{
1060+
Cell: "cell1",
1061+
Uid: 1,
1062+
}
1063+
1064+
// Start new tablet with flag enabled but no existing topo record
1065+
initTabletTypeLookup = true
1066+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1067+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1068+
ti, err := ts.GetTablet(ctx, alias)
1069+
require.NoError(t, err)
1070+
// Should use initTabletType (REPLICA)
1071+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1072+
tm.Stop()
1073+
}
1074+
1075+
// TestInitTabletTypeLookup_DisabledUsesInitType checks that, with
// initTabletTypeLookup disabled, restarting the tablet manager writes REPLICA
// back to the topo record even though the record had been changed to RDONLY
// in the meantime.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_DisabledUsesInitType(t *testing.T) {
1076+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1077+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1078+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1079+
1080+
ctx, cancel := context.WithCancel(context.Background())
1081+
defer cancel()
1082+
cell := "cell1"
1083+
ts := memorytopo.NewServer(ctx, cell)
1084+
alias := &topodatapb.TabletAlias{
1085+
Cell: "cell1",
1086+
Uid: 1,
1087+
}
1088+
1089+
// 1. Initialize tablet as REPLICA with flag disabled
1090+
initTabletTypeLookup = false
1091+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1092+
tablet := tm.Tablet()
1093+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1094+
ti, err := ts.GetTablet(ctx, alias)
1095+
require.NoError(t, err)
1096+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1097+
tm.Stop()
1098+
1099+
// 2. Simulate operator changing tablet to RDONLY in topology
1100+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
1101+
t.Type = topodatapb.TabletType_RDONLY
1102+
return nil
1103+
})
1104+
require.NoError(t, err)
1105+
1106+
// 3. Restart with flag still disabled - should use initTabletType (REPLICA)
1107+
initTabletTypeLookup = false
1108+
err = tm.Start(tablet, nil)
1109+
require.NoError(t, err)
1110+
ti, err = ts.GetTablet(ctx, alias)
1111+
require.NoError(t, err)
1112+
// Topo record should be overwritten with REPLICA
1113+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1114+
tm.Stop()
1115+
}
1116+
1117+
// TestInitTabletTypeLookup_SkipsTransientBackupType checks that a topo record
// left with type BACKUP is not carried over on restart: with
// initTabletTypeLookup enabled, the record ends up as REPLICA after the
// tablet manager is started again.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_SkipsTransientBackupType(t *testing.T) {
1118+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1119+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1120+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1121+
1122+
ctx, cancel := context.WithCancel(context.Background())
1123+
defer cancel()
1124+
cell := "cell1"
1125+
ts := memorytopo.NewServer(ctx, cell)
1126+
alias := &topodatapb.TabletAlias{
1127+
Cell: "cell1",
1128+
Uid: 1,
1129+
}
1130+
1131+
// 1. Initialize tablet as REPLICA with flag disabled
1132+
initTabletTypeLookup = false
1133+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1134+
tablet := tm.Tablet()
1135+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1136+
ti, err := ts.GetTablet(ctx, alias)
1137+
require.NoError(t, err)
1138+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1139+
tm.Stop()
1140+
1141+
// 2. Simulate crash during backup (tablet type is BACKUP in topo)
1142+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
1143+
t.Type = topodatapb.TabletType_BACKUP
1144+
return nil
1145+
})
1146+
require.NoError(t, err)
1147+
1148+
// 3. Restart with flag enabled - should skip BACKUP and use initTabletType
1149+
initTabletTypeLookup = true
1150+
err = tm.Start(tablet, nil)
1151+
require.NoError(t, err)
1152+
ti, err = ts.GetTablet(ctx, alias)
1153+
require.NoError(t, err)
1154+
// Should use initTabletType (REPLICA), not preserve BACKUP
1155+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1156+
tm.Stop()
1157+
}
1158+
1159+
// TestInitTabletTypeLookup_SkipsTransientRestoreType checks that a topo
// record left with type RESTORE is not carried over on restart: with
// initTabletTypeLookup enabled, the record ends up as REPLICA after the
// tablet manager is started again.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_SkipsTransientRestoreType(t *testing.T) {
1160+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1161+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1162+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1163+
1164+
ctx, cancel := context.WithCancel(context.Background())
1165+
defer cancel()
1166+
cell := "cell1"
1167+
ts := memorytopo.NewServer(ctx, cell)
1168+
alias := &topodatapb.TabletAlias{
1169+
Cell: "cell1",
1170+
Uid: 1,
1171+
}
1172+
1173+
// 1. Initialize tablet as REPLICA with flag disabled
1174+
initTabletTypeLookup = false
1175+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1176+
tablet := tm.Tablet()
1177+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1178+
ti, err := ts.GetTablet(ctx, alias)
1179+
require.NoError(t, err)
1180+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1181+
tm.Stop()
1182+
1183+
// 2. Simulate crash during restore (tablet type is RESTORE in topo)
1184+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
1185+
t.Type = topodatapb.TabletType_RESTORE
1186+
return nil
1187+
})
1188+
require.NoError(t, err)
1189+
1190+
// 3. Restart with flag enabled - should skip RESTORE and use initTabletType
1191+
initTabletTypeLookup = true
1192+
err = tm.Start(tablet, nil)
1193+
require.NoError(t, err)
1194+
ti, err = ts.GetTablet(ctx, alias)
1195+
require.NoError(t, err)
1196+
// Should use initTabletType (REPLICA), not preserve RESTORE
1197+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1198+
tm.Stop()
1199+
}
1200+
1201+
// TestInitTabletTypeLookup_PreservesDrained checks that a topo record set to
// DRAINED is kept as DRAINED when the tablet manager is restarted with
// initTabletTypeLookup enabled, i.e. DRAINED is not treated like the
// BACKUP/RESTORE types that the sibling tests expect to be replaced.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_PreservesDrained(t *testing.T) {
1202+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1203+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1204+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1205+
1206+
ctx, cancel := context.WithCancel(context.Background())
1207+
defer cancel()
1208+
cell := "cell1"
1209+
ts := memorytopo.NewServer(ctx, cell)
1210+
alias := &topodatapb.TabletAlias{
1211+
Cell: "cell1",
1212+
Uid: 1,
1213+
}
1214+
1215+
// 1. Initialize tablet as REPLICA with flag disabled
1216+
initTabletTypeLookup = false
1217+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1218+
tablet := tm.Tablet()
1219+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1220+
ti, err := ts.GetTablet(ctx, alias)
1221+
require.NoError(t, err)
1222+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1223+
tm.Stop()
1224+
1225+
// 2. Admin sets type to DRAINED for maintenance
1226+
_, err = ts.UpdateTabletFields(ctx, alias, func(t *topodatapb.Tablet) error {
1227+
t.Type = topodatapb.TabletType_DRAINED
1228+
return nil
1229+
})
1230+
require.NoError(t, err)
1231+
1232+
// 3. Restart with flag enabled - should preserve DRAINED
1233+
initTabletTypeLookup = true
1234+
err = tm.Start(tablet, nil)
1235+
require.NoError(t, err)
1236+
ti, err = ts.GetTablet(ctx, alias)
1237+
require.NoError(t, err)
1238+
// Should preserve DRAINED from topology
1239+
assert.Equal(t, topodatapb.TabletType_DRAINED, ti.Type)
1240+
tm.Stop()
1241+
}
1242+
1243+
// TestInitTabletTypeLookup_InteractionWithCheckPrimaryShip checks that a
// tablet whose own topo record is REPLICA, but which the shard record names
// as PrimaryAlias, ends up as PRIMARY after a restart with
// initTabletTypeLookup enabled. The inline comments attribute the promotion
// to checkPrimaryShip, which is not visible in this test.
//
// The test saves and restores initTabletTypeLookup and
// rebuildKeyspaceRetryInterval via defers because it overwrites both.
func TestInitTabletTypeLookup_InteractionWithCheckPrimaryShip(t *testing.T) {
1244+
defer func(saved bool) { initTabletTypeLookup = saved }(initTabletTypeLookup)
1245+
defer func(saved time.Duration) { rebuildKeyspaceRetryInterval = saved }(rebuildKeyspaceRetryInterval)
1246+
rebuildKeyspaceRetryInterval = 10 * time.Millisecond
1247+
1248+
ctx, cancel := context.WithCancel(context.Background())
1249+
defer cancel()
1250+
cell := "cell1"
1251+
ts := memorytopo.NewServer(ctx, cell)
1252+
alias := &topodatapb.TabletAlias{
1253+
Cell: "cell1",
1254+
Uid: 1,
1255+
}
1256+
1257+
// 1. Initialize tablet as REPLICA with flag disabled
1258+
initTabletTypeLookup = false
1259+
tm := newTestTM(t, ts, 1, "ks", "0", nil)
1260+
tablet := tm.Tablet()
1261+
ensureSrvKeyspace(t, ctx, ts, cell, "ks")
1262+
ti, err := ts.GetTablet(ctx, alias)
1263+
require.NoError(t, err)
1264+
assert.Equal(t, topodatapb.TabletType_REPLICA, ti.Type)
1265+
tm.Stop()
1266+
1267+
// 2. Set shard's PrimaryAlias to this tablet
1268+
now := time.Now()
1269+
_, err = ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
1270+
si.PrimaryAlias = alias
1271+
si.PrimaryTermStartTime = protoutil.TimeToProto(now)
1272+
return nil
1273+
})
1274+
require.NoError(t, err)
1275+
1276+
// 3. Restart with flag enabled - checkPrimaryShip should still promote to PRIMARY
1277+
initTabletTypeLookup = true
1278+
err = tm.Start(tablet, nil)
1279+
require.NoError(t, err)
1280+
ti, err = ts.GetTablet(ctx, alias)
1281+
require.NoError(t, err)
1282+
// Should be PRIMARY due to checkPrimaryShip logic
1283+
assert.Equal(t, topodatapb.TabletType_PRIMARY, ti.Type)
1284+
tm.Stop()
1285+
}

0 commit comments

Comments
 (0)