Skip to content

Commit f2eb9b8

Browse files
committed
update PRIMARY handling and error messages
Signed-off-by: Stephen Baker <[email protected]>
1 parent 4cfab07 commit f2eb9b8

File tree

2 files changed

+21
-10
lines changed

2 files changed

+21
-10
lines changed

go/vt/vttablet/tabletmanager/tm_init.go

Lines changed: 11 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -383,24 +383,26 @@ func (tm *TabletManager) Start(tablet *topodatapb.Tablet, config *tabletenv.Tabl
383383
existingTablet, err := tm.TopoServer.GetTablet(ctx, tablet.Alias)
384384
if err != nil && !topo.IsErrType(err, topo.NoNode) {
385385
// Error other than "node doesn't exist" - return it
386-
return vterrors.Wrap(err, "failed to check for existing tablet record")
386+
return vterrors.Wrap(err, "--init-tablet-type-lookup is enabled but failed to get existing tablet record from topology, unable to determine tablet type during startup")
387387
}
388388

389-
// If we found an existing tablet record, use its tablet type instead of the initial one
389+
// If we found an existing tablet record, check its type
390390
if err == nil {
391-
// Skip transient operational types (BACKUP, RESTORE)
392-
// These are temporary states that should not be preserved across restarts
393-
if existingTablet.Type == topodatapb.TabletType_BACKUP || existingTablet.Type == topodatapb.TabletType_RESTORE {
391+
if existingTablet.Type == topodatapb.TabletType_PRIMARY {
392+
// Don't set to PRIMARY yet - let checkPrimaryShip() validate and decide
393+
// checkPrimaryShip() has the logic to verify shard records and determine if this tablet should really be PRIMARY
394+
log.Infof("Found existing tablet record with PRIMARY type, setting to REPLICA and allowing checkPrimaryShip() to validate")
395+
tablet.Type = topodatapb.TabletType_REPLICA
396+
} else if existingTablet.Type == topodatapb.TabletType_BACKUP || existingTablet.Type == topodatapb.TabletType_RESTORE {
397+
// Skip transient operational types (BACKUP, RESTORE)
398+
// These are temporary states that should not be preserved across restarts
394399
log.Infof("Found existing tablet record with transient type %v, using init-tablet-type %v instead",
395400
existingTablet.Type, tablet.Type)
396401
} else {
402+
// Safe to restore the type for non-PRIMARY, non-transient types
397403
log.Infof("Found existing tablet record with --init-tablet-type-lookup enabled, using tablet type %v from topology instead of init-tablet-type %v",
398404
existingTablet.Type, tablet.Type)
399405
tablet.Type = existingTablet.Type
400-
// If it was a PRIMARY, preserve the start time
401-
if existingTablet.Type == topodatapb.TabletType_PRIMARY {
402-
tablet.PrimaryTermStartTime = existingTablet.PrimaryTermStartTime
403-
}
404406
}
405407
} else {
406408
log.Infof("No existing tablet record found, using init-tablet-type: %v", tablet.Type)

go/vt/vttablet/tabletmanager/tm_init_test.go

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1036,12 +1036,21 @@ func TestInitTabletTypeLookup_PreservesPrimaryWithTermTime(t *testing.T) {
10361036
})
10371037
require.NoError(t, err)
10381038

1039-
// 3. Restart with flag enabled - should preserve PRIMARY and term start time
1039+
// 3. Update shard's PrimaryAlias to point to this tablet so checkPrimaryShip will promote it
1040+
_, err = ts.UpdateShardFields(ctx, "ks", "0", func(si *topo.ShardInfo) error {
1041+
si.PrimaryAlias = alias
1042+
si.PrimaryTermStartTime = protoutil.TimeToProto(now)
1043+
return nil
1044+
})
1045+
require.NoError(t, err)
1046+
1047+
// 4. Restart with flag enabled - should set to REPLICA initially, then checkPrimaryShip promotes to PRIMARY
10401048
initTabletTypeLookup = true
10411049
err = tm.Start(tablet, nil)
10421050
require.NoError(t, err)
10431051
ti, err = ts.GetTablet(ctx, alias)
10441052
require.NoError(t, err)
1053+
// Should be promoted to PRIMARY by checkPrimaryShip and preserve the term start time
10451054
assert.Equal(t, topodatapb.TabletType_PRIMARY, ti.Type)
10461055
assert.Equal(t, now.Unix(), ti.GetPrimaryTermStartTime().Unix())
10471056
tm.Stop()

0 commit comments

Comments
 (0)