Skip to content

Commit 127fa52

Browse files
authored
Snapshottable recovery system (#2032)
* Snapshottable recovery system: Deploy the entire recovery system (kernel, initrd and rootfs) to the same folder. During an upgrade, deploy to a transitional folder, then swap it with the current recovery system and delete the old one. This ensures old recovery systems are cleaned up and avoids the risk of mixing systems during an upgrade. Signed-off-by: Fredrik Lönnegren <[email protected]>
1 parent 88a9d0c commit 127fa52

File tree

14 files changed

+128
-94
lines changed

14 files changed

+128
-94
lines changed

cmd/build-iso.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ func NewBuildISO(root *cobra.Command, addCheckRoot bool) *cobra.Command {
125125
}
126126

127127
buildISO := action.NewBuildISOAction(cfg, spec)
128-
return buildISO.ISORun()
128+
return buildISO.Run()
129129
},
130130
}
131131

pkg/action/build-disk.go

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -237,16 +237,9 @@ func (b *BuildDiskAction) BuildDiskRun() (err error) { //nolint:gocyclo
237237
return elementalError.NewFromError(err, elementalError.HookAfterDisk)
238238
}
239239

240-
// Create recovery image
241-
bootDir := filepath.Join(b.roots[constants.RecoveryPartName], "boot")
242-
if err = utils.MkdirAll(b.cfg.Fs, bootDir, constants.DirPerm); err != nil {
243-
b.cfg.Logger.Errorf("failed creating recovery boot dir: %v", err)
244-
return err
245-
}
246-
247240
tmpSrc := b.spec.RecoverySystem.Source
248241
b.spec.RecoverySystem.Source = types.NewDirSrc(recRoot)
249-
err = elemental.DeployRecoverySystem(b.cfg.Config, &b.spec.RecoverySystem, bootDir)
242+
err = elemental.DeployRecoverySystem(b.cfg.Config, &b.spec.RecoverySystem)
250243
if err != nil {
251244
b.cfg.Logger.Errorf("failed deploying recovery system: %v", err)
252245
return err

pkg/action/build-iso.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ func grubCfgTemplate(arch string) string {
4141
4242
menuentry "%s" --class os --unrestricted {
4343
echo Loading kernel...
44-
linux ($root)` + constants.ISOKernelPath(arch) + ` cdroot root=live:CDLABEL=%s rd.live.dir=/ rd.live.squashimg=rootfs.squashfs console=tty1 console=ttyS0 elemental.disable elemental.setup=` + constants.ISOCloudInitPath + `
44+
linux ($root)` + constants.ISOKernelPath(arch) + ` cdroot root=live:CDLABEL=%s rd.live.dir=` + constants.ISOLoaderPath(arch) + ` rd.live.squashimg=rootfs.squashfs console=tty1 console=ttyS0 elemental.disable elemental.setup=` + constants.ISOCloudInitPath + `
4545
echo Loading initrd...
4646
initrd ($root)` + constants.ISOInitrdPath(arch) + `
4747
}
@@ -78,8 +78,8 @@ func NewBuildISOAction(cfg *types.BuildConfig, spec *types.LiveISO, opts ...Buil
7878
return b
7979
}
8080

81-
// BuildISORun will install the system from a given configuration
82-
func (b *BuildISOAction) ISORun() error {
81+
// Run builds the ISO image from the given configuration
82+
func (b *BuildISOAction) Run() error {
8383
cleanup := utils.NewCleanStack()
8484
var err error
8585
defer func() { err = cleanup.Cleanup(err) }()
@@ -170,11 +170,11 @@ func (b *BuildISOAction) ISORun() error {
170170

171171
image := &types.Image{
172172
Source: types.NewDirSrc(rootDir),
173-
File: filepath.Join(isoDir, constants.ISORootFile),
173+
File: filepath.Join(bootDir, constants.ISORootFile),
174174
FS: constants.SquashFs,
175175
}
176176

177-
err = elemental.DeployRecoverySystem(b.cfg.Config, image, bootDir)
177+
err = elemental.DeployRecoverySystem(b.cfg.Config, image)
178178
if err != nil {
179179
b.cfg.Logger.Errorf("Failed preparing ISO's root tree: %v", err)
180180
return err

pkg/action/build_test.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ var _ = Describe("Build Actions", func() {
127127
}
128128

129129
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
130-
err := buildISO.ISORun()
130+
err := buildISO.Run()
131131

132132
Expect(err).ShouldNot(HaveOccurred())
133133
})
@@ -138,7 +138,7 @@ var _ = Describe("Build Actions", func() {
138138
iso.RootFS = append(iso.RootFS, rootSrc)
139139

140140
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
141-
err := buildISO.ISORun()
141+
err := buildISO.Run()
142142
Expect(err).Should(HaveOccurred())
143143
})
144144
It("Fails on prepare ISO", func() {
@@ -148,7 +148,7 @@ var _ = Describe("Build Actions", func() {
148148
iso.RootFS = append(iso.RootFS, rootSrc)
149149

150150
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
151-
err := buildISO.ISORun()
151+
err := buildISO.Run()
152152

153153
Expect(err).Should(HaveOccurred())
154154
})
@@ -161,14 +161,14 @@ var _ = Describe("Build Actions", func() {
161161

162162
By("fails without kernel")
163163
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
164-
err = buildISO.ISORun()
164+
err = buildISO.Run()
165165
Expect(err).Should(HaveOccurred())
166166

167167
By("fails without initrd")
168168
_, err = fs.Create("/local/dir/boot/vmlinuz")
169169
Expect(err).ShouldNot(HaveOccurred())
170170
buildISO = action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
171-
err = buildISO.ISORun()
171+
err = buildISO.Run()
172172
Expect(err).Should(HaveOccurred())
173173
})
174174
It("Fails installing uefi sources", func() {
@@ -178,7 +178,7 @@ var _ = Describe("Build Actions", func() {
178178
iso.UEFI = []*types.ImageSource{uefiSrc}
179179

180180
buildISO := action.NewBuildISOAction(cfg, iso)
181-
err := buildISO.ISORun()
181+
err := buildISO.Run()
182182
Expect(err).Should(HaveOccurred())
183183
})
184184
It("Fails on ISO filesystem creation", func() {
@@ -193,7 +193,7 @@ var _ = Describe("Build Actions", func() {
193193
}
194194

195195
buildISO := action.NewBuildISOAction(cfg, iso, action.WithLiveBootloader(bootloader))
196-
err := buildISO.ISORun()
196+
err := buildISO.Run()
197197

198198
Expect(err).Should(HaveOccurred())
199199
})
@@ -228,7 +228,7 @@ var _ = Describe("Build Actions", func() {
228228
Expect(buildDisk.BuildDiskRun()).To(Succeed())
229229

230230
Expect(runner.MatchMilestones([][]string{
231-
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
231+
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
232232
{"mkfs.ext4", "-L", "COS_STATE"},
233233
{"losetup", "--show", "-f", "/tmp/test/build/state.part"},
234234
{"mkfs.vfat", "-n", "COS_GRUB"},
@@ -255,7 +255,7 @@ var _ = Describe("Build Actions", func() {
255255
Expect(buildDisk.BuildDiskRun()).To(Succeed())
256256

257257
Expect(runner.MatchMilestones([][]string{
258-
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
258+
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
259259
{"mkfs.vfat", "-n", "COS_GRUB"},
260260
{"mkfs.ext4", "-L", "COS_OEM"},
261261
{"mkfs.ext4", "-L", "COS_RECOVERY"},
@@ -274,7 +274,7 @@ var _ = Describe("Build Actions", func() {
274274
Expect(buildDisk.BuildDiskRun()).NotTo(Succeed())
275275

276276
Expect(runner.MatchMilestones([][]string{
277-
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/recovery.img"},
277+
{"mksquashfs", "/tmp/test/build/recovery.img.root", "/tmp/test/build/recovery/boot/recovery.img"},
278278
})).To(Succeed())
279279

280280
// failed before preparing partitions images

pkg/action/install.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ func (i InstallAction) Run() (err error) {
249249
}
250250
recoverySystem.Source.SetDigest(i.spec.System.GetDigest())
251251
}
252-
err = elemental.DeployRecoverySystem(i.cfg.Config, &recoverySystem, recoveryBootDir)
252+
err = elemental.DeployRecoverySystem(i.cfg.Config, &recoverySystem)
253253
if err != nil {
254254
i.cfg.Logger.Errorf("Failed deploying recovery image: %v", err)
255255
return elementalError.NewFromError(err, elementalError.DeployImage)

pkg/action/upgrade-recovery.go

Lines changed: 55 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -84,18 +84,22 @@ func NewUpgradeRecoveryAction(config *types.RunConfig, spec *types.UpgradeSpec,
8484
return u, nil
8585
}
8686

87-
func (u UpgradeRecoveryAction) Info(s string, args ...interface{}) {
87+
func (u UpgradeRecoveryAction) Infof(s string, args ...interface{}) {
8888
u.cfg.Logger.Infof(s, args...)
8989
}
9090

91-
func (u UpgradeRecoveryAction) Debug(s string, args ...interface{}) {
91+
func (u UpgradeRecoveryAction) Debugf(s string, args ...interface{}) {
9292
u.cfg.Logger.Debugf(s, args...)
9393
}
9494

95-
func (u UpgradeRecoveryAction) Error(s string, args ...interface{}) {
95+
func (u UpgradeRecoveryAction) Errorf(s string, args ...interface{}) {
9696
u.cfg.Logger.Errorf(s, args...)
9797
}
9898

99+
func (u UpgradeRecoveryAction) Warnf(s string, args ...interface{}) {
100+
u.cfg.Logger.Warnf(s, args...)
101+
}
102+
99103
func (u *UpgradeRecoveryAction) mountRWPartitions(cleanup *utils.CleanStack) error {
100104
umount, err := elemental.MountRWPartition(u.cfg.Config, u.spec.Partitions.Recovery)
101105
if err != nil {
@@ -146,48 +150,79 @@ func (u *UpgradeRecoveryAction) Run() (err error) {
146150
return err
147151
}
148152

149-
// Create recovery /boot dir if not exists
150-
bootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, "boot")
151-
if err := utils.MkdirAll(u.cfg.Fs, bootDir, constants.DirPerm); err != nil {
152-
u.cfg.Logger.Errorf("failed creating recovery boot dir: %v", err)
153-
return elementalError.NewFromError(err, elementalError.CreateDir)
153+
// Remove any traces of previously errored upgrades
154+
transitionDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.BootTransitionDir)
155+
u.Debugf("removing any orphaned recovery system %s", transitionDir)
156+
err = utils.RemoveAll(u.cfg.Fs, transitionDir)
157+
if err != nil {
158+
u.Errorf("failed removing orphaned recovery image: %s", err.Error())
159+
return err
154160
}
155161

156-
// Upgrade recovery
157-
err = elemental.DeployRecoverySystem(u.cfg.Config, &u.spec.RecoverySystem, bootDir)
162+
// Deploy recovery system to transition dir
163+
err = elemental.DeployRecoverySystem(u.cfg.Config, &u.spec.RecoverySystem)
158164
if err != nil {
159-
u.cfg.Logger.Errorf("failed deploying recovery image: %v", err)
165+
u.cfg.Logger.Errorf("failed deploying recovery image: %s", err.Error())
160166
return elementalError.NewFromError(err, elementalError.DeployImage)
161167
}
162-
recoveryFile := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.RecoveryImgFile)
163-
transitionFile := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.TransitionImgFile)
164-
if ok, _ := utils.Exists(u.cfg.Fs, recoveryFile); ok {
165-
err = u.cfg.Fs.Remove(recoveryFile)
168+
169+
// Switch places on /boot and transition-dir
170+
bootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.BootDir)
171+
oldBootDir := filepath.Join(u.spec.Partitions.Recovery.MountPoint, constants.OldBootDir)
172+
173+
// If a previous upgrade failed, remove old boot-dir
174+
err = utils.RemoveAll(u.cfg.Fs, oldBootDir)
175+
if err != nil {
176+
u.Errorf("failed removing orphaned recovery image: %s", err.Error())
177+
return err
178+
}
179+
180+
// Rename current boot-dir in case we need to use it again
181+
if ok, _ := utils.Exists(u.cfg.Fs, bootDir); ok {
182+
err = u.cfg.Fs.Rename(bootDir, oldBootDir)
166183
if err != nil {
167-
u.Error("failed removing old recovery image")
184+
u.Errorf("failed removing old recovery image: %s", err.Error())
168185
return err
169186
}
170187
}
171-
err = u.cfg.Fs.Rename(transitionFile, recoveryFile)
188+
189+
// Move new boot-dir to /boot
190+
err = u.cfg.Fs.Rename(transitionDir, bootDir)
172191
if err != nil {
173-
u.Error("failed renaming transition recovery image")
192+
u.cfg.Logger.Errorf("failed renaming transition recovery image: %s", err.Error())
193+
194+
// Try to salvage old recovery system
195+
if ok, _ := utils.Exists(u.cfg.Fs, oldBootDir); ok {
196+
err = u.cfg.Fs.Rename(oldBootDir, bootDir)
197+
if err != nil {
198+
u.cfg.Logger.Errorf("failed salvaging old recovery system: %s", err.Error())
199+
}
200+
}
201+
174202
return err
175203
}
176204

205+
// Remove old boot-dir when new recovery system is in place
206+
err = utils.RemoveAll(u.cfg.Fs, oldBootDir)
207+
if err != nil {
208+
u.Warnf("failed removing old recovery image: %s", err.Error())
209+
}
210+
177211
// Update state.yaml file on recovery and state partitions
178212
if u.updateInstallState {
179213
err = u.upgradeInstallStateYaml()
180214
if err != nil {
181-
u.Error("failed upgrading installation metadata")
215+
u.Errorf("failed upgrading installation metadata: %s", err.Error())
182216
return err
183217
}
184218
}
185219

186-
u.Info("Recovery upgrade completed")
220+
u.Infof("Recovery upgrade completed")
187221

188222
// Do not reboot/poweroff on cleanup errors
189223
err = cleanup.Cleanup(err)
190224
if err != nil {
225+
u.Errorf("failed cleanup: %s", err.Error())
191226
return elementalError.NewFromError(err, elementalError.Cleanup)
192227
}
193228

pkg/action/upgrade-recovery_test.go

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,7 @@ var _ = Describe("Upgrade Recovery Actions", func() {
175175
Expect(err).To(HaveOccurred())
176176
})
177177
It("Successfully upgrades recovery from docker image", Label("docker"), func() {
178-
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
178+
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
179179
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)
180180

181181
// This should be the old image
@@ -212,7 +212,7 @@ var _ = Describe("Upgrade Recovery Actions", func() {
212212
Expect(spec.State.Date).ToNot(BeEmpty(), "post-upgrade state should contain a date")
213213
})
214214
It("Successfully skips updateInstallState", Label("docker"), func() {
215-
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
215+
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
216216
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)
217217

218218
// This should be the old image
@@ -253,7 +253,6 @@ var _ = Describe("Upgrade Recovery Actions", func() {
253253
})
254254

255255
func PrepareTestRecoveryImage(config *types.RunConfig, recoveryPath string, fs vfs.FS, runner *mocks.FakeRunner) *types.UpgradeSpec {
256-
GinkgoHelper()
257256
// Create installState with squashed recovery
258257
statePath := filepath.Join(constants.RunningStateDir, constants.InstallStateFile)
259258
installState := &types.InstallState{
@@ -270,28 +269,30 @@ func PrepareTestRecoveryImage(config *types.RunConfig, recoveryPath string, fs v
270269
}
271270
Expect(config.WriteInstallState(installState, statePath, statePath)).ShouldNot(HaveOccurred())
272271

273-
recoveryImgPath := filepath.Join(recoveryPath, constants.RecoveryImgFile)
274-
Expect(fs.WriteFile(recoveryImgPath, []byte("recovery"), constants.FilePerm)).ShouldNot(HaveOccurred())
275-
276-
transitionDir := filepath.Join(recoveryPath, "transition.imgTree")
277-
Expect(utils.MkdirAll(fs, filepath.Join(transitionDir, "lib/modules/6.6"), constants.DirPerm)).ShouldNot(HaveOccurred())
278-
bootDir := filepath.Join(transitionDir, "boot")
279-
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
280-
Expect(fs.WriteFile(filepath.Join(bootDir, "vmlinuz-6.6"), []byte("kernel"), constants.FilePerm)).ShouldNot(HaveOccurred())
281-
Expect(fs.WriteFile(filepath.Join(bootDir, "elemental.initrd-6.6"), []byte("initrd"), constants.FilePerm)).ShouldNot(HaveOccurred())
272+
for _, rootDir := range []string{"/some/dir", recoveryPath} {
273+
bootDir := filepath.Join(rootDir, "boot")
274+
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
275+
recoveryImgPath := filepath.Join(bootDir, constants.RecoveryImgFile)
276+
Expect(fs.WriteFile(recoveryImgPath, []byte("recovery"), constants.FilePerm)).ShouldNot(HaveOccurred())
277+
Expect(utils.MkdirAll(fs, filepath.Join(rootDir, "lib/modules/6.6"), constants.DirPerm)).ShouldNot(HaveOccurred())
278+
Expect(utils.MkdirAll(fs, bootDir, constants.DirPerm)).ShouldNot(HaveOccurred())
279+
Expect(fs.WriteFile(filepath.Join(bootDir, "vmlinuz-6.6"), []byte("kernel"), constants.FilePerm)).ShouldNot(HaveOccurred())
280+
Expect(fs.WriteFile(filepath.Join(bootDir, "elemental.initrd-6.6"), []byte("initrd"), constants.FilePerm)).ShouldNot(HaveOccurred())
281+
}
282282

283283
spec, err := conf.NewUpgradeSpec(config.Config)
284284
Expect(err).ShouldNot(HaveOccurred())
285285

286286
spec.System = types.NewDockerSrc("alpine")
287287
spec.RecoveryUpgrade = true
288-
spec.RecoverySystem.Source = spec.System
288+
spec.RecoverySystem.Source = types.NewDirSrc("/some/dir")
289289
spec.RecoverySystem.Size = 16
290290

291291
runner.SideEffect = func(command string, args ...string) ([]byte, error) {
292292
if command == "mksquashfs" && args[1] == spec.RecoverySystem.File {
293293
// create the transition img for squash to fake it
294-
_, _ = fs.Create(spec.RecoverySystem.File)
294+
_, err = fs.Create(spec.RecoverySystem.File)
295+
Expect(err).To(Succeed())
295296
}
296297
return []byte{}, nil
297298
}

pkg/action/upgrade_test.go

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -294,14 +294,12 @@ var _ = Describe("Runtime Actions", func() {
294294
Expect(runner.IncludesCmds([][]string{{"poweroff", "-f"}})).To(BeNil())
295295
})
296296
It("Successfully upgrades recovery from docker image", Label("docker"), func() {
297-
recoveryImgPath := filepath.Join(constants.LiveDir, constants.RecoveryImgFile)
297+
recoveryImgPath := filepath.Join(constants.LiveDir, constants.BootDir, constants.RecoveryImgFile)
298298
spec := PrepareTestRecoveryImage(config, constants.LiveDir, fs, runner)
299299

300300
// This should be the old image
301301
info, err := fs.Stat(recoveryImgPath)
302302
Expect(err).ToNot(HaveOccurred())
303-
// Image size should be empty
304-
Expect(info.Size()).To(BeNumerically(">", 0))
305303
Expect(info.IsDir()).To(BeFalse())
306304
f, _ := fs.ReadFile(recoveryImgPath)
307305
Expect(f).To(ContainSubstring("recovery"))
@@ -314,11 +312,9 @@ var _ = Describe("Runtime Actions", func() {
314312
// This should be the new image
315313
info, err = fs.Stat(recoveryImgPath)
316314
Expect(err).ToNot(HaveOccurred())
317-
// Image size should be empty
318-
Expect(info.Size()).To(BeNumerically("==", 0))
319315
Expect(info.IsDir()).To(BeFalse())
320316
f, _ = fs.ReadFile(recoveryImgPath)
321-
Expect(f).ToNot(ContainSubstring("recovery"))
317+
Expect(f).To(BeEmpty())
322318

323319
// Transition squash should not exist
324320
info, err = fs.Stat(spec.RecoverySystem.File)

0 commit comments

Comments
 (0)