Skip to content

Commit fa98cef

Browse files
committed
add complete support for tape reclaim
Signed-off-by: Utkarsh Srivastava <[email protected]>
1 parent 581330d commit fa98cef

File tree

7 files changed

+159
-8
lines changed

7 files changed

+159
-8
lines changed

config.js

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -928,6 +928,8 @@ config.NSFS_GLACIER_DMAPI_PMIG_DAYS = config.S3_RESTORE_REQUEST_MAX_DAYS;
928928
// accidental blocking reads from happening.
929929
config.NSFS_GLACIER_DMAPI_FINALIZE_RESTORE_ENABLE = false;
930930

931+
config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM = false;
932+
931933
config.NSFS_STATFS_CACHE_SIZE = 10000;
932934
config.NSFS_STATFS_CACHE_EXPIRY_MS = 1 * 1000;
933935

@@ -971,7 +973,7 @@ config.NSFS_GLACIER_MIGRATE_LOG_THRESHOLD = 50 * 1024;
971973
config.NSFS_GLACIER_METRICS_STATFS_PATHS = [];
972974
config.NSFS_GLACIER_METRICS_STATFS_INTERVAL = 60 * 1000; // Refresh statfs value every minute
973975

974-
/**
976+
/**
975977
* NSFS_GLACIER_RESERVED_BUCKET_TAGS defines an object of bucket tags which will be reserved
976978
* by the system and PUT operations for them via S3 API would be limited - as in they would be
977979
* mutable only if specified and only under certain conditions.
@@ -982,7 +984,7 @@ config.NSFS_GLACIER_METRICS_STATFS_INTERVAL = 60 * 1000; // Refresh statfs value
982984
* default: any,
983985
* event: boolean
984986
* }>}
985-
*
987+
*
986988
* @example
987989
* {
988990
'deep-archive-copies': {

src/cmd/manage_nsfs.js

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -879,6 +879,9 @@ async function manage_glacier_operations(action, argv) {
879879
case GLACIER_ACTIONS.EXPIRY:
880880
await manage_nsfs_glacier.process_expiry();
881881
break;
882+
case GLACIER_ACTIONS.RECLAIM:
883+
await manage_nsfs_glacier.process_reclaim();
884+
break;
882885
default:
883886
throw_cli_error(ManageCLIError.InvalidGlacierOperation);
884887
}

src/manage_nsfs/manage_nsfs_constants.js

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ const GLACIER_ACTIONS = Object.freeze({
2626
MIGRATE: 'migrate',
2727
RESTORE: 'restore',
2828
EXPIRY: 'expiry',
29+
RECLAIM: 'reclaim',
2930
});
3031

3132
const DIAGNOSE_ACTIONS = Object.freeze({
@@ -72,6 +73,7 @@ const VALID_OPTIONS_GLACIER = {
7273
'migrate': new Set([ CONFIG_ROOT_FLAG]),
7374
'restore': new Set([ CONFIG_ROOT_FLAG]),
7475
'expiry': new Set([ CONFIG_ROOT_FLAG]),
76+
'reclaim': new Set([ CONFIG_ROOT_FLAG]),
7577
};
7678

7779
const VALID_OPTIONS_DIAGNOSE = {

src/manage_nsfs/manage_nsfs_glacier.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,19 @@ async function process_expiry() {
5858
}
5959
}
6060

61+
async function process_reclaim() {
62+
const fs_context = native_fs_utils.get_process_fs_context();
63+
const backend = Glacier.getBackend();
64+
65+
if (
66+
await backend.low_free_space() ||
67+
!(await time_exceeded(fs_context, config.NSFS_GLACIER_RESTORE_INTERVAL, Glacier.RECLAIM_TIMESTAMP_FILE))
68+
) return;
69+
70+
await backend.perform(prepare_galcier_fs_context(fs_context), "RECLAIM");
71+
const timestamp_file_path = path.join(config.NSFS_GLACIER_LOGS_DIR, Glacier.RECLAIM_TIMESTAMP_FILE);
72+
await record_current_time(fs_context, timestamp_file_path);
73+
}
6174

6275
/**
6376
* time_exceeded returns true if the time between last run recorded in the given
@@ -129,3 +142,4 @@ function prepare_galcier_fs_context(fs_context) {
129142
exports.process_migrations = process_migrations;
130143
exports.process_restores = process_restores;
131144
exports.process_expiry = process_expiry;
145+
exports.process_reclaim = process_reclaim;

src/sdk/glacier.js

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ class Glacier {
2121
static MIGRATE_TIMESTAMP_FILE = 'migrate.timestamp';
2222
static RESTORE_TIMESTAMP_FILE = 'restore.timestamp';
2323
static EXPIRY_TIMESTAMP_FILE = 'expiry.timestamp';
24+
static RECLAIM_TIMESTAMP_FILE = 'reclaim.timestamp';
2425

2526
/**
2627
* XATTR_RESTORE_REQUEST is set to a NUMBER (expiry days) by `restore_object` when
@@ -71,10 +72,21 @@ class Glacier {
7172
*/
7273
static GPFS_DMAPI_XATTR_TAPE_TPS = 'dmapi.IBMTPS';
7374

75+
/**
76+
* GPFS_DMAPI_XATTR_TAPE_UID xattr contains the UID, i.e. the unique ID assigned to the file
77+
*
78+
* Example: `1284427297506873931-5499940123615166566-1799306066-279655-0` (here 279655 is
79+
* the inode number)
80+
*
81+
* NOTE: If IBMUID EA exists, that means the file is either migrated or premigrated.
82+
*/
83+
static GPFS_DMAPI_XATTR_TAPE_UID = 'dmapi.IBMUID';
84+
7485
static MIGRATE_WAL_NAME = 'migrate';
7586
static MIGRATE_STAGE_WAL_NAME = 'stage.migrate';
7687
static RESTORE_WAL_NAME = 'restore';
7788
static RESTORE_STAGE_WAL_NAME = 'stage.restore';
89+
static RECLAIM_WAL_NAME = 'reclaim';
7890

7991
/** @type {nb.RestoreState} */
8092
static RESTORE_STATUS_CAN_RESTORE = 'CAN_RESTORE';
@@ -86,6 +98,7 @@ class Glacier {
8698
static GLACIER_CLUSTER_LOCK = 'glacier.cluster.lock';
8799
static GLACIER_MIGRATE_CLUSTER_LOCK = 'glacier.cluster.migrate.lock';
88100
static GLACIER_RESTORE_CLUSTER_LOCK = 'glacier.cluster.restore.lock';
101+
static GLACIER_RECLAIM_CLUSTER_LOCK = 'glacier.cluster.reclaim.lock';
89102
static GLACIER_SCAN_LOCK = 'glacier.scan.lock';
90103

91104
/**
@@ -181,6 +194,20 @@ class Glacier {
181194
throw new Error('Unimplementented');
182195
}
183196

197+
/**
198+
* reclaim cleans up unindexed items in the underlying
199+
* glacier storage
200+
*
201+
* NOTE: This needs to be implemented by each backend.
202+
* @param {nb.NativeFSContext} fs_context
203+
* @param {LogFile} log_file log filename
204+
* @param {(entry: string) => Promise<void>} failure_recorder
205+
* @returns {Promise<boolean>}
206+
*/
207+
async reclaim(fs_context, log_file, failure_recorder) {
208+
throw new Error('Unimplementented');
209+
}
210+
184211
/**
185212
* low_free_space must return true if the backend has
186213
* low free space.
@@ -199,7 +226,7 @@ class Glacier {
199226

200227
/**
201228
* @param {nb.NativeFSContext} fs_context
202-
* @param {"MIGRATION" | "RESTORE" | "EXPIRY"} type
229+
* @param {"MIGRATION" | "RESTORE" | "EXPIRY" | "RECLAIM"} type
203230
*/
204231
async perform(fs_context, type) {
205232
const lock_path = lock_file => path.join(config.NSFS_GLACIER_LOGS_DIR, lock_file);
@@ -217,8 +244,8 @@ class Glacier {
217244
* ) => Promise<boolean>} log_cb */
218245

219246
/**
220-
* @param {string} namespace
221-
* @param {log_cb} cb
247+
* @param {string} namespace
248+
* @param {log_cb} cb
222249
*/
223250
const process_glacier_logs = async (namespace, cb) => {
224251
const logs = new PersistentLogger(
@@ -266,6 +293,10 @@ class Glacier {
266293
this.restore.bind(this),
267294
Glacier.GLACIER_RESTORE_CLUSTER_LOCK,
268295
);
296+
} else if (type === 'RECLAIM') {
297+
await native_fs_utils.lock_and_run(fs_context, lock_path(Glacier.GLACIER_RECLAIM_CLUSTER_LOCK), async () => {
298+
await process_glacier_logs(Glacier.RECLAIM_WAL_NAME, this.reclaim.bind(this));
299+
});
269300
}
270301
}
271302

src/sdk/glacier_tapecloud.js

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ function get_bin_path(bin_name) {
2424
class TapeCloudUtils {
2525
static MIGRATE_SCRIPT = 'migrate';
2626
static RECALL_SCRIPT = 'recall';
27+
static RECLAIM_SCRIPT = 'reclaim';
2728
static TASK_SHOW_SCRIPT = 'task_show';
2829
static PROCESS_EXPIRED_SCRIPT = 'process_expired';
2930
static LOW_FREE_SPACE_SCRIPT = 'low_free_space';
@@ -182,6 +183,29 @@ class TapeCloudUtils {
182183
}
183184
}
184185

186+
/**
187+
* reclaim takes the name of a file which contains the list
188+
* of the files to be reclaimed.
189+
*
190+
* reclaim doesn't perform any failure handling and expects the
191+
* underlying scripts to take care of retries.
192+
*
193+
* @param {string} file filename
194+
* @returns {Promise<boolean>} Indicates success if true
195+
*/
196+
static async reclaim(file) {
197+
try {
198+
dbg.log1("Starting reclaim for file", file);
199+
const out = await exec(`${get_bin_path(TapeCloudUtils.RECLAIM_SCRIPT)} ${file}`, { return_stdout: true });
200+
dbg.log0("reclaim finished with:", out);
201+
dbg.log0("Finished reclaim for file", file);
202+
} catch (error) {
203+
dbg.error("Failed to run TapeCloudUtils.reclaim for file:", file, "due to error:", error);
204+
}
205+
206+
return true;
207+
}
208+
185209
static async process_expired() {
186210
dbg.log1("Starting process_expired");
187211
const out = await exec(`${get_bin_path(TapeCloudUtils.PROCESS_EXPIRED_SCRIPT)}`, { return_stdout: true });
@@ -416,6 +440,21 @@ class TapeCloudGlacier extends Glacier {
416440
}
417441
}
418442

443+
/**
444+
*
445+
* @param {nb.NativeFSContext} fs_context
446+
* @param {LogFile} log_file log filename
447+
* @param {(entry: string) => Promise<void>} failure_recorder
448+
* @returns {Promise<boolean>}
449+
*/
450+
async reclaim(fs_context, log_file, failure_recorder) {
451+
try {
452+
return this._process_reclaimed(log_file.log_path);
453+
} catch (error) {
454+
dbg.error('unexpected error occured while running tapecloud.reclaim:', error);
455+
}
456+
}
457+
419458
async low_free_space() {
420459
const result = await exec(get_bin_path(TapeCloudUtils.LOW_FREE_SPACE_SCRIPT), { return_stdout: true });
421460
return result.toLowerCase().trim() === 'true';
@@ -457,6 +496,17 @@ class TapeCloudGlacier extends Glacier {
457496
return TapeCloudUtils.process_expired();
458497
}
459498

499+
/**
500+
* _process_reclaimed should perform the reclaim
501+
*
502+
* NOTE: Must be overwritten for tests
503+
* @param {string} file
504+
* @returns {Promise<boolean>}
505+
*/
506+
async _process_reclaimed(file) {
507+
return TapeCloudUtils.reclaim(file);
508+
}
509+
460510
/**
461511
* finalizes the restore by setting the required EAs
462512
*

src/sdk/namespace_fs.js

Lines changed: 52 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1405,6 +1405,8 @@ class NamespaceFS {
14051405
const is_disabled_dir_content = this._is_directory_content(file_path, params.key) && this._is_versioning_disabled();
14061406

14071407
const stat = await target_file.stat(fs_context);
1408+
const file_path_stat = config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM &&
1409+
await nb_native().fs.stat(fs_context, file_path).catch(_.noop);
14081410
this._verify_encryption(params.encryption, this._get_encryption_info(stat));
14091411

14101412
const copy_xattr = params.copy_source && params.xattr_copy;
@@ -1453,6 +1455,11 @@ class NamespaceFS {
14531455
dbg.log1('NamespaceFS._finish_upload:', open_mode, file_path, upload_path, fs_xattr);
14541456

14551457
if (!same_inode && !part_upload) {
1458+
// If the target file is already on tape then this is a candidate for tape reclaim
1459+
if (file_path_stat && file_path_stat.xattr[Glacier.GPFS_DMAPI_XATTR_TAPE_INDICATOR]) {
1460+
await this.append_to_reclaim_wal(fs_context, upload_path, file_path_stat);
1461+
}
1462+
14561463
await this._move_to_dest(fs_context, upload_path, file_path, target_file, open_mode, params.key);
14571464
}
14581465

@@ -2118,13 +2125,19 @@ class NamespaceFS {
21182125
try {
21192126
files = await this._open_files(fs_context, { src_path: file_path, delete_version: true });
21202127
await this._verify_lifecycle_filter_and_unlink(fs_context, params, file_path, files.delete_version);
2128+
await this.append_to_reclaim_wal(fs_context, file_path);
21212129
} catch (err) {
21222130
if (err.code !== 'ENOENT') throw err;
21232131
} finally {
21242132
if (files) await this._close_files(fs_context, files.delete_version, undefined, true);
21252133
}
21262134
} else {
2127-
await native_fs_utils.unlink_ignore_enoent(fs_context, file_path);
2135+
try {
2136+
await nb_native().fs.unlink(fs_context, file_path);
2137+
await this.append_to_reclaim_wal(fs_context, file_path);
2138+
} catch (err) {
2139+
if (err.code !== 'ENOENT' && err.code !== 'EISDIR') throw err;
2140+
}
21282141
}
21292142

21302143
await this._delete_path_dirs(file_path, fs_context);
@@ -3709,6 +3722,30 @@ class NamespaceFS {
37093722
await NamespaceFS.restore_wal.append(entry);
37103723
}
37113724

3725+
/**
3726+
*
3727+
* @param {nb.NativeFSContext} fs_context
3728+
* @param {string} file_path
3729+
* @param {nb.NativeFSStats} [stat]
3730+
* @returns {Promise<void>}
3731+
*/
3732+
async append_to_reclaim_wal(fs_context, file_path, stat) {
3733+
console.log('calling append_to_reclaim_wal:', file_path);
3734+
if (!config.NSFS_GLACIER_LOGS_ENABLED || !config.NSFS_GLACIER_DMAPI_ENABLE_TAPE_RECLAIM) return;
3735+
3736+
if (!stat) {
3737+
stat = await nb_native().fs.stat(fs_context, file_path);
3738+
}
3739+
3740+
const data = JSON.stringify({
3741+
full_path: file_path,
3742+
logical_size: stat.size,
3743+
ea: stat.xattr,
3744+
});
3745+
console.log('calling append_to_reclaim_wal data:', { file_path, data });
3746+
await NamespaceFS.reclaim_wal.append(data);
3747+
}
3748+
37123749
static get migrate_wal() {
37133750
if (!NamespaceFS._migrate_wal) {
37143751
NamespaceFS._migrate_wal = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, Glacier.MIGRATE_WAL_NAME, {
@@ -3731,6 +3768,17 @@ class NamespaceFS {
37313768
return NamespaceFS._restore_wal;
37323769
}
37333770

3771+
static get reclaim_wal() {
3772+
if (!NamespaceFS._reclaim_wal) {
3773+
NamespaceFS._reclaim_wal = new PersistentLogger(config.NSFS_GLACIER_LOGS_DIR, Glacier.RECLAIM_WAL_NAME, {
3774+
poll_interval: config.NSFS_GLACIER_LOGS_POLL_INTERVAL,
3775+
locking: 'SHARED',
3776+
});
3777+
}
3778+
3779+
return NamespaceFS._reclaim_wal;
3780+
}
3781+
37343782
////////////////////////////
37353783
// LIFECYLE HELPERS //
37363784
////////////////////////////
@@ -3803,7 +3851,8 @@ NamespaceFS._migrate_wal = null;
38033851
/** @type {PersistentLogger} */
38043852
NamespaceFS._restore_wal = null;
38053853

3854+
/** @type {PersistentLogger} */
3855+
NamespaceFS._reclaim_wal = null;
3856+
38063857
module.exports = NamespaceFS;
38073858
module.exports.multi_buffer_pool = multi_buffer_pool;
3808-
3809-

0 commit comments

Comments
 (0)