diff --git a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java index 1477eb5e0a6..85158992ebf 100644 --- a/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java +++ b/hadoop-ozone/integration-test/src/test/java/org/apache/hadoop/ozone/repair/om/TestFSORepairTool.java @@ -33,6 +33,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.fs.CommonConfigurationKeysPublic; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -367,6 +368,17 @@ private int dryRun(String... args) { return execute(true, args); } + private int executeWithDb(boolean dryRun, String omDbPath, String... args) { + List argList = new ArrayList<>(Arrays.asList("om", "fso-tree", "--db", omDbPath)); + if (dryRun) { + argList.add("--dry-run"); + } + argList.addAll(Arrays.asList(args)); + + return withTextFromSystemIn("y") + .execute(() -> cmd.execute(argList.toArray(new String[0]))); + } + private int execute(boolean dryRun, String... args) { List argList = new ArrayList<>(Arrays.asList("om", "fso-tree", "--db", dbPath)); if (dryRun) { @@ -378,6 +390,39 @@ private int execute(boolean dryRun, String... args) { .execute(() -> cmd.execute(argList.toArray(new String[0]))); } + @Order(ORDER_DRY_RUN) + @Test + public void testAlternateOmDbDirName() throws Exception { + File original = new File(OMStorage.getOmDbDir(cluster.getConf()), OM_DB_NAME); + // Place backup under a different parent directory to ensure we don't + // accidentally open the original om.db due to path handling bugs. 
+ File backupParent = new File(OMStorage.getOmDbDir(cluster.getConf()), "copy"); + File backup = new File(backupParent, "om-db-backup"); + + if (backup.exists()) { + FileUtils.deleteDirectory(backup); + } + if (backupParent.exists()) { + FileUtils.deleteDirectory(backupParent); + } + boolean created = backupParent.mkdirs(); + if (!created && !backupParent.exists()) { + throw new IOException("Failed to create backup parent directory: " + backupParent); + } + FileUtils.copyDirectory(original, backup); + + out.reset(); + String expectedOutput = serializeReport(fullReport); + int exitCode = executeWithDb(true, backup.getPath()); + assertEquals(0, exitCode, err.getOutput()); + + String cliOutput = out.getOutput(); + String reportOutput = extractRelevantSection(cliOutput); + assertEquals(expectedOutput, reportOutput); + + FileUtils.deleteDirectory(backupParent); + } + private int countTableEntries(Table table) throws Exception { int count = 0; try (Table.KeyValueIterator iterator = table.iterator()) { @@ -405,12 +450,12 @@ private String serializeReport(FSORepairTool.Report report) { report.getReachable().getDirs(), report.getReachable().getFiles(), report.getReachable().getBytes(), - report.getUnreachable().getDirs(), - report.getUnreachable().getFiles(), - report.getUnreachable().getBytes(), - report.getUnreferenced().getDirs(), - report.getUnreferenced().getFiles(), - report.getUnreferenced().getBytes() + report.getPendingToDelete().getDirs(), + report.getPendingToDelete().getFiles(), + report.getPendingToDelete().getBytes(), + report.getOrphaned().getDirs(), + report.getOrphaned().getFiles(), + report.getOrphaned().getBytes() ); } @@ -462,14 +507,14 @@ private static FSORepairTool.Report buildEmptyTree() throws IOException { fs.mkdirs(new Path("/vol-empty/bucket-empty")); FSORepairTool.ReportStatistics reachableCount = new FSORepairTool.ReportStatistics(0, 0, 0); - FSORepairTool.ReportStatistics unreachableCount = + FSORepairTool.ReportStatistics pendingToDeleteCount 
= new FSORepairTool.ReportStatistics(0, 0, 0); - FSORepairTool.ReportStatistics unreferencedCount = + FSORepairTool.ReportStatistics orphanedCount = new FSORepairTool.ReportStatistics(0, 0, 0); return new FSORepairTool.Report.Builder() .setReachable(reachableCount) - .setUnreachable(unreachableCount) - .setUnreferenced(unreferencedCount) + .setPendingToDelete(pendingToDeleteCount) + .setOrphaned(orphanedCount) .build(); } @@ -507,14 +552,14 @@ private static FSORepairTool.Report buildTreeWithUnreachableObjects(String volum FSORepairTool.ReportStatistics reachableCount = new FSORepairTool.ReportStatistics(1, 1, fileSize); - FSORepairTool.ReportStatistics unreachableCount = + FSORepairTool.ReportStatistics pendingToDeleteCount = new FSORepairTool.ReportStatistics(1, 2, fileSize * 2L); - FSORepairTool.ReportStatistics unreferencedCount = + FSORepairTool.ReportStatistics orphanedCount = new FSORepairTool.ReportStatistics(0, 0, 0); return new FSORepairTool.Report.Builder() .setReachable(reachableCount) - .setUnreachable(unreachableCount) - .setUnreferenced(unreferencedCount) + .setPendingToDelete(pendingToDeleteCount) + .setOrphaned(orphanedCount) .build(); } @@ -581,15 +626,15 @@ private static FSORepairTool.Report buildDisconnectedTree(String volume, String assertDisconnectedTreePartiallyReadable(volume, bucket); - // dir1 does not count towards the unreferenced directories the tool + // dir1 does not count towards the orphaned directories the tool // will see. It was deleted completely so the tool will never see it. 
FSORepairTool.ReportStatistics reachableCount = new FSORepairTool.ReportStatistics(1, 1, fileSize); - FSORepairTool.ReportStatistics unreferencedCount = + FSORepairTool.ReportStatistics orphanedCount = new FSORepairTool.ReportStatistics(1, 3, fileSize * 3L); return new FSORepairTool.Report.Builder() .setReachable(reachableCount) - .setUnreferenced(unreferencedCount) + .setOrphaned(orphanedCount) .build(); } diff --git a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java index c7b071a6e8d..a8640139b74 100644 --- a/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java +++ b/hadoop-ozone/ozone-manager/src/main/java/org/apache/hadoop/ozone/om/OmMetadataManagerImpl.java @@ -416,7 +416,7 @@ public static DBStore loadDB(OzoneConfiguration configuration, File metaDir, int .build(); } - private static DBStoreBuilder newDBStoreBuilder(OzoneConfiguration conf, String name, File dir) { + public static DBStoreBuilder newDBStoreBuilder(OzoneConfiguration conf, String name, File dir) { return DBStoreBuilder.newBuilder(conf, OMDBDefinition.get(), name, dir.toPath()); } diff --git a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java index cf7eb997f32..8cb1b58e65b 100644 --- a/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java +++ b/hadoop-ozone/tools/src/main/java/org/apache/hadoop/ozone/repair/om/FSORepairTool.java @@ -57,12 +57,12 @@ /** * Base Tool to identify and repair disconnected FSO trees across all buckets. - * This tool logs information about reachable, unreachable and unreferenced files and directories in debug mode - * and moves these unreferenced files and directories to the deleted tables in repair mode. 
+ * This tool logs information about reachable, pendingToDelete and orphaned files and directories in debug mode + * and moves these orphaned files and directories to the deleted tables in repair mode. * If deletes are still in progress (the deleted directory table is not empty), the tool - * reports that the tree is unreachable, even though pending deletes would fix the issue. - * If not, the tool reports them as unreferenced and deletes them in repair mode. + * reports that the tree is pendingToDelete, even though pending deletes would fix the issue. + * If not, the tool reports them as orphaned and deletes them in repair mode. * Before using the tool, make sure all OMs are stopped, and that all Ratis logs have been flushed to the OM DB. * This can be done using `ozone admin prepare` before running the tool, and `ozone admin @@ -70,24 +70,24 @@ * The tool will run a DFS from each bucket, and save all reachable directories as objectID-based keys in a * temporary RocksDB instance called "temp.db" in the same directory as om.db. It will also scan the - * deletedDirectoryTable to identify objects pending deletion and store them as original keys in an unreachable + * deletedDirectoryTable to identify objects pending deletion and store them as original keys in a pendingToDelete * table within the same temp.db instance. * It will then scan the entire file and directory tables for each bucket to classify each object: * - REACHABLE: Object's parent is in the reachable table (accessible from bucket root) - * - UNREACHABLE: Object is in the unreachable table (pending deletion) - * - UNREFERENCED: Object is neither reachable nor unreachable (orphaned, needs repair) + * - PENDING_TO_DELETE: Object is in the pendingToDelete table (pending deletion) + * - ORPHANED: Object is neither reachable nor pendingToDelete (orphaned, needs repair) * The tool is idempotent. temp.db will be automatically deleted when the tool finishes to ensure clean state. 
*/ @CommandLine.Command( name = "fso-tree", - description = "Identify and repair a disconnected FSO tree by marking unreferenced (orphaned) entries for " + + description = "Identify and repair a disconnected FSO tree by marking orphaned entries for " + "deletion. OM should be stopped while this tool is run." ) public class FSORepairTool extends RepairTool { private static final Logger LOG = LoggerFactory.getLogger(FSORepairTool.class); private static final String REACHABLE_TABLE = "reachable"; - private static final String UNREACHABLE_TABLE = "unreachable"; + private static final String PENDING_TO_DELETE_TABLE = "pendingToDelete"; private static final byte[] EMPTY_BYTE_ARRAY = {}; @CommandLine.Option(names = {"--db"}, @@ -136,15 +136,15 @@ private class Impl { private final Table snapshotInfoTable; private DBStore tempDB; private TypedTable reachableTable; - private TypedTable unreachableTable; + private TypedTable pendingToDeleteTable; private final ReportStatistics reachableStats; - private final ReportStatistics unreachableStats; - private final ReportStatistics unreferencedStats; + private final ReportStatistics pendingToDeleteStats; + private final ReportStatistics orphanedStats; Impl() throws IOException { this.reachableStats = new ReportStatistics(0, 0, 0); - this.unreachableStats = new ReportStatistics(0, 0, 0); - this.unreferencedStats = new ReportStatistics(0, 0, 0); + this.pendingToDeleteStats = new ReportStatistics(0, 0, 0); + this.orphanedStats = new ReportStatistics(0, 0, 0); this.store = getStoreFromPath(omDBPath); this.volumeTable = OMDBDefinition.VOLUME_TABLE_DEF.getTable(store); @@ -269,15 +269,15 @@ private void processBucket(OmVolumeArgs volume, OmBucketInfo bucketInfo) throws } info("Processing bucket: " + volume.getVolume() + "/" + bucketInfo.getBucketName()); markReachableObjectsInBucket(volume, bucketInfo); - markUnreachableObjectsInBucket(volume, bucketInfo); - handleUnreachableAndUnreferencedObjects(volume, bucketInfo); + 
markPendingToDeleteObjectsInBucket(volume, bucketInfo); + handlePendingToDeleteAndOrphanedObjects(volume, bucketInfo); } private Report buildReportAndLog() { Report report = new Report.Builder() .setReachable(reachableStats) - .setUnreachable(unreachableStats) - .setUnreferenced(unreferencedStats) + .setPendingToDelete(pendingToDeleteStats) + .setOrphaned(orphanedStats) .build(); info("\n" + report); @@ -315,7 +315,7 @@ private void markReachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo buck } } - private void markUnreachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + private void markPendingToDeleteObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { // Only put directories in the stack. // Directory keys should have the form /volumeID/bucketID/parentID/name. Stack dirKeyStack = new Stack<>(); @@ -343,8 +343,8 @@ private void markUnreachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bu String childPrefix = OM_KEY_PREFIX + volume.getObjectID() + OM_KEY_PREFIX + bucket.getObjectID() + OM_KEY_PREFIX + deletedObjectID + OM_KEY_PREFIX; - // Find all children of this deleted directory and mark as unreachable - Collection childDirs = getChildDirectoriesAndMarkAsUnreachable(childPrefix); + // Find all children of this deleted directory and mark as pendingToDelete + Collection childDirs = getChildDirectoriesAndMarkAsPendingToDelete(childPrefix); dirKeyStack.addAll(childDirs); } } @@ -360,16 +360,16 @@ private void markUnreachableObjectsInBucket(OmVolumeArgs volume, OmBucketInfo bu continue; } - // For unreachable directories, we need to build the prefix based on their objectID + // For pendingToDelete directories, we need to build the prefix based on their objectID String childPrefix = OM_KEY_PREFIX + volume.getObjectID() + OM_KEY_PREFIX + bucket.getObjectID() + OM_KEY_PREFIX + currentDir.getObjectID() + OM_KEY_PREFIX; - Collection childDirs = getChildDirectoriesAndMarkAsUnreachable(childPrefix); 
+ Collection childDirs = getChildDirectoriesAndMarkAsPendingToDelete(childPrefix); dirKeyStack.addAll(childDirs); } } - private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { - // Check for unreachable and unreferenced directories in the bucket. + private void handlePendingToDeleteAndOrphanedObjects(OmVolumeArgs volume, OmBucketInfo bucket) throws IOException { + // Check for pendingToDelete and orphaned directories in the bucket. String bucketPrefix = OM_KEY_PREFIX + volume.getObjectID() + OM_KEY_PREFIX + @@ -388,10 +388,10 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck } if (!isReachable(dirKey)) { - if (!isUnreachable(dirKey)) { - unreferencedStats.addDir(); + if (!isPendingToDelete(dirKey)) { + orphanedStats.addDir(); - info("Deleting unreferenced directory " + dirKey); + info("Deleting orphaned directory " + dirKey); if (!isDryRun()) { OmDirectoryInfo dirInfo = dirEntry.getValue(); markDirectoryForDeletion(volume.getVolume(), bucket.getBucketName(), dirKey, dirInfo); @@ -401,7 +401,7 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck } } - // Check for unreachable and unreferenced files + // Check for pendingToDelete and orphaned files try (TableIterator> fileIterator = fileTable.iterator()) { fileIterator.seek(bucketPrefix); @@ -415,10 +415,10 @@ private void handleUnreachableAndUnreferencedObjects(OmVolumeArgs volume, OmBuck OmKeyInfo fileInfo = fileEntry.getValue(); if (!isReachable(fileKey)) { - if (!isUnreachable(fileKey)) { - unreferencedStats.addFile(fileInfo.getDataSize()); + if (!isPendingToDelete(fileKey)) { + orphanedStats.addFile(fileInfo.getDataSize()); - info("Deleting unreferenced file " + fileKey); + info("Deleting orphaned file " + fileKey); if (!isDryRun()) { markFileForDeletion(bucket, fileKey, fileInfo); } @@ -497,10 +497,10 @@ private Collection getChildDirectoriesAndMarkAsReachable(OmVolumeArgs vo return 
childDirs; } - private Collection getChildDirectoriesAndMarkAsUnreachable(String dirPrefix) throws IOException { + private Collection getChildDirectoriesAndMarkAsPendingToDelete(String dirPrefix) throws IOException { Collection childDirs = new ArrayList<>(); - // Find child directories and mark them as unreachable + // Find child directories and mark them as pendingToDelete try (TableIterator> dirIterator = directoryTable.iterator()) { // Start searching the directory table at the current directory's @@ -517,14 +517,14 @@ private Collection getChildDirectoriesAndMarkAsUnreachable(String dirPre // Ensure this is an immediate child, not a deeper descendant String relativePath = childDirKey.substring(dirPrefix.length()); if (!relativePath.contains(OM_KEY_PREFIX)) { - addUnreachableEntry(childDirKey); + addPendingToDeleteEntry(childDirKey); childDirs.add(childDirKey); - unreachableStats.addDir(); + pendingToDeleteStats.addDir(); } } } - // Find child files and mark them as unreachable + // Find child files and mark them as pendingToDelete try (TableIterator> fileIterator = fileTable.iterator()) { fileIterator.seek(dirPrefix); @@ -539,8 +539,8 @@ private Collection getChildDirectoriesAndMarkAsUnreachable(String dirPre // Ensure this is an immediate child, not a deeper descendant String relativePath = childFileKey.substring(dirPrefix.length()); if (!relativePath.contains(OM_KEY_PREFIX)) { - addUnreachableEntry(childFileKey); - unreachableStats.addFile(childFileEntry.getValue().getDataSize()); + addPendingToDeleteEntry(childFileKey); + pendingToDeleteStats.addFile(childFileEntry.getValue().getDataSize()); } } } @@ -559,12 +559,12 @@ private void addReachableEntry(OmVolumeArgs volume, OmBucketInfo bucket, WithObj } /** - * Add the specified object to the unreachable table, indicating it is part + * Add the specified object to the pendingToDelete table, indicating it is part * of the disconnected FSO tree. 
*/ - private void addUnreachableEntry(String originalKey) throws IOException { + private void addPendingToDeleteEntry(String originalKey) throws IOException { // No value is needed for this table. - unreachableTable.put(originalKey, EMPTY_BYTE_ARRAY); + pendingToDeleteTable.put(originalKey, EMPTY_BYTE_ARRAY); } /** @@ -579,15 +579,15 @@ protected boolean isReachable(String fileOrDirKey) throws IOException { /** * @param fileOrDirKey The key of a file or directory in RocksDB. - * @return true if the entry itself is in the unreachable table. + * @return true if the entry itself is in the pendingToDelete table. */ - protected boolean isUnreachable(String fileOrDirKey) throws IOException { - return unreachableTable.get(fileOrDirKey) != null; + protected boolean isPendingToDelete(String fileOrDirKey) throws IOException { + return pendingToDeleteTable.get(fileOrDirKey) != null; } private void openTempDB() throws IOException { File tempDBFile = new File(new File(omDBPath).getParentFile(), "temp.db"); - info("Creating database with reachable and unreachable tables at " + tempDBFile); + info("Creating database with reachable and pendingToDelete tables at " + tempDBFile); // Delete the DB from the last run if it exists. 
if (tempDBFile.exists()) { FileUtils.deleteDirectory(tempDBFile); @@ -598,10 +598,10 @@ private void openTempDB() throws IOException { .setName("temp.db") .setPath(tempDBFile.getParentFile().toPath()) .addTable(REACHABLE_TABLE) - .addTable(UNREACHABLE_TABLE) + .addTable(PENDING_TO_DELETE_TABLE) .build(); reachableTable = tempDB.getTable(REACHABLE_TABLE, StringCodec.get(), ByteArrayCodec.get()); - unreachableTable = tempDB.getTable(UNREACHABLE_TABLE, StringCodec.get(), ByteArrayCodec.get()); + pendingToDeleteTable = tempDB.getTable(PENDING_TO_DELETE_TABLE, StringCodec.get(), ByteArrayCodec.get()); } private void closeTempDB() throws IOException { @@ -622,7 +622,15 @@ protected static DBStore getStoreFromPath(String dbPath) throws IOException { "not exist or is not a RocksDB directory.", dbPath)); } // Load RocksDB and tables needed. - return OmMetadataManagerImpl.loadDB(new OzoneConfiguration(), new File(dbPath).getParentFile(), -1); + // Use the actual directory name from the provided path (e.g., "om.db", "om-db-backup") + String dbName = omDBFile.getName(); + return OmMetadataManagerImpl.newDBStoreBuilder(new OzoneConfiguration(), dbName, omDBFile.getParentFile()) + .setOpenReadOnly(false) + .setEnableCompactionDag(true) + .setCreateCheckpointDirs(true) + .setEnableRocksDbMetrics(true) + .setMaxNumberOfOpenFiles(-1) + .build(); } /** @@ -664,46 +672,46 @@ private static String buildReachableParentKey(String fileOrDirKey) { */ public static class Report { private final ReportStatistics reachable; - private final ReportStatistics unreachable; - private final ReportStatistics unreferenced; + private final ReportStatistics pendingToDelete; + private final ReportStatistics orphaned; /** * Builds one report that is the aggregate of multiple others. */ public Report(Report... 
reports) { reachable = new ReportStatistics(); - unreachable = new ReportStatistics(); - unreferenced = new ReportStatistics(); + pendingToDelete = new ReportStatistics(); + orphaned = new ReportStatistics(); for (Report report : reports) { reachable.add(report.reachable); - unreachable.add(report.unreachable); - unreferenced.add(report.unreferenced); + pendingToDelete.add(report.pendingToDelete); + orphaned.add(report.orphaned); } } private Report(Report.Builder builder) { this.reachable = builder.reachable; - this.unreachable = builder.unreachable; - this.unreferenced = builder.unreferenced; + this.pendingToDelete = builder.pendingToDelete; + this.orphaned = builder.orphaned; } public ReportStatistics getReachable() { return reachable; } - public ReportStatistics getUnreachable() { - return unreachable; + public ReportStatistics getPendingToDelete() { + return pendingToDelete; } - public ReportStatistics getUnreferenced() { - return unreferenced; + public ReportStatistics getOrphaned() { + return orphaned; } @Override public String toString() { - return "Reachable:" + reachable + "\nUnreachable (Pending to delete):" + unreachable + - "\nUnreferenced (Orphaned):" + unreferenced; + return "Reachable:" + reachable + "\nUnreachable (Pending to delete):" + pendingToDelete + + "\nUnreferenced (Orphaned):" + orphaned; } @Override @@ -719,13 +727,13 @@ public boolean equals(Object other) { // Useful for testing. 
System.out.println("Comparing reports\nExpect:\n" + this + "\nActual:\n" + report); - return reachable.equals(report.reachable) && unreachable.equals(report.unreachable) && - unreferenced.equals(report.unreferenced); + return reachable.equals(report.reachable) && pendingToDelete.equals(report.pendingToDelete) && + orphaned.equals(report.orphaned); } @Override public int hashCode() { - return Objects.hash(reachable, unreachable, unreferenced); + return Objects.hash(reachable, pendingToDelete, orphaned); } /** @@ -733,8 +741,8 @@ public int hashCode() { */ public static final class Builder { private ReportStatistics reachable = new ReportStatistics(); - private ReportStatistics unreachable = new ReportStatistics(); - private ReportStatistics unreferenced = new ReportStatistics(); + private ReportStatistics pendingToDelete = new ReportStatistics(); + private ReportStatistics orphaned = new ReportStatistics(); public Builder() { } @@ -744,13 +752,13 @@ public Builder setReachable(ReportStatistics reachable) { return this; } - public Builder setUnreachable(ReportStatistics unreachable) { - this.unreachable = unreachable; + public Builder setPendingToDelete(ReportStatistics pendingToDelete) { + this.pendingToDelete = pendingToDelete; return this; } - public Builder setUnreferenced(ReportStatistics unreferenced) { - this.unreferenced = unreferenced; + public Builder setOrphaned(ReportStatistics orphaned) { + this.orphaned = orphaned; return this; } @@ -761,7 +769,7 @@ public Report build() { } /** - * Represents the statistics of reachable and unreachable data. + * Represents the statistics of reachable, pendingToDelete and orphaned data. * This gives the count of dirs, files and bytes. */