Skip to content

Commit c557d95

Browse files
SgtPooki and Copilot authored
fix: pieces can be removed from a data-set (#253)
* fix: pieces can be removed from a data-set
* fix: handle pieces pending removal
* fix: display piece onchain vs pdpserver status
* fix: rm cmd requires --piece and --data-set
* fix: fix default to orphaned piece status
* deps: upgrade to latest synapse
* fix: use new createStorageContext signature in rm
* test: fix mock merge issue
* chore: remove unnecessary as string casting
* fix: logger is no longer required for createStorageContext
* test: remove-piece is tested
* test: assert runRmPiece happy path
* chore: remove outdated comment
* Update src/core/data-set/get-detailed-data-set.ts
* Update src/rm/remove-piece.ts
* Update src/rm/remove-piece.ts
* Update src/rm/remove-piece.ts
* chore: update jsdoc comments in runRmPiece
* fix: removePiece requires storageContext
* fix: document more about piece orphans
* fix: export Warning type

---------

Co-authored-by: Copilot <[email protected]>
1 parent 84332cd commit c557d95

File tree

20 files changed

+987
-40
lines changed

20 files changed

+987
-40
lines changed

package.json

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,10 @@
3838
"types": "./dist/core/payments/index.d.ts",
3939
"default": "./dist/core/payments/index.js"
4040
},
41+
"./core/piece": {
42+
"types": "./dist/core/piece/index.d.ts",
43+
"default": "./dist/core/piece/index.js"
44+
},
4145
"./core/synapse": {
4246
"types": "./dist/core/synapse/index.d.ts",
4347
"default": "./dist/core/synapse/index.js"
@@ -109,8 +113,8 @@
109113
"homepage": "https://github.com/filecoin-project/filecoin-pin#readme",
110114
"dependencies": {
111115
"@clack/prompts": "^0.11.0",
112-
"@filoz/synapse-core": "^0.1.3",
113-
"@filoz/synapse-sdk": "^0.36.0",
116+
"@filoz/synapse-core": "^0.1.4",
117+
"@filoz/synapse-sdk": "^0.36.1",
114118
"@helia/unixfs": "^6.0.1",
115119
"@ipld/car": "^5.4.2",
116120
"commander": "^14.0.1",

src/cli.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import { addCommand } from './commands/add.js'
66
import { dataSetCommand } from './commands/data-set.js'
77
import { importCommand } from './commands/import.js'
88
import { paymentsCommand } from './commands/payments.js'
9+
import { rmCommand } from './commands/rm.js'
910
import { serverCommand } from './commands/server.js'
1011
import { checkForUpdate, type UpdateCheckStatus } from './common/version-check.js'
1112
import { version as packageVersion } from './core/utils/version.js'
@@ -24,6 +25,7 @@ program.addCommand(paymentsCommand)
2425
program.addCommand(dataSetCommand)
2526
program.addCommand(importCommand)
2627
program.addCommand(addCommand)
28+
program.addCommand(rmCommand)
2729

2830
// Default action - show help if no command specified
2931
program.action(() => {

src/commands/rm.ts

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import { Command } from 'commander'
2+
import { runRmPiece } from '../rm/index.js'
3+
import { addAuthOptions } from '../utils/cli-options.js'
4+
5+
/**
 * `rm` CLI subcommand: removes a single Piece from a DataSet.
 *
 * Both `--piece` and `--data-set` are mandatory; `--wait-for-confirmation`
 * is an optional flag. The parsed options object is handed unchanged to
 * `runRmPiece`.
 */
export const rmCommand = new Command('rm')

rmCommand.description('Remove a Piece from a DataSet')

// Options are registered in this order on purpose: it is the order shown in `--help`.
rmCommand.requiredOption('--piece <cid>', 'Piece CID to remove')
rmCommand.requiredOption('--data-set <id>', 'DataSet ID to remove the piece from')
rmCommand.option('--wait-for-confirmation', 'Wait for transaction confirmation before exiting')

rmCommand.action(async (options) => {
  try {
    await runRmPiece(options)
  } catch {
    // Error already displayed by clack UI in runRmPiece
    // so only the non-zero exit status is left to signal.
    process.exit(1)
  }
})

// Shared auth options are appended after the command-specific ones.
addAuthOptions(rmCommand)

src/core/data-set/calculate-actual-storage.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import type { Synapse } from '@filoz/synapse-sdk'
22
import PQueue from 'p-queue'
33
import type { Logger } from 'pino'
44
import { createStorageContextFromDataSetId } from '../synapse/storage-context-helper.js'
5-
import type { ProgressEvent, ProgressEventHandler } from '../utils/types.js'
5+
import type { ProgressEvent, ProgressEventHandler, Warning } from '../utils/types.js'
66
import { getDataSetPieces } from './get-data-set-pieces.js'
7-
import type { DataSetSummary, DataSetWarning } from './types.js'
7+
import type { DataSetSummary } from './types.js'
88

99
export interface ActualStorageResult {
1010
/** Total storage in bytes across all active data sets */
@@ -18,7 +18,7 @@ export interface ActualStorageResult {
1818
/** Whether the calculation timed out */
1919
timedOut?: boolean
2020
/** Non-fatal warnings encountered during calculation */
21-
warnings: DataSetWarning[]
21+
warnings: Warning[]
2222
}
2323

2424
export type ActualStorageProgressEvents = ProgressEvent<
@@ -101,7 +101,7 @@ export async function calculateActualStorage(
101101
const maxParallelPerProvider = Math.max(1, options?.maxParallelPerProvider ?? 10)
102102
const onProgress = options?.onProgress
103103

104-
const warnings: DataSetWarning[] = []
104+
const warnings: Warning[] = []
105105
let totalBytes = 0n
106106
let pieceCount = 0
107107
let dataSetsProcessed = 0

src/core/data-set/get-data-set-pieces.ts

Lines changed: 77 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,20 @@
77
*/
88

99
import { getSizeFromPieceCID } from '@filoz/synapse-core/piece'
10-
import { METADATA_KEYS, type StorageContext, type Synapse, WarmStorageService } from '@filoz/synapse-sdk'
10+
import {
11+
type DataSetPieceData,
12+
METADATA_KEYS,
13+
PDPServer,
14+
PDPVerifier,
15+
type StorageContext,
16+
type Synapse,
17+
WarmStorageService,
18+
} from '@filoz/synapse-sdk'
19+
import { reconcilePieceStatus } from '../piece/piece-status.js'
20+
import type { Warning } from '../utils/types.js'
1121
import { isStorageContextWithDataSetId } from './type-guards.js'
12-
import type {
13-
DataSetPiecesResult,
14-
DataSetWarning,
15-
GetDataSetPiecesOptions,
16-
PieceInfo,
17-
StorageContextWithDataSetId,
18-
} from './types.js'
22+
import type { DataSetPiecesResult, GetDataSetPiecesOptions, PieceInfo, StorageContextWithDataSetId } from './types.js'
23+
import { PieceStatus } from './types.js'
1924

2025
/**
2126
* Get all pieces for a dataset from a StorageContext
@@ -57,15 +62,58 @@ export async function getDataSetPieces(
5762
}
5863

5964
const pieces: PieceInfo[] = []
60-
const warnings: DataSetWarning[] = []
65+
const warnings: Warning[] = []
66+
67+
// call PDPVerifier.getScheduledRemovals to get the list of pieces that are scheduled for removal
68+
let scheduledRemovals: number[] = []
69+
let pdpServerPieces: DataSetPieceData[] | null = null
70+
try {
71+
const warmStorage = await WarmStorageService.create(synapse.getProvider(), synapse.getWarmStorageAddress())
72+
const pdpVerifier = new PDPVerifier(synapse.getProvider(), warmStorage.getPDPVerifierAddress())
73+
scheduledRemovals = await pdpVerifier.getScheduledRemovals(storageContext.dataSetId)
74+
try {
75+
const providerInfo = await synapse.getProviderInfo(storageContext.provider.serviceProvider)
76+
const pdpServer = new PDPServer(null, providerInfo.products?.PDP?.data?.serviceURL ?? '')
77+
const dataSet = await pdpServer.getDataSet(storageContext.dataSetId)
78+
pdpServerPieces = dataSet.pieces
79+
} catch (error) {
80+
logger?.warn({ error }, 'Failed to fetch provider data for scheduled removals and orphan detection')
81+
warnings.push({
82+
code: 'PROVIDER_DATA_UNAVAILABLE',
83+
message: 'Failed to fetch provider data; orphan detection disabled',
84+
context: { dataSetId: storageContext.dataSetId, error: String(error) },
85+
})
86+
}
87+
} catch (error) {
88+
logger?.warn({ error }, 'Failed to get scheduled removals')
89+
warnings.push({
90+
code: 'SCHEDULED_REMOVALS_UNAVAILABLE',
91+
message: 'Failed to get scheduled removals',
92+
context: { dataSetId: storageContext.dataSetId, error: String(error) },
93+
})
94+
}
6195

6296
// Use the async generator to fetch all pieces
6397
try {
6498
const getPiecesOptions = { ...(signal && { signal }) }
99+
const providerPiecesById = pdpServerPieces ? new Map(pdpServerPieces.map((piece) => [piece.pieceId, piece])) : null
65100
for await (const piece of storageContext.getPieces(getPiecesOptions)) {
66101
const pieceId = piece.pieceId
67102
const pieceCid = piece.pieceCid
68-
const pieceInfo: PieceInfo = { pieceId, pieceCid: pieceCid.toString() }
103+
const { status, warning } = reconcilePieceStatus({
104+
pieceId,
105+
pieceCid,
106+
scheduledRemovals,
107+
providerPiecesById,
108+
})
109+
const pieceInfo: PieceInfo = {
110+
pieceId,
111+
pieceCid: pieceCid.toString(),
112+
status,
113+
}
114+
if (warning) {
115+
warnings.push(warning)
116+
}
69117

70118
// Calculate piece size from CID
71119
try {
@@ -79,6 +127,24 @@ export async function getDataSetPieces(
79127

80128
pieces.push(pieceInfo)
81129
}
130+
if (providerPiecesById !== null) {
131+
// reconcilePieceStatus removes provider matches as we stream on-chain pieces.
132+
// Remaining entries are only reported by the provider, which are off-chain orphans.
133+
for (const piece of providerPiecesById.values()) {
134+
// add the rest of the pieces to the pieces list
135+
pieces.push({
136+
pieceId: piece.pieceId,
137+
pieceCid: piece.pieceCid.toString(),
138+
status: PieceStatus.OFFCHAIN_ORPHANED,
139+
})
140+
warnings.push({
141+
code: 'OFFCHAIN_ORPHANED',
142+
message: 'Piece is reported by provider but not on-chain',
143+
context: { pieceId: piece.pieceId, pieceCid: piece.pieceCid.toString() },
144+
})
145+
}
146+
}
147+
pieces.sort((a, b) => a.pieceId - b.pieceId)
82148
} catch (error) {
83149
if (error instanceof Error && error.name === 'AbortError') {
84150
throw error
@@ -122,7 +188,7 @@ async function enrichPiecesWithMetadata(
122188
synapse: Synapse,
123189
storageContext: StorageContextWithDataSetId,
124190
pieces: PieceInfo[],
125-
warnings: DataSetWarning[],
191+
warnings: Warning[],
126192
logger?: GetDataSetPiecesOptions['logger']
127193
): Promise<void> {
128194
const dataSetId = storageContext.dataSetId

src/core/data-set/types.ts

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,24 @@
1010

1111
import type { EnhancedDataSetInfo, ProviderInfo, StorageContext } from '@filoz/synapse-sdk'
1212
import type { Logger } from 'pino'
13+
import type { Warning } from '../utils/types.js'
14+
15+
/**
16+
* Status of the piece, e.g. "pending removal", "active", "orphaned"
17+
*
18+
* - PENDING_REMOVAL: the piece is scheduled for deletion, but still showing on chain
19+
* - ACTIVE: the piece is active, onchain and known by the provider
20+
* - ONCHAIN_ORPHANED: the piece is not known by the provider, but still on chain
21+
* - OFFCHAIN_ORPHANED: the piece is known by the provider, but not on chain
22+
*
23+
* The orphaned states should not happen, but have been observed and should be logged and displayed to the user.
24+
*/
25+
export enum PieceStatus {
26+
ACTIVE = 'ACTIVE',
27+
PENDING_REMOVAL = 'PENDING_REMOVAL',
28+
ONCHAIN_ORPHANED = 'ONCHAIN_ORPHANED',
29+
OFFCHAIN_ORPHANED = 'OFFCHAIN_ORPHANED',
30+
}
1331

1432
/**
1533
* Information about a single piece in a dataset
@@ -19,6 +37,7 @@ export interface PieceInfo {
1937
pieceId: number
2038
/** Piece Commitment (CommP) as string */
2139
pieceCid: string
40+
status: PieceStatus
2241
/** Root IPFS CID (from metadata, if available) */
2342
rootIpfsCid?: string
2443
/** Piece size in bytes (if available) */
@@ -38,19 +57,7 @@ export interface DataSetPiecesResult {
3857
/** Total size of all pieces in bytes (sum of individual piece sizes) */
3958
totalSizeBytes?: bigint
4059
/** Non-fatal warnings encountered during retrieval */
41-
warnings?: DataSetWarning[]
42-
}
43-
44-
/**
45-
* Structured warning for non-fatal issues
46-
*/
47-
export interface DataSetWarning {
48-
/** Machine-readable warning code (e.g., 'METADATA_FETCH_FAILED') */
49-
code: string
50-
/** Human-readable warning message */
51-
message: string
52-
/** Additional context data (e.g., { pieceId: 123, dataSetId: 456 }) */
53-
context?: Record<string, unknown>
60+
warnings?: Warning[]
5461
}
5562

5663
/**

src/core/piece/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
export * from './remove-piece.js'

src/core/piece/piece-status.ts

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import type { DataSetPieceData } from '@filoz/synapse-sdk'
2+
import { PieceStatus } from '../data-set/types.js'
3+
import type { Warning } from '../utils/types.js'
4+
5+
/**
 * Everything needed to classify one on-chain piece.
 */
interface PieceStatusContext {
  /** Id of the on-chain piece being classified. */
  pieceId: number
  /** CID of the piece. It is never inspected here, only echoed back in warning context. */
  pieceCid: unknown
  /**
   * List of pieceIds that are scheduled for removal.
   *
   * This list is obtained from the PDPVerifier.getScheduledRemovals() method.
   * It is only read, never modified.
   */
  scheduledRemovals: readonly number[]
  /**
   * Map of provider-reported pieces keyed by pieceId, or `null` when provider
   * data is unavailable.
   *
   * This map is mutated: every on-chain piece that the provider also reports is
   * deleted from it (regardless of the status returned), so the entries left
   * once all on-chain pieces have been processed are provider-only pieces.
   */
  providerPiecesById: Map<DataSetPieceData['pieceId'], DataSetPieceData> | null
}

/**
 * Outcome of reconciling a single piece.
 */
interface PieceStatusResult {
  /** Reconciled status of the piece. */
  status: PieceStatus
  /** Present only when the piece is flagged as an on-chain orphan. */
  warning?: Warning
}

/**
 * Reconcile a piece's status across the two data sources we have:
 *
 * - On-chain: StorageContext.getPieces() (source of truth for what the PDP verifier knows)
 * - Provider-reported: PDPServer.getDataSet() (what the storage provider says it stores)
 *
 * https://github.com/filecoin-project/curio/issues/815 showed these can drift. This helper documents the rules we apply
 * to flag mismatches without blocking the listing flow:
 *
 * 0. Whatever the outcome, a provider entry matching this on-chain piece is removed from
 *    `providerPiecesById`, so only provider-only pieces are left over for the caller to
 *    report as OFFCHAIN_ORPHANED.
 * 1. If PDPVerifier marked the piece for removal, treat as PENDING_REMOVAL.
 * 2. If provider data is unavailable, assume ACTIVE (best effort).
 * 3. If provider reports the piece, treat as ACTIVE.
 * 4. Otherwise, the piece is on-chain but missing from the provider => ONCHAIN_ORPHANED.
 *
 * The optional warning conveys orphan cases to callers for user-facing messaging.
 *
 * @param context - The piece to classify plus the removal list and provider map to check it against.
 * @returns The reconciled status, with a warning attached only for ONCHAIN_ORPHANED.
 */
export function reconcilePieceStatus(context: PieceStatusContext): PieceStatusResult {
  const { pieceId, pieceCid, scheduledRemovals, providerPiecesById } = context

  // Consume the provider entry up front. Map.delete() reports whether the key
  // existed, so this doubles as the "does the provider report it?" check.
  // Doing it before any early return matters: a piece that is pending removal
  // is still on-chain, and leaving its provider entry in the map would make
  // the caller list it a second time as an off-chain orphan.
  const reportedByProvider = providerPiecesById !== null && providerPiecesById.delete(pieceId)

  if (scheduledRemovals.includes(pieceId)) {
    return { status: PieceStatus.PENDING_REMOVAL }
  }

  if (providerPiecesById === null) {
    // No provider data to compare against; assume the on-chain view is accurate.
    return { status: PieceStatus.ACTIVE }
  }

  if (reportedByProvider) {
    // Provider matches on-chain.
    return { status: PieceStatus.ACTIVE }
  }

  return {
    status: PieceStatus.ONCHAIN_ORPHANED,
    warning: {
      code: 'ONCHAIN_ORPHANED',
      message: 'Piece is on-chain but the provider does not report it',
      context: { pieceId, pieceCid },
    },
  }
}

0 commit comments

Comments
 (0)