Skip to content

Commit d088de9

Browse files
committed
Implement line tracking for filesystem chunks in handleChunksWithError
1 parent 26d039c commit d088de9

File tree

2 files changed

+92
-0
lines changed

2 files changed

+92
-0
lines changed

pkg/handlers/handlers.go

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package handlers
33
import (
44
"archive/zip"
55
"bufio"
6+
"bytes"
67
"context"
78
"errors"
89
"fmt"
@@ -11,6 +12,7 @@ import (
1112

1213
"github.com/gabriel-vasile/mimetype"
1314
"github.com/mholt/archives"
15+
"google.golang.org/protobuf/proto"
1416

1517
logContext "github.com/trufflesecurity/trufflehog/v3/pkg/context"
1618
"github.com/trufflesecurity/trufflehog/v3/pkg/feature"
@@ -405,6 +407,8 @@ func handleChunksWithError(
405407
chunkSkel *sources.Chunk,
406408
reporter sources.ChunkReporter,
407409
) error {
410+
var linesConsumed int64
411+
408412
for {
409413
select {
410414
case dataOrErr, ok := <-dataErrChan:
@@ -422,7 +426,13 @@ func handleChunksWithError(
422426
}
423427
if len(dataOrErr.Data) > 0 {
424428
chunk := *chunkSkel
429+
if chunk.SourceMetadata != nil {
430+
if cloned, ok := proto.Clone(chunk.SourceMetadata).(*source_metadatapb.MetaData); ok {
431+
chunk.SourceMetadata = cloned
432+
}
433+
}
425434
chunk.Data = dataOrErr.Data
435+
linesConsumed = updateFilesystemLineMetadata(&chunk, linesConsumed)
426436
if err := reporter.ChunkOk(ctx, chunk); err != nil {
427437
return fmt.Errorf("error reporting chunk: %w", err)
428438
}
@@ -433,6 +443,38 @@ func handleChunksWithError(
433443
}
434444
}
435445

446+
// updateFilesystemLineMetadata sets the 1-based starting line for filesystem chunks and
447+
// updates the running total of lines consumed so subsequent chunks can be
448+
// correctly anchored. Only the unique portion of the chunk (excluding the peek
449+
// overlap) contributes to the running count so that lines aren't double counted.
450+
//
451+
// This relies on HandleFile's default chunk reader, which emits chunks that
452+
// contain DefaultChunkSize bytes of unique data followed by a DefaultPeekSize
453+
// overlap with the next chunk.
454+
func updateFilesystemLineMetadata(chunk *sources.Chunk, linesConsumed int64) int64 {
455+
if chunk.SourceMetadata == nil {
456+
return linesConsumed
457+
}
458+
fsMeta := chunk.SourceMetadata.GetFilesystem()
459+
if fsMeta == nil {
460+
return linesConsumed
461+
}
462+
463+
fsMeta.Line = linesConsumed + 1
464+
465+
data := chunk.Data
466+
if len(data) == 0 {
467+
return linesConsumed
468+
}
469+
470+
uniqueLen := len(data)
471+
if uniqueLen > sources.DefaultChunkSize {
472+
uniqueLen = sources.DefaultChunkSize
473+
}
474+
475+
return linesConsumed + int64(bytes.Count(data[:uniqueLen], []byte("\n")))
476+
}
477+
436478
// isFatal determines whether the given error is a fatal error that should
437479
// terminate processing the current file, or a non-critical error that can be logged and ignored.
438480
// "Fatal" errors include context cancellation, deadline exceeded, and the

pkg/handlers/handlers_test.go

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,12 @@ import (
1717
"time"
1818

1919
"github.com/stretchr/testify/assert"
20+
"github.com/stretchr/testify/require"
2021
diskbufferreader "github.com/trufflesecurity/disk-buffer-reader"
2122

2223
"github.com/trufflesecurity/trufflehog/v3/pkg/context"
24+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/source_metadatapb"
25+
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
2326
"github.com/trufflesecurity/trufflehog/v3/pkg/sources"
2427
)
2528

@@ -153,6 +156,53 @@ func BenchmarkHandleFile(b *testing.B) {
153156
}
154157
}
155158

159+
func TestHandleChunksWithErrorSetsFilesystemLine(t *testing.T) {
160+
chunkCh := make(chan *sources.Chunk, 2)
161+
reporter := sources.ChanReporter{Ch: chunkCh}
162+
163+
chunkSkel := &sources.Chunk{
164+
SourceType: sourcespb.SourceType_SOURCE_TYPE_FILESYSTEM,
165+
SourceMetadata: &source_metadatapb.MetaData{
166+
Data: &source_metadatapb.MetaData_Filesystem{
167+
Filesystem: &source_metadatapb.Filesystem{File: "test.txt"},
168+
},
169+
},
170+
}
171+
172+
chunkSize := sources.DefaultChunkSize
173+
peekSize := sources.DefaultPeekSize
174+
175+
chunkOneMain := bytes.Repeat([]byte("a\n"), chunkSize/2)
176+
chunkOnePeek := bytes.Repeat([]byte("p\n"), peekSize/2)
177+
chunkOne := append(chunkOneMain, chunkOnePeek...)
178+
179+
chunkTwo := bytes.Repeat([]byte("b\n"), 10)
180+
181+
dataErrChan := make(chan DataOrErr, 2)
182+
dataErrChan <- DataOrErr{Data: chunkOne}
183+
dataErrChan <- DataOrErr{Data: chunkTwo}
184+
close(dataErrChan)
185+
186+
require.NoError(t, handleChunksWithError(context.Background(), dataErrChan, chunkSkel, reporter))
187+
188+
close(chunkCh)
189+
var chunks []*sources.Chunk
190+
for ch := range chunkCh {
191+
chunks = append(chunks, ch)
192+
}
193+
194+
require.Len(t, chunks, 2)
195+
196+
firstMeta := chunks[0].SourceMetadata.GetFilesystem()
197+
require.NotNil(t, firstMeta)
198+
require.Equal(t, int64(1), firstMeta.GetLine())
199+
200+
linesInFirstChunk := int64(bytes.Count(chunkOne[:chunkSize], []byte("\n")))
201+
secondMeta := chunks[1].SourceMetadata.GetFilesystem()
202+
require.NotNil(t, secondMeta)
203+
require.Equal(t, linesInFirstChunk+1, secondMeta.GetLine())
204+
}
205+
156206
func TestSkipArchive(t *testing.T) {
157207
file, err := os.Open("testdata/test.tgz")
158208
assert.Nil(t, err)

0 commit comments

Comments
 (0)