-
-
Notifications
You must be signed in to change notification settings - Fork 10
Fix assure match detection and add coverage for parallel batches #185
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -75,7 +75,7 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure | |
| if policy.FilePattern == "" { | ||
| log.Warn().Str("policy", policy.ID).Msg("ASSURE Policy without a filepattern is suboptimal") | ||
| } | ||
| if len(filesToAssure) > 25 { | ||
| if len(filesToAssure) > parallelBatchSize { | ||
| matchesFound, err = executeParallelAssure(rgPath, codePatternAssureJSON, filesToAssure, writer) | ||
| } else { | ||
| matchesFound, err = executeSingleAssure(rgPath, codePatternAssureJSON, filesToAssure, targetDir, policy, writer) | ||
|
Comment on lines
77
to
81
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2 | Confidence: High The parallel execution threshold now uses the centralized `parallelBatchSize` constant instead of a hard-coded `25`, keeping the batching threshold consistent with `runParallelRipgrep`. |
||
|
|
@@ -87,6 +87,8 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure | |
|
|
||
| } | ||
|
|
||
| reportPolicyStatus(policy, matchesFound) | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2 | Confidence: High The new status reporting mechanism introduces a global handler pattern. While tests validate its behavior, the related context doesn't show other callers of `SetPolicyStatusHandler`, so the handler's production wiring should be confirmed. |
||
|
|
||
| // Patch the JSON output file | ||
| err = patchJSONOutputFile(jsonOutputFile) | ||
| if err != nil { | ||
|
|
@@ -135,67 +137,22 @@ func executeAssure(policy Policy, rgPath string, targetDir string, filesToAssure | |
|
|
||
| func executeParallelAssure(rgPath string, baseArgs []string, filesToScan []string, writer *bufio.Writer) (bool, error) { | ||
|
|
||
| const batchSize = 25 | ||
| matched := true | ||
| var wg sync.WaitGroup | ||
| errChan := make(chan error, len(filesToScan)/batchSize+1) | ||
| var mu sync.Mutex | ||
|
|
||
| for i := 0; i < len(filesToScan); i += batchSize { | ||
| end := i + batchSize | ||
| if end > len(filesToScan) { | ||
| end = len(filesToScan) | ||
| } | ||
| batch := filesToScan[i:end] | ||
|
|
||
| // log.Debug().Msgf("RGM: %v", batch) | ||
|
|
||
| wg.Add(1) | ||
| go func(batch []string) { | ||
| defer wg.Done() | ||
| args := append(baseArgs, batch...) | ||
| cmd := exec.Command(rgPath, args...) | ||
| output, err := cmd.Output() | ||
|
|
||
| if err != nil { | ||
|
|
||
| if exitError, ok := err.(*exec.ExitError); ok { | ||
| // Exit code 1 in ripgrep means "no matches found" | ||
| if exitError.ExitCode() == 1 { | ||
| matched = false | ||
| err = nil // Reset error as this is the expected outcome for assure | ||
| } | ||
| } | ||
|
|
||
| if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() != 1 { | ||
| errChan <- fmt.Errorf("error executing ripgrep: %w", err) | ||
| return | ||
| } | ||
| } | ||
|
|
||
| mu.Lock() | ||
| _, writeErr := writer.Write(output) | ||
| if writeErr == nil { | ||
| writeErr = writer.Flush() | ||
| } | ||
| mu.Unlock() | ||
|
|
||
| if writeErr != nil { | ||
| errChan <- fmt.Errorf("error writing output: %w", writeErr) | ||
| } | ||
| }(batch) | ||
| } | ||
| return runParallelRipgrep(rgPath, baseArgs, filesToScan, func(output []byte) error { | ||
| mu.Lock() | ||
| defer mu.Unlock() | ||
|
|
||
| wg.Wait() | ||
| close(errChan) | ||
| if _, err := writer.Write(output); err != nil { | ||
| return fmt.Errorf("error writing output: %w", err) | ||
| } | ||
|
|
||
| for err := range errChan { | ||
| if err != nil { | ||
| return matched, err | ||
| if err := writer.Flush(); err != nil { | ||
| return fmt.Errorf("error writing output: %w", err) | ||
| } | ||
| } | ||
|
|
||
| return matched, nil | ||
| return nil | ||
| }) | ||
| } | ||
|
|
||
| func executeSingleAssure(rgPath string, baseArgs []string, filesToScan []string, targetDir string, policy Policy, writer *bufio.Writer) (bool, error) { | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,100 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "fmt" | ||
| "os" | ||
| "os/exec" | ||
| "path/filepath" | ||
| "sync" | ||
| "testing" | ||
| ) | ||
|
|
||
| func TestExecuteAssureParallelStatusHandler(t *testing.T) { | ||
| rgBinary, err := exec.LookPath("rg") | ||
| if err != nil { | ||
| t.Fatalf("rg binary not found: %v", err) | ||
| } | ||
|
|
||
| originalPolicyData := policyData | ||
| originalOutputDir := outputDir | ||
| policyData = &PolicyFile{} | ||
| baseOutputDir := t.TempDir() | ||
| outputDir = baseOutputDir | ||
| requiredDirs := []string{"_debug", "_sarif"} | ||
| for _, dir := range requiredDirs { | ||
| if err := os.MkdirAll(filepath.Join(outputDir, dir), 0o755); err != nil { | ||
| t.Fatalf("failed to create %s dir: %v", dir, err) | ||
| } | ||
| } | ||
| t.Cleanup(func() { | ||
| policyData = originalPolicyData | ||
| outputDir = originalOutputDir | ||
| }) | ||
|
|
||
| totalFiles := parallelBatchSize + 5 | ||
|
|
||
| tests := []struct { | ||
| name string | ||
| matchIndex int | ||
| wantMatch bool | ||
| }{ | ||
| {name: "match", matchIndex: 3, wantMatch: true}, | ||
| {name: "no_match", matchIndex: -1, wantMatch: false}, | ||
| } | ||
|
|
||
| for _, tc := range tests { | ||
| tc := tc | ||
| t.Run(tc.name, func(t *testing.T) { | ||
| targetDir := filepath.Join(baseOutputDir, tc.name) | ||
| if err := os.MkdirAll(targetDir, 0o755); err != nil { | ||
| t.Fatalf("failed to create target dir: %v", err) | ||
| } | ||
|
|
||
| files := make([]string, 0, totalFiles) | ||
| for i := 0; i < totalFiles; i++ { | ||
| filePath := filepath.Join(targetDir, fmt.Sprintf("file_%s_%d.txt", tc.name, i)) | ||
| content := "this file does not contain the pattern" | ||
| if tc.matchIndex >= 0 && i == tc.matchIndex { | ||
| content = "this file includes the special pattern" | ||
| } | ||
| if err := os.WriteFile(filePath, []byte(content), 0o644); err != nil { | ||
| t.Fatalf("failed to write test file: %v", err) | ||
| } | ||
| files = append(files, filePath) | ||
| } | ||
|
|
||
| policy := Policy{ | ||
| ID: fmt.Sprintf("policy-%s", tc.name), | ||
| FilePattern: "*.txt", | ||
| Regex: []string{"special"}, | ||
| } | ||
|
|
||
| var ( | ||
| mu sync.Mutex | ||
| statuses []bool | ||
| ) | ||
| SetPolicyStatusHandler(func(p Policy, matched bool) { | ||
| mu.Lock() | ||
| defer mu.Unlock() | ||
| statuses = append(statuses, matched) | ||
| }) | ||
| t.Cleanup(func() { | ||
| SetPolicyStatusHandler(nil) | ||
| }) | ||
|
|
||
| if err := executeAssure(policy, rgBinary, targetDir, files); err != nil { | ||
| t.Fatalf("executeAssure failed: %v", err) | ||
| } | ||
|
|
||
| mu.Lock() | ||
| defer mu.Unlock() | ||
| if len(statuses) == 0 { | ||
| t.Fatalf("status handler was not invoked") | ||
| } | ||
| got := statuses[len(statuses)-1] | ||
| if got != tc.wantMatch { | ||
| t.Fatalf("expected match status %v, got %v", tc.wantMatch, got) | ||
| } | ||
| }) | ||
| } | ||
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,89 @@ | ||
| package cmd | ||
|
|
||
| import ( | ||
| "bytes" | ||
| "errors" | ||
| "fmt" | ||
| "os/exec" | ||
| "sync" | ||
| ) | ||
|
|
||
| const parallelBatchSize = 25 | ||
|
|
||
| type ripgrepOutputConsumer func([]byte) error | ||
|
|
||
| func runParallelRipgrep(rgPath string, baseArgs []string, files []string, consume ripgrepOutputConsumer) (bool, error) { | ||
| if len(files) == 0 { | ||
| return false, nil | ||
| } | ||
|
|
||
| var ( | ||
| matchesFound bool | ||
| matchesMu sync.Mutex | ||
| wg sync.WaitGroup | ||
| errChan = make(chan error, (len(files)+parallelBatchSize-1)/parallelBatchSize) | ||
| ) | ||
|
|
||
| for start := 0; start < len(files); start += parallelBatchSize { | ||
| end := start + parallelBatchSize | ||
| if end > len(files) { | ||
| end = len(files) | ||
| } | ||
|
|
||
| batch := append([]string(nil), files[start:end]...) | ||
| wg.Add(1) | ||
| go func(batch []string) { | ||
| defer wg.Done() | ||
|
|
||
| args := append(append([]string(nil), baseArgs...), batch...) | ||
| cmd := exec.Command(rgPath, args...) | ||
| output, err := cmd.Output() | ||
|
|
||
| found := false | ||
| if err != nil { | ||
| var exitErr *exec.ExitError | ||
| if errors.As(err, &exitErr) { | ||
| if exitErr.ExitCode() == 1 { | ||
| // No matches for this batch; still process the output below. | ||
| } else { | ||
| errChan <- fmt.Errorf("error executing ripgrep: %w", err) | ||
| return | ||
| } | ||
| } else { | ||
| errChan <- fmt.Errorf("error executing ripgrep: %w", err) | ||
| return | ||
| } | ||
| } else { | ||
| found = true | ||
| } | ||
|
|
||
| if !found && bytes.Contains(output, []byte(`"type":"match"`)) { | ||
| found = true | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Bug: Ripgrep match detection flaw. The `found` flag is set by scanning the raw ripgrep output for a literal JSON substring (see the P2 note below), which can misreport matches.
Comment on lines
+60
to
+62
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. P2 | Confidence: Medium Match detection relies on fragile string matching against JSON output. While functional, this could break if ripgrep's JSON format changes. A more robust approach would parse the JSON to check for match objects. The current implementation may also false-positive on file content that happens to contain the literal string `"type":"match"`. Code Suggestion: // ripgrep --json emits newline-delimited JSON (one object per line),
// so decode object-by-object rather than unmarshalling the whole
// output as a single JSON array.
dec := json.NewDecoder(bytes.NewReader(output))
for dec.More() {
    var entry struct {
        Type string `json:"type"`
    }
    if err := dec.Decode(&entry); err != nil {
        break
    }
    if entry.Type == "match" {
        found = true
        break
    }
} |
||
|
|
||
| if consume != nil { | ||
| if err := consume(output); err != nil { | ||
| errChan <- err | ||
| return | ||
| } | ||
| } | ||
|
|
||
| if found { | ||
| matchesMu.Lock() | ||
| matchesFound = true | ||
| matchesMu.Unlock() | ||
| } | ||
| }(batch) | ||
| } | ||
|
|
||
| wg.Wait() | ||
| close(errChan) | ||
|
|
||
| for err := range errChan { | ||
| if err != nil { | ||
| return matchesFound, err | ||
| } | ||
| } | ||
|
|
||
| return matchesFound, nil | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,30 @@ | ||
| package cmd | ||
|
|
||
| import "sync" | ||
|
|
||
| var ( | ||
| statusHandlerMu sync.RWMutex | ||
| policyStatusHandler = func(Policy, bool) {} | ||
| ) | ||
|
|
||
| // SetPolicyStatusHandler registers a handler invoked when an assure policy finishes executing. | ||
| // Passing nil resets the handler to a no-op implementation. | ||
| func SetPolicyStatusHandler(handler func(Policy, bool)) { | ||
| statusHandlerMu.Lock() | ||
| defer statusHandlerMu.Unlock() | ||
|
|
||
| if handler == nil { | ||
| policyStatusHandler = func(Policy, bool) {} | ||
| return | ||
| } | ||
|
|
||
| policyStatusHandler = handler | ||
| } | ||
|
|
||
| func reportPolicyStatus(policy Policy, matchesFound bool) { | ||
| statusHandlerMu.RLock() | ||
| handler := policyStatusHandler | ||
| statusHandlerMu.RUnlock() | ||
|
|
||
| handler(policy, matchesFound) | ||
| } |
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
P2 | Confidence: High
Hard-coded array indices (e.g., `[0]`) assume a specific SARIF report structure. This is brittle — if the report format changes, this code may panic or misbehave. Add bounds checking or use helper methods to access nested properties safely.