
Commit 75df59d

fix: add missing secrets worker to repository
The secrets worker was being ignored due to a broad gitignore pattern. Added an exception to allow the workers/secrets/ directory while still ignoring actual secrets.

Files added:
- workers/secrets/Dockerfile
- workers/secrets/requirements.txt
- workers/secrets/worker.py
1 parent 4e14b42 commit 75df59d

File tree

- .gitignore
- workers/secrets/Dockerfile
- workers/secrets/requirements.txt
- workers/secrets/worker.py

4 files changed: +389 -0 lines changed


.gitignore

Lines changed: 4 additions & 0 deletions
@@ -240,6 +240,10 @@ yarn-error.log*
 !**/secret_detection_benchmark_GROUND_TRUTH.json
 !**/secret_detection/results/

+# Exception: Allow workers/secrets/ directory (secrets detection worker)
+!workers/secrets/
+!workers/secrets/**
+
 secret*
 secrets/
 credentials*
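
Because gitignore gives precedence to the last matching pattern, it is worth probing how the new !workers/secrets/ exceptions interact with the secret* and secrets/ rules that follow them. A minimal verification sketch in Python, assuming it is run from the repository root (the secrets/example_credentials.txt path is purely illustrative):

import subprocess

# Paths to probe: the re-included worker files, plus one illustrative path
# that the broad secrets/ pattern should still catch (not part of the commit).
paths = [
    "workers/secrets/Dockerfile",
    "workers/secrets/requirements.txt",
    "workers/secrets/worker.py",
    "secrets/example_credentials.txt",
]

for path in paths:
    # git check-ignore -q exits 0 if the path is ignored, 1 if it is not.
    result = subprocess.run(["git", "check-ignore", "-q", path])
    print(f"{path}: {'ignored' if result.returncode == 0 else 'not ignored'}")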

workers/secrets/Dockerfile

Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
# FuzzForge Vertical Worker: Secret Detection
#
# Pre-installed tools for secret detection:
# - Gitleaks v8.18.0
# - TruffleHog v3.63.2
# - Temporal worker

FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    # Build essentials
    build-essential \
    # Development tools
    git \
    curl \
    wget \
    # Cleanup
    && rm -rf /var/lib/apt/lists/*

# Install Gitleaks v8.18.0
RUN wget -q https://github.com/gitleaks/gitleaks/releases/download/v8.18.0/gitleaks_8.18.0_linux_x64.tar.gz && \
    tar -xzf gitleaks_8.18.0_linux_x64.tar.gz && \
    mv gitleaks /usr/local/bin/ && \
    chmod +x /usr/local/bin/gitleaks && \
    rm gitleaks_8.18.0_linux_x64.tar.gz

# Install TruffleHog v3.63.2
RUN wget -q https://github.com/trufflesecurity/trufflehog/releases/download/v3.63.2/trufflehog_3.63.2_linux_amd64.tar.gz && \
    tar -xzf trufflehog_3.63.2_linux_amd64.tar.gz && \
    mv trufflehog /usr/local/bin/ && \
    chmod +x /usr/local/bin/trufflehog && \
    rm trufflehog_3.63.2_linux_amd64.tar.gz

# Verify installations
RUN gitleaks version && trufflehog --version

# Install Python dependencies for Temporal worker
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt && \
    rm /tmp/requirements.txt

# Create cache directory for downloaded targets
RUN mkdir -p /cache && chmod 755 /cache

# Copy worker entrypoint
COPY worker.py /app/worker.py

# Add toolbox and AI module to Python path (mounted at runtime)
ENV PYTHONPATH="/app:/app/toolbox:/app/ai_src:${PYTHONPATH}"
ENV PYTHONUNBUFFERED=1

# Healthcheck
HEALTHCHECK --interval=30s --timeout=10s --start-period=40s --retries=3 \
    CMD python3 -c "import sys; sys.exit(0)"

# Run worker
CMD ["python3", "/app/worker.py"]
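
Since the image bakes in both scanners and the toolbox is only mounted at runtime, a quick smoke test inside the container just needs to confirm the tools are on PATH. A minimal sketch mirroring the Dockerfile's own verification step (the check_scanners helper is hypothetical):

import shutil
import subprocess

def check_scanners() -> None:
    """Fail fast if the bundled secret scanners are missing from PATH."""
    for tool, version_cmd in [
        ("gitleaks", ["gitleaks", "version"]),
        ("trufflehog", ["trufflehog", "--version"]),
    ]:
        if shutil.which(tool) is None:
            raise RuntimeError(f"{tool} not found on PATH")
        # Same commands the Dockerfile runs in its verification step.
        subprocess.run(version_cmd, check=True)

if __name__ == "__main__":
    check_scanners()
    print("gitleaks and trufflehog are available")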

workers/secrets/requirements.txt

Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
# Temporal worker dependencies
temporalio>=1.5.0
pydantic>=2.0.0

# Storage (MinIO/S3)
boto3>=1.34.0

# Configuration
pyyaml>=6.0.0

# HTTP Client (for real-time stats reporting)
httpx>=0.27.0

# A2A Agent Communication (for LLM-based secret detection)
a2a-sdk[all]>=0.1.0

workers/secrets/worker.py

Lines changed: 309 additions & 0 deletions
@@ -0,0 +1,309 @@
"""
FuzzForge Vertical Worker: Secret Detection

This worker:
1. Discovers workflows for the 'secrets' vertical from mounted toolbox
2. Dynamically imports and registers workflow classes
3. Connects to Temporal and processes tasks
4. Handles activities for target download/upload from MinIO
"""

import asyncio
import importlib
import inspect
import logging
import os
import sys
from pathlib import Path
from typing import List, Any

import yaml
from temporalio.client import Client
from temporalio.worker import Worker

# Add toolbox to path for workflow and activity imports
sys.path.insert(0, '/app/toolbox')

# Import common storage activities
from toolbox.common.storage_activities import (
    get_target_activity,
    cleanup_cache_activity,
    upload_results_activity
)

# Configure logging
logging.basicConfig(
    level=os.getenv('LOG_LEVEL', 'INFO'),
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
)
logger = logging.getLogger(__name__)


async def discover_workflows(vertical: str) -> List[Any]:
    """
    Discover workflows for this vertical from mounted toolbox.

    Args:
        vertical: The vertical name (e.g., 'secrets', 'python', 'web')

    Returns:
        List of workflow classes decorated with @workflow.defn
    """
    workflows = []
    toolbox_path = Path("/app/toolbox/workflows")

    if not toolbox_path.exists():
        logger.warning(f"Toolbox path does not exist: {toolbox_path}")
        return workflows

    logger.info(f"Scanning for workflows in: {toolbox_path}")

    for workflow_dir in toolbox_path.iterdir():
        if not workflow_dir.is_dir():
            continue

        # Skip special directories
        if workflow_dir.name.startswith('.') or workflow_dir.name == '__pycache__':
            continue

        metadata_file = workflow_dir / "metadata.yaml"
        if not metadata_file.exists():
            logger.debug(f"No metadata.yaml in {workflow_dir.name}, skipping")
            continue

        try:
            # Parse metadata
            with open(metadata_file) as f:
                metadata = yaml.safe_load(f)

            # Check if workflow is for this vertical
            workflow_vertical = metadata.get("vertical")
            if workflow_vertical != vertical:
                logger.debug(
                    f"Workflow {workflow_dir.name} is for vertical '{workflow_vertical}', "
                    f"not '{vertical}', skipping"
                )
                continue

            # Check if workflow.py exists
            workflow_file = workflow_dir / "workflow.py"
            if not workflow_file.exists():
                logger.warning(
                    f"Workflow {workflow_dir.name} has metadata but no workflow.py, skipping"
                )
                continue

            # Dynamically import workflow module
            module_name = f"toolbox.workflows.{workflow_dir.name}.workflow"
            logger.info(f"Importing workflow module: {module_name}")

            try:
                module = importlib.import_module(module_name)
            except Exception as e:
                logger.error(
                    f"Failed to import workflow module {module_name}: {e}",
                    exc_info=True
                )
                continue

            # Find @workflow.defn decorated classes
            found_workflows = False
            for name, obj in inspect.getmembers(module, inspect.isclass):
                # Check if class has Temporal workflow definition
                if hasattr(obj, '__temporal_workflow_definition'):
                    workflows.append(obj)
                    found_workflows = True
                    logger.info(
                        f"✓ Discovered workflow: {name} from {workflow_dir.name} "
                        f"(vertical: {vertical})"
                    )

            if not found_workflows:
                logger.warning(
                    f"Workflow {workflow_dir.name} has no @workflow.defn decorated classes"
                )

        except Exception as e:
            logger.error(
                f"Error processing workflow {workflow_dir.name}: {e}",
                exc_info=True
            )
            continue

    logger.info(f"Discovered {len(workflows)} workflows for vertical '{vertical}'")
    return workflows


async def discover_activities(workflows_dir: Path) -> List[Any]:
    """
    Discover activities from workflow directories.

    Looks for activities.py files alongside workflow.py in each workflow directory.

    Args:
        workflows_dir: Path to workflows directory

    Returns:
        List of activity functions decorated with @activity.defn
    """
    activities = []

    if not workflows_dir.exists():
        logger.warning(f"Workflows directory does not exist: {workflows_dir}")
        return activities

    logger.info(f"Scanning for workflow activities in: {workflows_dir}")

    for workflow_dir in workflows_dir.iterdir():
        if not workflow_dir.is_dir():
            continue

        # Skip special directories
        if workflow_dir.name.startswith('.') or workflow_dir.name == '__pycache__':
            continue

        # Check if activities.py exists
        activities_file = workflow_dir / "activities.py"
        if not activities_file.exists():
            logger.debug(f"No activities.py in {workflow_dir.name}, skipping")
            continue

        try:
            # Dynamically import activities module
            module_name = f"toolbox.workflows.{workflow_dir.name}.activities"
            logger.info(f"Importing activities module: {module_name}")

            try:
                module = importlib.import_module(module_name)
            except Exception as e:
                logger.error(
                    f"Failed to import activities module {module_name}: {e}",
                    exc_info=True
                )
                continue

            # Find @activity.defn decorated functions
            found_activities = False
            for name, obj in inspect.getmembers(module, inspect.isfunction):
                # Check if function has Temporal activity definition
                if hasattr(obj, '__temporal_activity_definition'):
                    activities.append(obj)
                    found_activities = True
                    logger.info(
                        f"✓ Discovered activity: {name} from {workflow_dir.name}"
                    )

            if not found_activities:
                logger.warning(
                    f"Workflow {workflow_dir.name} has activities.py but no @activity.defn decorated functions"
                )

        except Exception as e:
            logger.error(
                f"Error processing activities from {workflow_dir.name}: {e}",
                exc_info=True
            )
            continue

    logger.info(f"Discovered {len(activities)} workflow-specific activities")
    return activities


async def main():
    """Main worker entry point"""
    # Get configuration from environment
    vertical = os.getenv("WORKER_VERTICAL", "secrets")
    temporal_address = os.getenv("TEMPORAL_ADDRESS", "localhost:7233")
    temporal_namespace = os.getenv("TEMPORAL_NAMESPACE", "default")
    task_queue = os.getenv("WORKER_TASK_QUEUE", f"{vertical}-queue")
    max_concurrent_activities = int(os.getenv("MAX_CONCURRENT_ACTIVITIES", "5"))

    logger.info("=" * 60)
    logger.info(f"FuzzForge Vertical Worker: {vertical}")
    logger.info("=" * 60)
    logger.info(f"Temporal Address: {temporal_address}")
    logger.info(f"Temporal Namespace: {temporal_namespace}")
    logger.info(f"Task Queue: {task_queue}")
    logger.info(f"Max Concurrent Activities: {max_concurrent_activities}")
    logger.info("=" * 60)

    # Discover workflows for this vertical
    logger.info(f"Discovering workflows for vertical: {vertical}")
    workflows = await discover_workflows(vertical)

    if not workflows:
        logger.error(f"No workflows found for vertical: {vertical}")
        logger.error("Worker cannot start without workflows. Exiting...")
        sys.exit(1)

    # Discover activities from workflow directories
    logger.info("Discovering workflow-specific activities...")
    workflows_dir = Path("/app/toolbox/workflows")
    workflow_activities = await discover_activities(workflows_dir)

    # Combine common storage activities with workflow-specific activities
    activities = [
        get_target_activity,
        cleanup_cache_activity,
        upload_results_activity
    ] + workflow_activities

    logger.info(
        f"Total activities registered: {len(activities)} "
        f"(3 common + {len(workflow_activities)} workflow-specific)"
    )

    # Connect to Temporal
    logger.info(f"Connecting to Temporal at {temporal_address}...")
    try:
        client = await Client.connect(
            temporal_address,
            namespace=temporal_namespace
        )
        logger.info("✓ Connected to Temporal successfully")
    except Exception as e:
        logger.error(f"Failed to connect to Temporal: {e}", exc_info=True)
        sys.exit(1)

    # Create worker with discovered workflows and activities
    logger.info(f"Creating worker on task queue: {task_queue}")

    try:
        worker = Worker(
            client,
            task_queue=task_queue,
            workflows=workflows,
            activities=activities,
            max_concurrent_activities=max_concurrent_activities
        )
        logger.info("✓ Worker created successfully")
    except Exception as e:
        logger.error(f"Failed to create worker: {e}", exc_info=True)
        sys.exit(1)

    # Start worker
    logger.info("=" * 60)
    logger.info(f"🚀 Worker started for vertical '{vertical}'")
    logger.info(f"📦 Registered {len(workflows)} workflows")
    logger.info(f"⚙️ Registered {len(activities)} activities")
    logger.info(f"📨 Listening on task queue: {task_queue}")
    logger.info("=" * 60)
    logger.info("Worker is ready to process tasks...")

    try:
        await worker.run()
    except KeyboardInterrupt:
        logger.info("Shutting down worker (keyboard interrupt)...")
    except Exception as e:
        logger.error(f"Worker error: {e}", exc_info=True)
        raise


if __name__ == "__main__":
    try:
        asyncio.run(main())
    except KeyboardInterrupt:
        logger.info("Worker stopped")
    except Exception as e:
        logger.error(f"Fatal error: {e}", exc_info=True)
        sys.exit(1)
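
For context on what the discovery loops above expect from the mounted toolbox: each directory under /app/toolbox/workflows/ needs a metadata.yaml whose vertical field matches the worker's vertical, a workflow.py containing a @workflow.defn class, and optionally an activities.py with @activity.defn functions. A minimal hypothetical sketch of such a pair (the names and parameters are illustrative, not part of this commit):

# metadata.yaml (minimal) -- the worker only reads the vertical field here:
#   vertical: secrets

# activities.py -- picked up by discover_activities() via @activity.defn
from temporalio import activity

@activity.defn
async def scan_for_secrets(target_path: str) -> dict:
    """Illustrative activity; a real one would shell out to gitleaks/trufflehog."""
    return {"target": target_path, "findings": []}


# workflow.py -- picked up by discover_workflows() via @workflow.defn
from datetime import timedelta
from temporalio import workflow

@workflow.defn
class SecretScanWorkflow:
    @workflow.run
    async def run(self, target_path: str) -> dict:
        # Delegate the actual scan to the activity, referenced by name.
        return await workflow.execute_activity(
            "scan_for_secrets",
            target_path,
            start_to_close_timeout=timedelta(minutes=10),
        )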
