Merged
60 changes: 37 additions & 23 deletions apps/webapp/app/v3/services/aiRunFilterService.server.ts
@@ -30,7 +30,7 @@ const AIFilterResponseSchema = z
 export interface QueryQueues {
   query(
     search: string | undefined,
-    type: "task" | "custom" | undefined
+    type: "task" | "custom" | undefined,
   ): Promise<{
     queues: string[];
   }>;
@@ -39,14 +39,14 @@ export interface QueryQueues {
 export interface QueryVersions {
   query(
     versionPrefix: string | undefined,
-    isCurrent: boolean | undefined
+    isCurrent: boolean | undefined,
   ): Promise<
     | {
-      versions: string[];
-    }
+        versions: string[];
+      }
     | {
-      version: string;
-    }
+        version: string;
+      }
   >;
 }

@@ -64,13 +64,13 @@ export interface QueryTasks {
 
 export type AIFilterResult =
   | {
-    success: true;
-    filters: TaskRunListSearchFilters;
-  }
+      success: true;
+      filters: TaskRunListSearchFilters;
+    }
   | {
-    success: false;
-    error: string;
-  };
+      success: false;
+      error: string;
+    };
 
 export class AIRunFilterService {
   constructor(
@@ -80,7 +80,7 @@ export class AIRunFilterService {
       queryQueues: QueryQueues;
       queryTasks: QueryTasks;
     },
-    private readonly model: LanguageModelV1 = openai("gpt-4o-mini")
+    private readonly model: LanguageModelV1 = openai("gpt-4o-mini"),
   ) {}
 
   async call(text: string, environmentId: string): Promise<AIFilterResult> {
@@ -92,7 +92,9 @@ export class AIRunFilterService {
         lookupTags: tool({
           description: "Look up available tags in the environment",
           parameters: z.object({
-            query: z.string().optional().describe("Optional search query to filter tags"),
+            query: z.string().optional().describe(
+              "Optional search query to filter tags",
+            ),
           }),
           execute: async ({ query }) => {
             return await this.queryFns.queryTags.query(query);
@@ -110,22 +112,27 @@ export class AIRunFilterService {
               .string()
               .optional()
               .describe(
-                "Optional version name to filter (e.g. 20250701.1), it uses contains to compare. Don't pass `latest` or `current`, the query has to be in the reverse date format specified. Leave out to get all recent versions."
+                "Optional version name to filter (e.g. 20250701.1), it uses contains to compare. Don't pass `latest` or `current`, the query has to be in the reverse date format specified. Leave out to get all recent versions.",
               ),
           }),
           execute: async ({ versionPrefix, isCurrent }) => {
-            return await this.queryFns.queryVersions.query(versionPrefix, isCurrent);
+            return await this.queryFns.queryVersions.query(
+              versionPrefix,
+              isCurrent,
+            );
           },
         }),
         lookupQueues: tool({
           description: "Look up available queues in the environment",
           parameters: z.object({
-            query: z.string().optional().describe("Optional search query to filter queues"),
+            query: z.string().optional().describe(
+              "Optional search query to filter queues",
+            ),
             type: z
               .enum(["task", "custom"])
               .optional()
               .describe(
-                "Filter by queue type, only do this if the user specifies it explicitly."
+                "Filter by queue type, only do this if the user specifies it explicitly.",
               ),
           }),
           execute: async ({ query, type }) => {
@@ -142,12 +149,15 @@ export class AIRunFilterService {
         }),
       },
       maxSteps: 5,
-      system: `You are an AI assistant that converts natural language descriptions into structured filter parameters for a task run filtering system.
+      system:
+        `You are an AI assistant that converts natural language descriptions into structured filter parameters for a task run filtering system.
 
 Available filter options:
 - statuses: Array of run statuses (PENDING, EXECUTING, COMPLETED_SUCCESSFULLY, COMPLETED_WITH_ERRORS, CANCELED, TIMED_OUT, CRASHED, etc.)
 - period: Time period string (e.g., "1h", "7d", "30d", "1y")
-- from/to: ISO date string. Today's date is ${new Date().toISOString()}, if they only specify a day use the current month. If they don't specify a year use the current year. If they don't specify a time of day use midnight.
+- from/to: ISO date string. Today's date is ${
+          new Date().toISOString()
+        }, if they only specify a day use the current month. If they don't specify a year use the current year. If they don't specify a time of day use midnight.
 - tags: Array of tag names to filter by. Use the lookupTags tool to get the tags.
 - tasks: Array of task identifiers to filter by. Use the lookupTasks tool to get the tasks.
 - machines: Array of machine presets (micro, small, small-2x, medium, large, xlarge, etc.)
@@ -159,7 +169,7 @@ export class AIRunFilterService {
 - scheduleId: Specific schedule ID to filter by
 
 
-Common patterns to recognize:
+Common workflows to recognize:
 - "failed runs" → statuses: ["COMPLETED_WITH_ERRORS", "CRASHED", "TIMED_OUT", "SYSTEM_FAILURE"].
 - "runs not dequeued yet" → statuses: ["PENDING", "PENDING_VERSION", "DELAYED"]
 - If they say "only failed" then only use "COMPLETED_WITH_ERRORS".
@@ -232,7 +242,9 @@ export class AIRunFilterService {
       }
 
       // Validate the filters against the schema to catch any issues
-      const validationResult = AIFilters.safeParse(result.experimental_output.filters);
+      const validationResult = AIFilters.safeParse(
+        result.experimental_output.filters,
+      );
       if (!validationResult.success) {
         logger.error("AI filter validation failed", {
           errors: validationResult.error.errors,
@@ -252,7 +264,9 @@ export class AIRunFilterService {
           from: validationResult.data.from
             ? new Date(validationResult.data.from).getTime()
             : undefined,
-          to: validationResult.data.to ? new Date(validationResult.data.to).getTime() : undefined,
+          to: validationResult.data.to
+            ? new Date(validationResult.data.to).getTime()
+            : undefined,
         },
       };
     } catch (error) {
10 changes: 10 additions & 0 deletions docs/docs.json
@@ -333,6 +333,16 @@
         }
       ]
     },
+    {
+      "group": "Use cases",
+      "pages": [
+        "guides/use-cases/overview",
+        "guides/use-cases/data-processing-etl",
+        "guides/use-cases/media-generation",
+        "guides/use-cases/media-processing",
+        "guides/use-cases/marketing"
+      ]
+    },
     {
       "group": "Example projects",
       "pages": [
4 changes: 4 additions & 0 deletions docs/guides/introduction.mdx
@@ -15,6 +15,8 @@ mode: "center"
   <Card title="SvelteKit" img="/images/logo-svelte.png" href="/guides/community/sveltekit" />
 </CardGroup>
 
+import UseCasesCards from "/snippets/use-cases-cards.mdx";
+
 ## Guides
 
 Get set up fast using our detailed walk-through guides.
@@ -39,6 +41,8 @@ Get set up fast using our detailed walk-through guides.
 | [Using webhooks in Next.js](/guides/frameworks/nextjs-webhooks) | Trigger tasks from a webhook in Next.js |
 | [Using webhooks in Remix](/guides/frameworks/remix-webhooks) | Trigger tasks from a webhook in Remix |
 
+<UseCasesCards />
+
 ## Example projects
 
 Example projects are full projects with example repos you can fork and use. These are a great way of learning how to use Trigger.dev in your projects.
159 changes: 159 additions & 0 deletions docs/guides/use-cases/data-processing-etl.mdx
@@ -0,0 +1,159 @@
---
title: "Data processing & ETL workflows"
sidebarTitle: "Data processing & ETL"
description: "Learn how to use Trigger.dev for data processing and ETL (Extract, Transform, Load), including web scraping, database synchronization, batch enrichment, and more."
---

import UseCasesCards from "/snippets/use-cases-cards.mdx";

## Overview

Build complex data pipelines that process large datasets without timeouts. Handle streaming analytics, batch enrichment, web scraping, database sync, and file processing with automatic retries and progress tracking.

## Featured examples

<CardGroup cols={3}>
<Card
title="Realtime CSV importer"
icon="book"
href="/guides/example-projects/realtime-csv-importer"
>
Import CSV files with progress streamed live to frontend.
</Card>
<Card title="Web scraper with BrowserBase" icon="book" href="/guides/examples/scrape-hacker-news">
Scrape websites using BrowserBase and Puppeteer.
</Card>
<Card
title="Supabase database webhooks"
icon="book"
href="/guides/frameworks/supabase-edge-functions-database-webhooks"
>
Trigger tasks from Supabase database webhooks.
</Card>
</CardGroup>

## Benefits of using Trigger.dev for data processing & ETL workflows

**Process datasets for hours without timeouts:** Handle multi-hour transformations, large file processing, or complete database exports. No execution time limits.

**Parallel processing with built-in rate limiting:** Process thousands of records simultaneously while respecting API rate limits. Scale efficiently without overwhelming downstream services.

**Stream progress to your users in real-time:** Show row-by-row processing status updating live in your dashboard. Users see exactly where processing is and how much time remains.
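
As a rough illustration of these properties, here is a minimal sketch of a long-running task with automatic retries and live progress. The `fetchDataset` and `processRow` helpers are hypothetical stand-ins for your own data layer:

```ts
import { task, metadata } from "@trigger.dev/sdk/v3";

// Hypothetical stand-ins for your own data-access code.
const fetchDataset = async (url: string): Promise<string[]> =>
  (await (await fetch(url)).text()).split("\n");
const processRow = async (row: string) => row.trim();

export const processDataset = task({
  id: "process-dataset",
  retry: { maxAttempts: 3 }, // retried automatically on failure
  run: async (payload: { datasetUrl: string }) => {
    const rows = await fetchDataset(payload.datasetUrl);
    for (let i = 0; i < rows.length; i++) {
      await processRow(rows[i]);
      // Stream row-by-row progress to the dashboard via run metadata.
      metadata.set("progressPercent", Math.round(((i + 1) / rows.length) * 100));
    }
    return { processed: rows.length };
  },
});
```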

## Production use cases

<CardGroup cols={1}>
<Card title="MagicSchool AI customer story" href="https://trigger.dev/customers/magicschool-ai-customer-story">

Read how MagicSchool AI uses Trigger.dev to generate insights from millions of student interactions.

</Card>

<Card title="Comp AI customer story" href="https://trigger.dev/customers/comp-ai-customer-story">

Read how Comp AI uses Trigger.dev to automate evidence collection at scale, powering their open source, AI-driven compliance platform.

Contributor review: ⚠️ Potential issue | 🟡 Minor

Add a hyphen to "open source" when it modifies "compliance platform": line 54 should read "open-source, AI-driven compliance platform."

-Read how Comp AI uses Trigger.dev to automate evidence collection at scale, powering their open source, AI-driven compliance platform.
+Read how Comp AI uses Trigger.dev to automate evidence collection at scale, powering their open-source, AI-driven compliance platform.

</Card>
<Card title="Midday customer story" href="https://trigger.dev/customers/midday-customer-story">

Read how Midday use Trigger.dev to sync large volumes of bank transactions in their financial management platform.

Contributor review: ⚠️ Potential issue | 🟡 Minor

Fix verb agreement on line 58: "Midday use" should read "Midday uses".

-Read how Midday use Trigger.dev to sync large volumes of bank transactions in their financial management platform.
+Read how Midday uses Trigger.dev to sync large volumes of bank transactions in their financial management platform.

</Card>
</CardGroup>

## Example workflow patterns

<Tabs>
<Tab title="CSV file import">
  Simple CSV import pipeline. Receives a file upload, parses the CSV rows, validates the data, and imports it to the database with progress tracking; a minimal sketch follows the diagram.

<div align="center">

```mermaid
graph TB
A[importCSV] --> B[parseCSVFile]
B --> C[validateRows]
C --> D[bulkInsertToDB]
D --> E[notifyCompletion]
```

</div>
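
A compressed sketch of this pipeline as a single task, assuming hypothetical `parseCSV`, `isValidRow`, and `bulkInsert` helpers in place of a real CSV parser and database client:

```ts
import { task, metadata } from "@trigger.dev/sdk/v3";

// Hypothetical helpers: swap in a real CSV parser and your database client.
const parseCSV = (text: string) => text.trim().split("\n").map((line) => line.split(","));
const isValidRow = (row: string[]) => row.length > 0 && row.every((cell) => cell !== "");
const bulkInsert = async (rows: string[][]) => rows.length; // insert into your DB here

export const importCSV = task({
  id: "import-csv",
  run: async (payload: { fileUrl: string }) => {
    const text = await (await fetch(payload.fileUrl)).text();
    const rows = parseCSV(text);
    const valid = rows.filter(isValidRow);
    // Report validation progress to the frontend via run metadata.
    metadata.set("totalRows", rows.length);
    metadata.set("validRows", valid.length);
    await bulkInsert(valid);
    return { imported: valid.length, skipped: rows.length - valid.length };
  },
});
```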
</Tab>

<Tab title="Multi-source ETL pipeline">
**Coordinator pattern with parallel extraction**. Batch triggers parallel extraction from multiple sources (APIs, databases, S3), transforms and validates data, loads to data warehouse with monitoring.

<div align="center">

```mermaid
graph TB
A[runETLPipeline] --> B[coordinateExtraction]
B --> C[batchTriggerAndWait]

C --> D[extractFromAPI]
C --> E[extractFromDatabase]
C --> F[extractFromS3]

D --> G[transformData]
E --> G
F --> G

G --> H[validateData]
H --> I[loadToWarehouse]
```

</div>
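
A hedged sketch of the coordinator using `batchTriggerAndWait`, with a hypothetical `extractFromSource` child task standing in for the per-source extract tasks:

```ts
import { task } from "@trigger.dev/sdk/v3";

// Hypothetical child task: `source` names an API, database, or S3 bucket.
export const extractFromSource = task({
  id: "extract-from-source",
  run: async (payload: { source: string }) => {
    // ...pull raw records from the source here...
    return { source: payload.source, records: [] as unknown[] };
  },
});

export const runETLPipeline = task({
  id: "run-etl-pipeline",
  run: async (payload: { sources: string[] }) => {
    // Fan out one child run per source; they execute in parallel.
    const results = await extractFromSource.batchTriggerAndWait(
      payload.sources.map((source) => ({ payload: { source } }))
    );
    // Keep only successful extractions before transform/validate/load.
    const records = results.runs.flatMap((run) => (run.ok ? run.output.records : []));
    return { extracted: records.length };
  },
});
```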
</Tab>

<Tab title="Parallel web scraping">
**Coordinator pattern with browser automation**. Launches headless browsers in parallel to scrape multiple pages, extracts structured data, cleans and normalizes content, stores in database.

<div align="center">

```mermaid
graph TB
A[scrapeSite] --> B[coordinateScraping]
B --> C[batchTriggerAndWait]

C --> D[scrapePage1]
C --> E[scrapePage2]
C --> F[scrapePageN]

D --> G[cleanData]
E --> G
F --> G

G --> H[normalizeData]
H --> I[storeInDatabase]
```

</div>
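
A minimal sketch of the fan-out, assuming a hypothetical `scrapePage` task; a real implementation would drive Puppeteer or BrowserBase instead of plain `fetch`:

```ts
import { task } from "@trigger.dev/sdk/v3";

// Hypothetical scrape task; a real one would launch a headless browser.
export const scrapePage = task({
  id: "scrape-page",
  queue: { concurrencyLimit: 10 }, // at most 10 pages scraped at once
  run: async (payload: { url: string }) => {
    const html = await (await fetch(payload.url)).text(); // placeholder for browser automation
    return { url: payload.url, bytes: html.length };
  },
});

export const scrapeSite = task({
  id: "scrape-site",
  run: async (payload: { urls: string[] }) => {
    const results = await scrapePage.batchTriggerAndWait(
      payload.urls.map((url) => ({ payload: { url } }))
    );
    return { scraped: results.runs.filter((run) => run.ok).length };
  },
});
```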
</Tab>

<Tab title="Batch data enrichment">
**Coordinator pattern with rate limiting**. Fetches records needing enrichment, batch triggers parallel API calls with configurable concurrency to respect rate limits, validates enriched data, updates database.

<div align="center">

```mermaid
graph TB
A[enrichRecords] --> B[fetchRecordsToEnrich]
B --> C[coordinateEnrichment]
C --> D[batchTriggerAndWait]

D --> E[enrichRecord1]
D --> F[enrichRecord2]
D --> G[enrichRecordN]

E --> H[validateEnrichedData]
F --> H
G --> H

H --> I[updateDatabase]
```

</div>
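
A sketch of rate-limited enrichment under the same assumptions, using a `concurrencyLimit` on a hypothetical `enrichRecord` child task to keep parallel API calls within the provider's limits:

```ts
import { task } from "@trigger.dev/sdk/v3";

// Hypothetical enrichment task calling a rate-limited third-party API.
export const enrichRecord = task({
  id: "enrich-record",
  queue: { concurrencyLimit: 5 }, // cap parallel API calls to respect rate limits
  retry: { maxAttempts: 3 },
  run: async (payload: { recordId: string }) => {
    // ...call the enrichment API and write the result back here...
    return { recordId: payload.recordId, enriched: true };
  },
});

export const enrichRecords = task({
  id: "enrich-records",
  run: async (payload: { recordIds: string[] }) => {
    const results = await enrichRecord.batchTriggerAndWait(
      payload.recordIds.map((recordId) => ({ payload: { recordId } }))
    );
    const failed = results.runs.filter((run) => !run.ok).length;
    return { enriched: results.runs.length - failed, failed };
  },
});
```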
</Tab>
</Tabs>

<UseCasesCards />