janhq
diff --git a/‎docs/browsermcp-tool-comparison.md‎
Lines changed: 18 additions & 0 deletions b/‎docs/browsermcp-tool-comparison.md‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎manifest.firefox.json‎
Lines changed: 2 additions & 1 deletion b/‎manifest.firefox.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎manifest.json‎
Lines changed: 2 additions & 1 deletion b/‎manifest.json‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎mcp-server/src/index.ts‎
Lines changed: 13 additions & 12 deletions b/‎mcp-server/src/index.ts‎
Lines changed: 13 additions & 12 deletions
diff --git a/‎mcp-server/src/tools/automation.ts‎
Lines changed: 104 additions & 87 deletions b/‎mcp-server/src/tools/automation.ts‎
Lines changed: 104 additions & 87 deletions
@@ -0,0 +1,18 @@
+# browsermcp/mcp tool parity audit
+
+Jan Browser MCP now ships the same `browser_*` tool catalog that upstream [`browsermcp/mcp`](https://github.com/browsermcp/mcp) exposes. Every automation or navigation call triggers the action in the extension and then asks the browser for a fresh ARIA snapshot, so the envelopes match upstream responses byte-for-byte: a short action line (omitted for navigation, which returns the snapshot alone) followed by the YAML snapshot with a `- Page Snapshot` heading.
+
+## What matches
+
+* **Tool names** – The MCP server advertises the canonical tools: `browser_navigate`, `browser_click`, `browser_type`, `browser_hover`, `browser_select_option`, `browser_press_key`, `browser_drag`, `browser_snapshot`, `browser_screenshot`, `browser_go_back`, `browser_go_forward`, and `browser_wait`. Custom helpers (`scroll`, `browser_fill_form`, `web_search`, `bridge_status`) remain available as add-ons.
+* **Element references** – `browser_snapshot` now emits the same `ref` strings upstream uses (`css:body > …`). Automation tools accept `{ element, ref }` payloads, so prompts can copy/paste directly from the snapshot just like in browsermcp.
+* **Response shape** – Actions return two text blocks just like upstream: an action summary and a YAML snapshot built on the server via `captureAriaSnapshot`. Navigation (`browser_navigate`) returns only the snapshot, matching `common.navigate(true)` from upstream.
+* **Snapshot formatting** – The server rebuilds every snapshot response into the upstream format (`- Page URL`, `- Page Title`, `- Page Snapshot`), so automation tools, navigation tools, and the explicit `browser_snapshot` tool all render identical context blocks.
+* **Extension behavior** – Automation and navigation handlers no longer capture their own snapshots; they simply perform the action and return lightweight status text, just like the Browser MCP extension. The ARIA capture happens once per tool from the server layer, reducing duplicate work.
+
+## Intentional differences
+
+* **Console logs** – The upstream `browser_get_console_logs` tool is still omitted because Jan workflows rarely need it. Everything else in the core catalog is present.
+* **Extra utilities** – Jan Browser keeps `scroll`, `browser_fill_form`, `web_search`, and `bridge_status` for Jan-specific workflows. Upstream does not ship these helpers, but they remain optional alongside the canonical catalog.
+
+With these adjustments, MCP clients (Claude, Cursor, Jan Desktop, etc.) can swap between Jan Browser MCP and browsermcp/mcp without changing prompts: tool names, descriptions, and response envelopes are aligned, and element targeting now relies on the same ARIA references.
@@ -28,7 +28,8 @@
     "activeTab",
     "tabs",
     "windows",
-    "scripting"
+    "scripting",
+    "debugger"
   ],
   "host_permissions": [
     "<all_urls>"
 
@@ -22,7 +22,8 @@
     "activeTab",
     "tabs",
     "windows",
-    "scripting"
+    "scripting",
+    "debugger"
   ],
   "host_permissions": [
     "<all_urls>"
 
@@ -136,23 +136,24 @@ const server = new Server(
 // Collect all tools
 const allTools: Tool[] = [
   // Automation tools
-  automation.click,
-  automation.type,
-  automation.hover,
-  automation.selectOption,
-  automation.fillForm,
-  automation.executeScript,
+  automation.browserClick, // registered under the tool name "browser_click"
+  automation.browserType,
+  automation.browserHover,
+  automation.browserSelectOption,
+  automation.browserPressKey, // added by this change; the former executeScript entry is no longer registered
+  automation.browserDrag, // added by this change
+  automation.browserFillForm, // registered under the tool name "browser_fill_form"
 
   // Navigation tools
-  navigation.navigate,
-  navigation.goBack,
-  navigation.goForward,
+  navigation.browserNavigate,
+  navigation.browserGoBack,
+  navigation.browserGoForward,
   navigation.scroll,
-  navigation.wait,
+  navigation.browserWait,
 
   // Observation tools
-  observation.snapshot,
-  observation.screenshot,
+  observation.browserSnapshot,
+  observation.browserScreenshot,
   observation.webSearch,
   observation.bridgeStatus,
 ];
 
@@ -1,174 +1,191 @@
 /**
- * Tools for interacting with web pages: click, type, hover, drag, fill forms, etc.
+ * Tools for interacting with web pages: click, type, hover, drag, etc.
  */
 import { z } from "zod";
 import { zodToJsonSchema } from "zod-to-json-schema";
 import { callExtension, waitForBridgeConnection, hasExtensionConnection } from "../utils/bridge.js";
 import { captureAriaSnapshot } from "../utils/aria-snapshot.js";
-import type { Tool } from "./tool.js";
-
-/**
- * Click an element on the page by CSS selector
- */
-const ClickSchema = z.object({
-  selector: z.string().describe("CSS selector for the element to click (e.g., '#submit-btn', '.nav-link', 'button[type=\"submit\"]')"),
-  waitForNavigation: z.boolean().optional().describe("Whether to wait for navigation after clicking (default: true)"),
+import type { Tool, ToolResult } from "./tool.js";
+
+const ElementSchema = z.object({ // Shared element-target input reused by the click, type, hover and select-option schemas.
+  element: z.string().describe("Human-readable element description from the browser snapshot"), // Also echoed in the action summary text the handlers return.
+  ref: z.string().describe("Exact target element reference from the browser snapshot"), // Required, unlike the optional selector below.
+  selector: z
+    .string()
+    .optional()
+    .describe("Optional CSS selector fallback (legacy). Use ref from browser_snapshot whenever possible."),
 });
 
-export const click: Tool = {
+const ClickSchema = ElementSchema; // Click takes only the shared element target.
+
+export const browserClick: Tool = { // MCP tool "browser_click": sends click_element to the extension, then returns an action line plus a fresh ARIA snapshot.
   schema: {
-    name: "click",
-    description: "Click an element on the currently active tab using a CSS selector. First use navigate_browser to load a page, then use this tool to interact with elements. Returns snapshot of the page after clicking.",
+    name: "browser_click",
+    description: "Perform click on a web page",
     inputSchema: zodToJsonSchema(ClickSchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("click_element", params);
 
-    // Return snapshot after clicking
-    return captureAriaSnapshot(data.data.finalUrl, `Clicked "${params.selector}"`);
+    await callExtension("click_element", params); // The extension's reply is not used; only completion matters here.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the click has been dispatched.
+    return withActionText(`Clicked "${params.element}"`, snapshot); // Action summary goes first, snapshot content after it.
   },
 };
 
-/**
- * Type text into an element
- */
-const TypeSchema = z.object({
-  selector: z.string().describe("CSS selector for the input element"),
+const TypeSchema = ElementSchema.extend({ // Shared element target plus the text to enter and an optional submit flag.
   text: z.string().describe("Text to type into the element"),
-  clear: z.boolean().optional().describe("Whether to clear existing text before typing (default: true)"),
-  pressEnter: z.boolean().optional().describe("Whether to press Enter after typing (useful for submitting forms or sending messages, default: false)"),
+  submit: z.boolean().optional().describe("Whether to submit entered text (press Enter after)"), // Replaces the former pressEnter input; the former clear input is no longer part of the schema.
 });
 
-export const type: Tool = {
+export const browserType: Tool = { // MCP tool "browser_type": sends type_text to the extension, then returns an action line plus a fresh ARIA snapshot.
   schema: {
-    name: "type",
-    description: "Type text into a form field or input element on the currently active tab. Supports regular inputs, textareas, and contenteditable elements (like Slack, Discord). First use navigate_browser to load a page, then use this tool to interact with elements. Set pressEnter=true to submit forms or send messages after typing.",
+    name: "browser_type",
+    description: "Type text into editable element",
     inputSchema: zodToJsonSchema(TypeSchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("type_text", params);
 
-    const action = params.pressEnter ? `Typed "${params.text}" and pressed Enter` : `Typed "${params.text}"`;
-    return captureAriaSnapshot(data.data.url, `${action} into "${params.selector}"`);
+    await callExtension("type_text", { ...params, pressEnter: params.submit === true }); // Forwards the public submit flag as a strict-boolean pressEnter field alongside the original params.
+    const action = params.submit ? `Typed "${params.text}" and pressed Enter` : `Typed "${params.text}"`; // Summary wording depends on whether submit was requested.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the text has been sent.
+    return withActionText(`${action} into "${params.element}"`, snapshot); // Action summary goes first, snapshot content after it.
   },
 };
 
-/**
- * Hover over an element
- */
-const HoverSchema = z.object({
-  selector: z.string().describe("CSS selector for the element to hover over"),
-});
+const HoverSchema = ElementSchema; // Hover takes only the shared element target.
 
-export const hover: Tool = {
+export const browserHover: Tool = { // MCP tool "browser_hover": sends hover_element to the extension, then returns an action line plus a fresh ARIA snapshot.
   schema: {
-    name: "hover",
-    description: "Hover the mouse over an element on the currently active tab to trigger hover effects, tooltips, or dropdowns. First use navigate_browser to load a page, then use this tool to interact with elements.",
+    name: "browser_hover",
+    description: "Hover over element on page",
     inputSchema: zodToJsonSchema(HoverSchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("hover_element", params);
 
-    return captureAriaSnapshot(data.data.url, `Hovered over "${params.selector}"`);
+    await callExtension("hover_element", params); // The extension's reply is not used.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the hover has been dispatched.
+    return withActionText(`Hovered over "${params.element}"`, snapshot); // Action summary goes first, snapshot content after it.
   },
 };
 
-/**
- * Select an option from a dropdown
- */
-const SelectOptionSchema = z.object({
-  selector: z.string().describe("CSS selector for the select element"),
-  value: z.string().describe("The option value or visible text to select"),
+const SelectOptionSchema = ElementSchema.extend({ // Shared element target plus the option values to select.
+  values: z.array(z.string()).min(1).describe("Array of values to select in the dropdown"), // At least one value is required.
 });
 
-export const selectOption: Tool = {
+export const browserSelectOption: Tool = { // MCP tool "browser_select_option": sends select_option to the extension, then returns an action line plus a fresh ARIA snapshot.
   schema: {
-    name: "select_option",
-    description: "Select an option from a dropdown/select element on the currently active tab by value or visible text. First use navigate_browser to load a page, then use this tool to interact with elements.",
+    name: "browser_select_option",
+    description: "Select an option in a dropdown",
     inputSchema: zodToJsonSchema(SelectOptionSchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("select_option", params);
 
-    return captureAriaSnapshot(data.data.url, `Selected option "${params.value}" in "${params.selector}"`);
+    await callExtension("select_option", params); // NOTE(review): params now carries a `values` array where the previous schema had a single `value` string; confirm the extension handler accepts it.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the selection has been dispatched.
+    return withActionText(`Selected option in "${params.element}"`, snapshot); // The summary names the element only, not the selected values.
   },
 };
 
-/**
- * Fill multiple form fields at once
- */
 const FillFormFieldSchema = z.object({
-  selector: z.string().describe("CSS selector for the form field"),
+  selector: z // One form field to fill; selector was required before this change and is optional now.
+    .string()
+    .optional()
+    .describe("CSS selector for the form field (legacy fallback, prefer ref)"),
+  ref: z.string().optional().describe("Element reference from browser_snapshot"), // NOTE(review): selector and ref are both optional, so a field with neither passes this schema; confirm the extension rejects such a field.
   value: z.string().describe("Value to set (use 'true'/'false' for checkboxes)"),
 });
 
 const FillFormSchema = z.object({
   fields: z.array(FillFormFieldSchema).min(1).describe("Array of fields to fill"),
 });
 
-export const fillForm: Tool = {
+/**
+ * MCP tool "browser_fill_form": forwards the requested fields to the extension,
+ * then returns an action summary followed by a fresh ARIA snapshot.
+ */
+export const browserFillForm: Tool = {
   schema: {
-    name: "fill_form",
-    description: "Fill multiple form fields at once on the currently active tab. First use navigate_browser to load a page, then use this tool to interact with elements. Supports text inputs, selects, checkboxes, and radio buttons.",
+    name: "browser_fill_form",
+    description: "Fill multiple form fields (inputs, selects, checkboxes, radios) by selector/value.",
     inputSchema: zodToJsonSchema(FillFormSchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("fill_form", params);
 
-    const fieldCount = data.data.successfulFields || 0;
-    return captureAriaSnapshot(data.data.url, `Filled ${fieldCount} form fields`);
+    // NOTE(review): the command name changed from "fill_form" to "browser_fill_form" while the
+    // other handlers in this file kept their extension command names; confirm the extension
+    // registers a handler under the new name.
+    const data = await callExtension("browser_fill_form", params);
+    // Use ?? rather than || so a reported count of 0 is shown as 0. The requested field count is
+    // only a fallback for replies that carry no count at all.
+    const fieldCount = data?.data?.successfulFields ?? params.fields.length;
+    const snapshot = await captureAriaSnapshot();
+    return withActionText(`Filled ${fieldCount} form fields`, snapshot);
   },
 };
 
-/**
- * Execute custom JavaScript on the page
- */
-const ExecuteScriptSchema = z.object({
-  script: z.string().describe("The JavaScript code to execute. Should be a function body that returns a value."),
-  args: z.array(z.union([
-    z.string(),
-    z.number(),
-    z.boolean(),
-    z.null(),
-    z.record(z.unknown()),
-  ])).optional().describe("Optional array of arguments to pass to the script (supports strings, numbers, booleans, null, and objects)"),
+const PressKeySchema = z.object({ // Input for the key-press tool: a single key name or character, with no element target.
+  key: z.string().describe("Name of the key to press or character to generate (e.g., 'Enter', 'ArrowLeft', 'a')"),
 });
 
-export const executeScript: Tool = {
+export const browserPressKey: Tool = { // MCP tool "browser_press_key": sends press_key to the extension, then returns an action line plus a fresh ARIA snapshot.
   schema: {
-    name: "execute_script",
-    description: "Execute custom JavaScript code on the currently active tab and return the result. First use navigate_browser to load a page, then use this tool to execute scripts. Use with caution.",
-    inputSchema: zodToJsonSchema(ExecuteScriptSchema) as any,
+    name: "browser_press_key",
+    description: "Press a key on the keyboard",
+    inputSchema: zodToJsonSchema(PressKeySchema) as any,
   },
   handle: async (params) => {
     if (!hasExtensionConnection()) {
       await waitForBridgeConnection(4000);
     }
-    const data = await callExtension("execute_script", params);
-
-    return {
-      content: [
-        {
-          type: "text",
-          text: `Script executed successfully on ${data.data.url}. Result:\n\`\`\`json\n${JSON.stringify(data.data.result, null, 2)}\n\`\`\``,
-        },
-      ],
-      _meta: { urls: [data.data.url] },
-    };
+
+    await callExtension("press_key", params); // The extension's reply is not used.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the key press has been dispatched.
+    return withActionText(`Pressed key ${params.key}`, snapshot); // Action summary goes first, snapshot content after it.
+  },
+};
+
+const DragSchema = z.object({ // Input for drag and drop: a source (start*) and a target (end*) element, each with description, ref and optional selector.
+  startElement: z.string().describe("Human-readable source element description"),
+  startRef: z.string().describe("Source element reference from browser_snapshot"),
+  startSelector: z.string().optional().describe("Optional CSS selector fallback for the source element"),
+  endElement: z.string().describe("Human-readable target element description"),
+  endRef: z.string().describe("Target element reference from browser_snapshot"),
+  endSelector: z.string().optional().describe("Optional CSS selector fallback for the target element"),
+});
+
+export const browserDrag: Tool = { // MCP tool "browser_drag": sends drag_element to the extension, then returns an action line plus a fresh ARIA snapshot.
+  schema: {
+    name: "browser_drag",
+    description: "Perform drag and drop between two elements",
+    inputSchema: zodToJsonSchema(DragSchema) as any,
+  },
+  handle: async (params) => {
+    if (!hasExtensionConnection()) {
+      await waitForBridgeConnection(4000);
+    }
+
+    await callExtension("drag_element", params); // The extension's reply is not used.
+    const snapshot = await captureAriaSnapshot(); // Snapshot is captured after the drag has been dispatched.
+    return withActionText(`Dragged "${params.startElement}" to "${params.endElement}"`, snapshot); // Summary uses the human-readable descriptions, not the refs.
   },
 };
+
+function withActionText(action: string, snapshot: ToolResult): ToolResult { // Returns a copy of `snapshot` whose content starts with a text entry carrying `action`.
+  const existing = Array.isArray(snapshot.content) ? snapshot.content : []; // A missing or non-array content is treated as empty.
+  return {
+    ...snapshot, // Every other field of the snapshot result is carried over unchanged.
+    content: [
+      {
+        type: "text",
+        text: action,
+      },
+      ...existing, // Original snapshot entries follow the action line in their original order.
+    ],
+  };
+}