Skip to content

Commit d24afd0

Browse files
committed
feat(search): switch to DuckDuckGo as default search provider with Google fallback
- Replace Google search as the default for web scraping - Implement performDuckDuckGoSearchAndScrape() with robust HTML parsing - Update manifest.json to include DuckDuckGo host permissions - Add user preference toggle for DuckDuckGo-only mode in Options - Update sidepanel to use unified SEARCH_AND_SCRAPE API - Add Checkbox component for consistent UI patterns - Resolves Google blocking issues by prioritizing DuckDuckGo
1 parent 4c3df7a commit d24afd0

File tree

9 files changed

+618
-32
lines changed

9 files changed

+618
-32
lines changed

bun.lock

Lines changed: 18 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
"": {
55
"name": "jan-browser-extension",
66
"dependencies": {
7+
"@radix-ui/react-checkbox": "^1.1.6",
78
"@radix-ui/react-label": "^2.1.7",
89
"@radix-ui/react-popover": "^1.1.14",
910
"@radix-ui/react-scroll-area": "^1.2.9",
@@ -192,10 +193,12 @@
192193

193194
"@radix-ui/number": ["@radix-ui/[email protected]", "", {}, "sha512-MkKCwxlXTgz6CFoJx3pCwn07GKp36+aZyu/u2Ln2VrA5DcdyCZkASEDBTd8x5whTQQL5CiYf4prXKLcgQdv29g=="],
194195

195-
"@radix-ui/primitive": ["@radix-ui/primitive@1.1.2", "", {}, "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA=="],
196+
"@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="],
196197

197198
"@radix-ui/react-arrow": ["@radix-ui/[email protected]", "", { "dependencies": { "@radix-ui/react-primitive": "2.1.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-F+M1tLhO+mlQaOWspE8Wstg+z6PwxwRd8oQ8IXceWz92kfAmalTRf0EjrouQeo7QssEPfCn05B4Ihs1K9WQ/7w=="],
198199

200+
"@radix-ui/react-checkbox": ["@radix-ui/[email protected]", "", { "dependencies": { "@radix-ui/primitive": "1.1.3", "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-context": "1.1.2", "@radix-ui/react-presence": "1.1.5", "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-controllable-state": "1.2.2", "@radix-ui/react-use-previous": "1.1.1", "@radix-ui/react-use-size": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-wBbpv+NQftHDdG86Qc0pIyXk5IR3tM8Vd0nWLKDcX8nNn4nXFOFwsKuqw2okA/1D/mpaAkmuyndrPJTYDNZtFw=="],
201+
199202
"@radix-ui/react-compose-refs": ["@radix-ui/react-compose-refs@1.1.2", "", { "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-z4eqJvfiNnFMHIIvXP3CY57y2WJs5g2v3X0zm9mEJkrkNv4rDxu+sg9Jh8EkXyeqBkB7SOcboo9dMVqhyrACIg=="],
200203

201204
"@radix-ui/react-context": ["@radix-ui/react-context@1.1.2", "", { "peerDependencies": { "@types/react": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react"] }, "sha512-jCi/QKUM2r1Ju5a3J64TH2A5SpKAgh0LpknyqdQ4m6DCV0xJ2HG1xARRwNGPQfi1SLdLWZ1OJz6F4OMBBNiGJA=="],
@@ -218,7 +221,7 @@
218221

219222
"@radix-ui/react-portal": ["@radix-ui/[email protected]", "", { "dependencies": { "@radix-ui/react-primitive": "2.1.3", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-bpIxvq03if6UNwXZ+HTK71JLh4APvnXntDc6XOX8UVq4XQOVl7lwok0AvIl+b8zgCw3fSaVTZMpAPPagXbKmHQ=="],
220223

221-
"@radix-ui/react-presence": ["@radix-ui/react-presence@1.1.4", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-ueDqRbdc4/bkaQT3GIpLQssRlFgWaL/U2z/S31qRwwLWoxHLgry3SIfCwhxeQNbirEUXFa+lq3RL3oBYXtcmIA=="],
224+
"@radix-ui/react-presence": ["@radix-ui/react-presence@1.1.5", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-/jfEwNDdQVBCNvjkGit4h6pMOzq8bHkopq458dPt2lMjx+eBQUohZNG9A7DtO/O5ukSbxuaNGXMjHicgwy6rQQ=="],
222225

223226
"@radix-ui/react-primitive": ["@radix-ui/react-primitive@2.1.3", "", { "dependencies": { "@radix-ui/react-slot": "1.2.3" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-m9gTwRkhy2lvCPe6QJp4d3G1TYEUHn/FzJUtq9MjH46an1wJU+GdoGC5VLof8RX8Ft/DlpshApkhswDLZzHIcQ=="],
224227

@@ -762,7 +765,19 @@
762765

763766
"@babel/helper-compilation-targets/semver": ["[email protected]", "", { "bin": "bin/semver.js" }, "sha512-BR7VvDCVHO+q2xBEWskxS6DJE1qRnb7DxzUrogb71CWoSficBxYsiAGd+Kl0mmq/MprG9yArRkyrQxTO6XjMzA=="],
764767

765-
"@radix-ui/react-switch/@radix-ui/primitive": ["@radix-ui/primitive@1.1.3", "", {}, "sha512-JTF99U/6XIjCBo0wqkU5sK10glYe27MRRsfwoiq5zzOEZLHU3A3KCMa5X/azekYRCJ0HlwI0crAXS/5dEHTzDg=="],
768+
"@radix-ui/react-dismissable-layer/@radix-ui/primitive": ["@radix-ui/primitive@1.1.2", "", {}, "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA=="],
769+
770+
"@radix-ui/react-popover/@radix-ui/primitive": ["@radix-ui/primitive@1.1.2", "", {}, "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA=="],
771+
772+
"@radix-ui/react-popover/@radix-ui/react-presence": ["@radix-ui/react-presence@1.1.4", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-ueDqRbdc4/bkaQT3GIpLQssRlFgWaL/U2z/S31qRwwLWoxHLgry3SIfCwhxeQNbirEUXFa+lq3RL3oBYXtcmIA=="],
773+
774+
"@radix-ui/react-scroll-area/@radix-ui/primitive": ["@radix-ui/primitive@1.1.2", "", {}, "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA=="],
775+
776+
"@radix-ui/react-scroll-area/@radix-ui/react-presence": ["@radix-ui/react-presence@1.1.4", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-ueDqRbdc4/bkaQT3GIpLQssRlFgWaL/U2z/S31qRwwLWoxHLgry3SIfCwhxeQNbirEUXFa+lq3RL3oBYXtcmIA=="],
777+
778+
"@radix-ui/react-tooltip/@radix-ui/primitive": ["@radix-ui/primitive@1.1.2", "", {}, "sha512-XnbHrrprsNqZKQhStrSwgRUQzoCI1glLzdw79xiZPoofhGICeZRSQ3dIxAKH1gb3OHfNf4d6f+vAv3kil2eggA=="],
779+
780+
"@radix-ui/react-tooltip/@radix-ui/react-presence": ["@radix-ui/react-presence@1.1.4", "", { "dependencies": { "@radix-ui/react-compose-refs": "1.1.2", "@radix-ui/react-use-layout-effect": "1.1.1" }, "peerDependencies": { "@types/react": "*", "@types/react-dom": "*", "react": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc", "react-dom": "^16.8 || ^17.0 || ^18.0 || ^19.0 || ^19.0.0-rc" }, "optionalPeers": ["@types/react", "@types/react-dom"] }, "sha512-ueDqRbdc4/bkaQT3GIpLQssRlFgWaL/U2z/S31qRwwLWoxHLgry3SIfCwhxeQNbirEUXFa+lq3RL3oBYXtcmIA=="],
766781

767782
"chalk/supports-color": ["[email protected]", "", { "dependencies": { "has-flag": "^4.0.0" } }, "sha512-qpCAvRl9stuOHveKsn7HncJRvv501qIacKzQlO/+Lwxc9+0q2wLyv4Dfvt80/DPn2pqOBsJdDiogXGR9+OvwRw=="],
768783

issue_google.md

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
# Issue: Google SERP scraping is unreliable/intermittent
2+
3+
## Summary
4+
- __Problem__: Our extension’s Google search scraping sometimes returns 0 results or stalls despite opening a SERP tab.
5+
- __Impact__: Blocks the MCP `search` tool and side panel features that depend on Google results; user-facing queries fail or degrade.
6+
- __Scope__: Background flow (`performGoogleSearchAndScrape()`), content script selectors/ready checks, and MCP bridge.
7+
8+
## Environment
9+
- Chrome Extension (MV3) with background service worker: `src/background.js`
10+
- Content script scraping: `src/content.js`
11+
- MCP bridge/server (websocket): `mcp/search-server/src/index.ts`
12+
13+
## Expected vs Actual
14+
- __Expected__: Given a query, open Google SERP and return N organic results (`numResults`, 1–10) + optional answer box.
15+
- __Actual__: Intermittently returns empty results or fails readiness; sometimes appears to stop on unrelated tabs.
16+
17+
## Repro (minimal)
18+
1. Start dev processes: `npm run dev:all`.
19+
2. From MCP client, call `search` with a query and `numResults` (e.g., 5).
20+
3. Observe occasional 0 results or readiness timeouts, despite SERP loading.
21+
22+
## What we tried (code references)
23+
- __Threaded numResults end-to-end__
24+
- Bridge handler forwards `params.numResults` → `performGoogleSearchAndScrape()` in `src/background.js`.
25+
- Content script respects `payload.numResults` in `SCRAPE_GOOGLE_SERP` (`src/content.js`).
26+
- __Realistic Google URLs__
27+
- Added `oq`, `sourceid=chrome`, `ie=UTF-8`, `hl`, `gl`, `sclient=gws-wiz-serp` in `performGoogleSearchAndScrape()` (`src/background.js`).
28+
- __Readiness polling with jitter__
29+
- `WAIT_FOR_SERP_READY` randomized interval; min results derived from `numResults` (`src/content.js` + `src/background.js`).
30+
- __Human-like interaction__
31+
- `HUMANIZE_SERP` small smooth scrolls and random waits before scraping (`src/content.js`, invoked from `src/background.js`).
32+
- __Robust scraping path + debug mode__
33+
- Primary selectors for organic results; fallback path; optional diagnostics (`SCRAPE_GOOGLE_SERP` in `src/content.js`).
34+
- __MCP bridge path__
35+
- `connectMcpBridge()` → `performGoogleSearchAndScrape()`; returns `{answerBox, results}` (`src/background.js`, `mcp/search-server/src/index.ts`).
36+
37+
## Observations
38+
- Google DOM/layout varies by locale/experiment; some pages delay organic nodes or interleave modules (Top stories, AI Overviews, People Also Ask), breaking early readiness assumptions.
39+
- Intermittent 0 organic results even when SERP visually loads (likely timing/selector drift).
40+
- Rare cases where active tab context/logs suggest focus on a non-SERP tab during scrape window.
41+
42+
## Proposed next steps
43+
- __Add DuckDuckGo fallback__ (if Google readiness fails or results.length === 0):
44+
- Open DDG SERP and scrape organic results with analogous handlers.
45+
- New content handlers: `WAIT_FOR_DDG_READY`, `SCRAPE_DDG_SERP` (`src/content.js`).
46+
- Gate by feature flag and surface source in the response payload.
47+
- __Selector hardening__ for Google:
48+
- Expand organic container selectors and ignore ads/modules consistently.
49+
- Add a second-chance readiness after minor scroll/short delay.
50+
- __Timeout/threshold tuning__:
51+
- Slightly increase `readinessTimeoutMs` for cold start; minResults heuristic based on `numResults` and page modules.
52+
- __Better tab targeting__:
53+
- Ensure scrape messages are sent only to the created SERP tab ID and verify URL origin before acting.
54+
- __Optional__: Introduce Google CSE HTTP fallback when running outside the browser (if policy allows and keys are available).
55+
56+
## Implementation plan
57+
58+
- __Permissions__
59+
- Add DuckDuckGo host permission in `manifest.json`: `https://duckduckgo.com/*`.
60+
61+
- __Background flow__ (`src/background.js`)
62+
- Track and use the created SERP `tabId` exclusively for readiness/scrape messaging.
63+
- Verify origin before acting using helpers like `isGoogleSerp(url)` / `isDuckDuckGoSerp(url)`.
64+
- Control flow: try Google → if readiness fails or 0 results, fall back to DDG and return its results.
65+
- Always include `{ source: 'google' | 'duckduckgo' }` in the returned payload, together with `results` and optional `answerBox`.
66+
- Tune timeouts: allow a slightly higher `readinessTimeoutMs` on first try; keep jittered polling; add a second-chance readiness after a small scroll.
67+
68+
- __Content script__ (`src/content.js`)
69+
- Google hardening: broaden organic-result detection under the main results area and consistently ignore ads/modules (Top stories, PAA, AI Overviews, etc.).
70+
- Add `WAIT_FOR_DDG_READY` and `SCRAPE_DDG_SERP` handlers with robust selectors that target organic result cards under the DDG results container, skipping sponsored/instant-answer blocks.
71+
- Add a second-chance readiness probe after a short delay + minor scroll before declaring failure.
72+
- Instrument debug logs and counts for readiness attempts, total results, and timing when debug mode is enabled.
73+
74+
- __MCP bridge__ (`mcp/search-server/src/index.ts`)
75+
- Ensure `numResults` threading and pass-through of `{ source, results, answerBox }` from background to MCP response.
76+
77+
- __UI__ (`ui/sidepanel/App.jsx`)
78+
- Surface the search source (Google/DDG) and basic diagnostics in debug mode.
79+
80+
- __Docs & tests__
81+
- Update `docs/SPEC*.md` on scraping sources and behavior.
82+
- Add a manual smoke test checklist and capture sample logs to verify readiness/scrape timing.
83+
84+
## Task checklist
85+
86+
- [ ] Add `https://duckduckgo.com/*` to `manifest.json` host permissions.
87+
- [ ] Background: restrict messages to the created SERP tab; verify URL origin; implement fallback to DDG; return `{ source, results, answerBox }`.
88+
- [ ] Content: implement `WAIT_FOR_DDG_READY` and `SCRAPE_DDG_SERP`; harden Google selectors; add second-chance readiness and gentle scroll.
89+
- [ ] Timing: tune `readinessTimeoutMs` and polling jitter; enable a one-time extended timeout on cold start.
90+
- [ ] Debug: add diagnostics counters/timing and expose them behind a debug flag.
91+
- [ ] MCP: thread `numResults` and include `source` in responses.
92+
- [ ] UI: display search `source` and show debug diagnostics in dev mode.
93+
- [ ] Docs: update SPEC/ADR and add selector notes; add a manual smoke-test checklist.
94+
95+
## Acceptance criteria
96+
- With `numResults` in [1..10], at least 95% of calls return the requested number from Google OR fall back to DDG with the same count.
97+
- No stalls; response includes source metadata: `{ source: 'google' | 'duckduckgo' }`.
98+
- Debug mode shows counts and timing diagnostics for readiness and scrape.

manifest.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,9 @@
2626
],
2727
"host_permissions": [
2828
"https://*/*",
29-
"http://*/*"
29+
"http://*/*",
30+
"https://duckduckgo.com/*",
31+
"https://html.duckduckgo.com/*"
3032
],
3133
"background": {
3234
"service_worker": "src/background.js",

package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
"author": "",
2323
"license": "Apache-2.0",
2424
"dependencies": {
25+
"@radix-ui/react-checkbox": "^1.1.6",
2526
"@radix-ui/react-label": "^2.1.7",
2627
"@radix-ui/react-popover": "^1.1.14",
2728
"@radix-ui/react-scroll-area": "^1.2.9",

0 commit comments

Comments
 (0)