24 changes: 24 additions & 0 deletions docs/mkdocs/docs/miro_api.md
@@ -0,0 +1,24 @@
# MiroAPI

!!! warning "Preview Documentation"
This service is currently in preview and limited to internal access. Public release will follow once it is production-ready.

## Overview
MiroAPI provides an internal caching layer for Serper Search and Jina Scrape to reduce costs, speed up development, and enable reproducible "go-back-in-time" sandbox runs by serving recorded results when available.

### Step 1: Apply for a MiroAPI key
Request a MiroAPI key through the internal portal.

### Step 2: Configure .env
```
# API for Google Search (recommended)
SERPER_API_KEY="svc-miro-api01-replace-with-your-key"
SERPER_BASE_URL="https://miro-api.miromind.site/serper"

# API for Web Scraping (recommended)
JINA_API_KEY="svc-miro-api01-replace-with-your-key"
JINA_BASE_URL="https://miro-api.miromind.site/jina"
```
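
Once the keys are configured, a quick sanity check can confirm the proxy is reachable. The sketch below is illustrative only: it assumes the MiroAPI proxy mirrors the standard Serper request shape (a POST to `SERPER_BASE_URL/search` with an `X-API-KEY` header), which is the same shape the bundled Serper MCP server uses.

```python
# Minimal sanity check for the MiroAPI Serper proxy (illustrative sketch).
# Assumes SERPER_API_KEY and SERPER_BASE_URL are set as in the .env above.
import os

import requests

resp = requests.post(
    f"{os.environ['SERPER_BASE_URL']}/search",
    json={"q": "MiroMind", "num": 3},
    headers={
        "X-API-KEY": os.environ["SERPER_API_KEY"],
        "Content-Type": "application/json",
    },
    timeout=30,
)
resp.raise_for_status()
# Print the first organic result, if any, to confirm the cache/proxy responds.
print(resp.json().get("organic", [])[:1])
```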



1 change: 1 addition & 0 deletions docs/mkdocs/mkdocs.yml
@@ -74,6 +74,7 @@ nav:
- tool-python: tool_python.md
- Advanced Features:
- E2B Advanced Features: e2b_advanced_features.md
- MiroAPI: miro_api.md
- Add New Tools: contribute_tools.md

- LLM Clients:
165 changes: 165 additions & 0 deletions src/tool/mcp_servers/miroapi_serper_mcp_server.py
@@ -0,0 +1,165 @@
# Copyright 2025 Miromind.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
adapted from
https://github.com/MiroMindAI/MiroRL/blob/5073693549ffe05a157a1886e87650ef3be6606e/mirorl/tools/serper_search.py#L1
"""

import os
from typing import Any, Dict

import requests
from mcp.server.fastmcp import FastMCP
from tenacity import (
retry,
retry_if_exception_type,
stop_after_attempt,
wait_exponential,
)

from .utils.url_unquote import decode_http_urls_in_dict

SERPER_BASE_URL = os.getenv("SERPER_BASE_URL", "https://google.serper.dev")
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")


# Initialize FastMCP server
mcp = FastMCP("serper-mcp-server")


@retry(
stop=stop_after_attempt(3),
wait=wait_exponential(multiplier=1, min=4, max=10),
retry=retry_if_exception_type(
(requests.ConnectionError, requests.Timeout, requests.HTTPError)
),
)
def make_serper_request(
payload: Dict[str, Any], headers: Dict[str, str]
) -> requests.Response:
"""Make HTTP request to Serper API with retry logic."""
response = requests.post(f"{SERPER_BASE_URL}/search", json=payload, headers=headers)
response.raise_for_status()
return response


def _is_huggingface_dataset_or_space_url(url):
"""
Check if the URL is a HuggingFace dataset or space URL.
:param url: The URL to check
:return: True if it's a HuggingFace dataset or space URL, False otherwise
"""
if not url:
return False
return "huggingface.co/datasets" in url or "huggingface.co/spaces" in url


@mcp.tool()
def google_search(
q: str,
gl: str = "us",
hl: str = "en",
location: str | None = None,
num: int | None = None,
tbs: str | None = None,
page: int | None = None,
autocorrect: bool | None = None,
) -> Dict[str, Any]:
"""
Tool to perform web searches via the Serper API and retrieve rich results.
It can return organic search results, "people also ask" questions,
related searches, and knowledge graph data.
Args:
q: Search query string
gl: Optional region code for search results in ISO 3166-1 alpha-2 format (e.g., 'us')
hl: Optional language code for search results in ISO 639-1 format (e.g., 'en')
location: Optional location for search results (e.g., 'SoHo, New York, United States', 'California, United States')
num: Number of results to return (default: 10)
tbs: Time-based search filter ('qdr:h' for past hour, 'qdr:d' for past day, 'qdr:w' for past week,
'qdr:m' for past month, 'qdr:y' for past year)
page: Page number of results to return (default: 1)
autocorrect: Whether to autocorrect spelling in query
Returns:
Dictionary containing search results and metadata.
"""
# Check for API key
if not SERPER_API_KEY:
return {
"success": False,
"error": "SERPER_API_KEY environment variable not set",
"results": [],
}

# Validate required parameter
if not q or not q.strip():
return {
"success": False,
"error": "Search query 'q' is required and cannot be empty",
"results": [],
}

try:
# Build payload with all supported parameters
payload: dict[str, Any] = {
Copilot AI (Oct 13, 2025):
Use the imported Dict type from typing instead of the built-in dict for consistency with the function signature and other type annotations in the file.

Suggested change:
- payload: dict[str, Any] = {
+ payload: Dict[str, Any] = {
"q": q.strip(),
"gl": gl,
"hl": hl,
}

# Add optional parameters if provided
if location:
payload["location"] = location
if num is not None:
payload["num"] = num
else:
payload["num"] = 10 # Default
if tbs:
payload["tbs"] = tbs
if page is not None:
payload["page"] = page
if autocorrect is not None:
payload["autocorrect"] = autocorrect

# Set up headers
headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}

# Make the API request
response = make_serper_request(payload, headers)
data = response.json()

# filter out HuggingFace dataset or space urls
organic_results = []
if "organic" in data:
for item in data["organic"]:
if _is_huggingface_dataset_or_space_url(item.get("link", "")):
continue
organic_results.append(item)

# Keep all original fields, but overwrite "organic"
response_data = dict(data)
response_data["organic"] = organic_results
response_data = decode_http_urls_in_dict(response_data)

return response_data

except Exception as e:
return {"success": False, "error": f"Unexpected error: {str(e)}", "results": []}


if __name__ == "__main__":
mcp.run()
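
For context on how this new server is exercised, here is a minimal client-side sketch (not part of this PR) that launches the module over stdio and calls its `google_search` tool with the MCP Python SDK; the query and key/URL values are placeholders.

```python
# Hypothetical client sketch: start miroapi_serper_mcp_server over stdio and
# invoke its google_search tool. Key and URL values are placeholders.
import asyncio

from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client


async def main() -> None:
    params = StdioServerParameters(
        command="python",
        args=["-m", "src.tool.mcp_servers.miroapi_serper_mcp_server"],
        env={
            "SERPER_API_KEY": "svc-miro-api01-replace-with-your-key",
            "SERPER_BASE_URL": "https://miro-api.miromind.site/serper",
        },
    )
    async with stdio_client(params) as (read, write):
        async with ClientSession(read, write) as session:
            await session.initialize()
            result = await session.call_tool("google_search", {"q": "MiroMind", "num": 5})
            print(result.content)


asyncio.run(main())
```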
46 changes: 38 additions & 8 deletions src/tool/mcp_servers/searching_mcp_server.py
@@ -2,6 +2,7 @@
#
# SPDX-License-Identifier: Apache-2.0

import sys
import os
import json
import requests
@@ -17,7 +18,11 @@


SERPER_API_KEY = os.environ.get("SERPER_API_KEY", "")
SERPER_BASE_URL = os.environ.get("SERPER_BASE_URL", "https://google.serper.dev")
JINA_API_KEY = os.environ.get("JINA_API_KEY", "")
JINA_BASE_URL = os.environ.get("JINA_BASE_URL", "https://r.jina.ai")

IS_MIRO_API = True if "miro" in SERPER_BASE_URL or "miro" in JINA_BASE_URL else False

# Google search result filtering environment variables
REMOVE_SNIPPETS = os.environ.get("REMOVE_SNIPPETS", "").lower() in ("true", "1", "yes")
@@ -122,11 +127,18 @@ async def google_search(
arguments["location"] = location
if tbs:
arguments["tbs"] = tbs
server_params = StdioServerParameters(
command="npx",
args=["-y", "serper-search-scrape-mcp-server"],
env={"SERPER_API_KEY": SERPER_API_KEY},
)
if IS_MIRO_API:
server_params = StdioServerParameters(
command=sys.executable,
args=["-m", "src.tool.mcp_servers.miroapi_serper_mcp_server"],
env={"SERPER_API_KEY": SERPER_API_KEY, "SERPER_BASE_URL": SERPER_BASE_URL},
)
else:
server_params = StdioServerParameters(
command="npx",
args=["-y", "serper-search-scrape-mcp-server"],
env={"SERPER_API_KEY": SERPER_API_KEY},
)
result_content = ""
retry_count = 0
max_retries = 5
@@ -348,7 +360,12 @@ async def search_wiki_revision(
content = await smart_request(
url=base_url,
params=params,
env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY},
env={
"SERPER_API_KEY": SERPER_API_KEY,
"JINA_API_KEY": JINA_API_KEY,
"SERPER_BASE_URL": SERPER_BASE_URL,
"JINA_BASE_URL": JINA_BASE_URL,
},
)
data = request_to_json(content)

@@ -527,6 +544,8 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
env={
"SERPER_API_KEY": SERPER_API_KEY,
"JINA_API_KEY": JINA_API_KEY,
"SERPER_BASE_URL": SERPER_BASE_URL,
"JINA_BASE_URL": JINA_BASE_URL,
},
)
data = request_to_json(content)
@@ -585,7 +604,12 @@ async def search_archived_webpage(url: str, year: int, month: int, day: int) ->
content = await smart_request(
url=base_url,
params={"url": url},
env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY},
env={
"SERPER_API_KEY": SERPER_API_KEY,
"JINA_API_KEY": JINA_API_KEY,
"SERPER_BASE_URL": SERPER_BASE_URL,
"JINA_BASE_URL": JINA_BASE_URL,
},
)
data = request_to_json(content)
if "archived_snapshots" in data and "closest" in data["archived_snapshots"]:
@@ -664,7 +688,13 @@ async def scrape_website(url: str) -> str:
"""
# TODO: Long Content Handling
return await smart_request(
url, env={"SERPER_API_KEY": SERPER_API_KEY, "JINA_API_KEY": JINA_API_KEY}
url,
env={
"SERPER_API_KEY": SERPER_API_KEY,
"JINA_API_KEY": JINA_API_KEY,
"SERPER_BASE_URL": SERPER_BASE_URL,
"JINA_BASE_URL": JINA_BASE_URL,
},
)


28 changes: 18 additions & 10 deletions src/tool/mcp_servers/utils/smart_request.py
@@ -30,13 +30,16 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
if env:
JINA_API_KEY = env.get("JINA_API_KEY", "")
SERPER_API_KEY = env.get("SERPER_API_KEY", "")
JINA_BASE_URL = env.get("JINA_BASE_URL", "https://r.jina.ai")
Copilot AI (Oct 13, 2025):
The JINA_BASE_URL variable is extracted from env but not assigned when env is None. This creates inconsistent behavior between the two branches - consider setting a default value in the else block as well.
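
One minimal way to address this, assuming the same default used in the env branch (`https://r.jina.ai`), would be:

```python
else:
    JINA_API_KEY = ""
    SERPER_API_KEY = ""
    # Assumed fix: mirror the default from the env branch so later uses of
    # JINA_BASE_URL (e.g. the IS_MIRO_API check) do not raise NameError.
    JINA_BASE_URL = "https://r.jina.ai"
```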
else:
JINA_API_KEY = ""
SERPER_API_KEY = ""

if JINA_API_KEY == "" and SERPER_API_KEY == "":
return "[ERROR]: JINA_API_KEY and SERPER_API_KEY are not set, smart_request is not available."

IS_MIRO_API = True if "miro" in JINA_BASE_URL else False

# Auto-add https:// if no protocol is specified
protocol_hint = ""
if not url.startswith(("http://", "https://")):
@@ -65,21 +68,24 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
):
youtube_hint = "[NOTE]: If you need to get information about its visual or audio content, please use tool 'visual_audio_youtube_analyzing' instead. This tool may not be able to provide visual and audio content of a YouTube Video.\n\n"

content, jina_err = await scrape_jina(url, JINA_API_KEY)
content, jina_err = await scrape_jina(url, JINA_API_KEY, JINA_BASE_URL)
if jina_err:
error_msg += f"Failed to get content from Jina.ai: {jina_err}\n"
elif content is None or content.strip() == "":
error_msg += "No content got from Jina.ai.\n"
else:
return protocol_hint + youtube_hint + content

content, serper_err = await scrape_serper(url, SERPER_API_KEY)
if serper_err:
error_msg += f"Failed to get content from SERPER: {serper_err}\n"
elif content is None or content.strip() == "":
error_msg += "No content got from SERPER.\n"
else:
return protocol_hint + youtube_hint + content
if not IS_MIRO_API:
# Try Serper API for scraping if not using Miro API
# (Miro API does not support caching Serper scraping results)
content, serper_err = await scrape_serper(url, SERPER_API_KEY)
if serper_err:
error_msg += f"Failed to get content from SERPER: {serper_err}\n"
elif content is None or content.strip() == "":
error_msg += "No content got from SERPER.\n"
else:
return protocol_hint + youtube_hint + content

content, request_err = scrape_request(url)
if request_err:
@@ -99,7 +105,9 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
await asyncio.sleep(4**retry_count)


async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
async def scrape_jina(
url: str, jina_api_key: str, jina_base_url: str
) -> tuple[str, str]:
# Use Jina.ai reader API to convert URL to LLM-friendly text
if jina_api_key == "":
return (
Expand All @@ -116,7 +124,7 @@ async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
"X-With-Shadow-Dom": "true",
}

jina_url = f"https://r.jina.ai/{url}"
jina_url = f"{jina_base_url}/{url}"
try:
response = requests.get(jina_url, headers=jina_headers, timeout=120)
if response.status_code == 422: