# feat(tool): add miroapi support #76
@@ -0,0 +1,24 @@

# MiroAPI

!!! warning "Preview Documentation"
    This service is currently in preview and limited to internal access. Public release will follow once it is production-ready.

## Overview
MiroAPI provides an internal caching layer for Serper Search and Jina Scrape to reduce costs, speed up development, and enable reproducible "go-back-in-time" sandbox runs by serving recorded results when available.

### Step 1: Apply for a MiroAPI key
Request a MiroAPI key through the internal portal.

### Step 2: Configure .env
```
# API for Google Search (recommended)
SERPER_API_KEY="svc-miro-api01-replace-with-your-key"
SERPER_BASE_URL="https://miro-api.miromind.site/serper"

# API for Web Scraping (recommended)
JINA_API_KEY="svc-miro-api01-replace-with-your-key"
JINA_BASE_URL="https://miro-api.miromind.site/jina"
```
@@ -0,0 +1,165 @@

```python
# Copyright 2025 Miromind.ai
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
adapted from
https://github.com/MiroMindAI/MiroRL/blob/5073693549ffe05a157a1886e87650ef3be6606e/mirorl/tools/serper_search.py#L1
"""

import os
from typing import Any, Dict

import requests
from mcp.server.fastmcp import FastMCP
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

from .utils.url_unquote import decode_http_urls_in_dict

SERPER_BASE_URL = os.getenv("SERPER_BASE_URL", "https://google.serper.dev")
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "")


# Initialize FastMCP server
mcp = FastMCP("serper-mcp-server")


@retry(
    stop=stop_after_attempt(3),
    wait=wait_exponential(multiplier=1, min=4, max=10),
    retry=retry_if_exception_type(
        (requests.ConnectionError, requests.Timeout, requests.HTTPError)
    ),
)
def make_serper_request(
    payload: Dict[str, Any], headers: Dict[str, str]
) -> requests.Response:
    """Make HTTP request to Serper API with retry logic."""
    response = requests.post(f"{SERPER_BASE_URL}/search", json=payload, headers=headers)
    response.raise_for_status()
    return response


def _is_huggingface_dataset_or_space_url(url):
    """
    Check if the URL is a HuggingFace dataset or space URL.
    :param url: The URL to check
    :return: True if it's a HuggingFace dataset or space URL, False otherwise
    """
    if not url:
        return False
    return "huggingface.co/datasets" in url or "huggingface.co/spaces" in url


@mcp.tool()
def google_search(
    q: str,
    gl: str = "us",
    hl: str = "en",
    location: str | None = None,
    num: int | None = None,
    tbs: str | None = None,
    page: int | None = None,
    autocorrect: bool | None = None,
) -> Dict[str, Any]:
    """
    Tool to perform web searches via Serper API and retrieve rich results.
    It is able to retrieve organic search results, people also ask,
    related searches, and knowledge graph.

    Args:
        q: Search query string
        gl: Optional region code for search results in ISO 3166-1 alpha-2 format (e.g., 'us')
        hl: Optional language code for search results in ISO 639-1 format (e.g., 'en')
        location: Optional location for search results (e.g., 'SoHo, New York, United States', 'California, United States')
        num: Number of results to return (default: 10)
        tbs: Time-based search filter ('qdr:h' for past hour, 'qdr:d' for past day, 'qdr:w' for past week,
            'qdr:m' for past month, 'qdr:y' for past year)
        page: Page number of results to return (default: 1)
        autocorrect: Whether to autocorrect spelling in query

    Returns:
        Dictionary containing search results and metadata.
    """
    # Check for API key
    if not SERPER_API_KEY:
        return {
            "success": False,
            "error": "SERPER_API_KEY environment variable not set",
            "results": [],
        }

    # Validate required parameter
    if not q or not q.strip():
        return {
            "success": False,
            "error": "Search query 'q' is required and cannot be empty",
            "results": [],
        }

    try:
        # Build payload with all supported parameters
        payload: dict[str, Any] = {
            "q": q.strip(),
            "gl": gl,
            "hl": hl,
        }

        # Add optional parameters if provided
        if location:
            payload["location"] = location
        if num is not None:
            payload["num"] = num
        else:
            payload["num"] = 10  # Default
        if tbs:
            payload["tbs"] = tbs
        if page is not None:
            payload["page"] = page
        if autocorrect is not None:
            payload["autocorrect"] = autocorrect

        # Set up headers
        headers = {"X-API-KEY": SERPER_API_KEY, "Content-Type": "application/json"}

        # Make the API request
        response = make_serper_request(payload, headers)
        data = response.json()

        # Filter out HuggingFace dataset or space URLs
        organic_results = []
        if "organic" in data:
            for item in data["organic"]:
                if _is_huggingface_dataset_or_space_url(item.get("link", "")):
                    continue
                organic_results.append(item)

        # Keep all original fields, but overwrite "organic"
        response_data = dict(data)
```
> **Contributor** (review comment on `response_data = dict(data)`): not sure if the returned object is the same shape.
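For reference, `dict(data)` is a shallow copy, so the top-level shape of the Serper response is preserved and only the `"organic"` key is replaced; whether `decode_http_urls_in_dict` alters nesting is a separate question. A minimal illustration (the sample `data` dict below is made up):

```python
# Made-up sample mimicking a Serper response; dict(data) copies the top-level
# keys, so only "organic" differs after the HuggingFace filter is applied.
data = {
    "searchParameters": {"q": "example"},
    "organic": [{"link": "https://huggingface.co/datasets/x"}],
}
response_data = dict(data)
response_data["organic"] = []  # filtered result
assert set(response_data) == set(data)  # same top-level keys
```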
```python
        response_data["organic"] = organic_results
        response_data = decode_http_urls_in_dict(response_data)

        return response_data

    except Exception as e:
        return {"success": False, "error": f"Unexpected error: {str(e)}", "results": []}


if __name__ == "__main__":
    mcp.run()
```
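For a quick local sanity check, the decorated function can be called in-process in the same module (FastMCP's `tool()` decorator registers the function and, in current MCP Python SDK versions, returns the original callable; treat that as an assumption). An illustrative smoke test, not part of the PR:

```python
# Illustrative smoke test: call the tool function directly, bypassing MCP.
# Assumes SERPER_API_KEY (and optionally SERPER_BASE_URL pointing at MiroAPI)
# is set in the environment before the module is imported.
result = google_search("MiroMind MiroAPI", num=5, tbs="qdr:m")
if result.get("success") is False:
    print("search failed:", result.get("error"))
else:
    # Inspect the first few organic results that survived the HuggingFace
    # dataset/space filter and the decode_http_urls_in_dict pass.
    for item in result.get("organic", [])[:3]:
        print(item.get("title"), "->", item.get("link"))
```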
```diff
@@ -30,13 +30,16 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
     if env:
         JINA_API_KEY = env.get("JINA_API_KEY", "")
         SERPER_API_KEY = env.get("SERPER_API_KEY", "")
+        JINA_BASE_URL = env.get("JINA_BASE_URL", "https://r.jina.ai")
     else:
         JINA_API_KEY = ""
         SERPER_API_KEY = ""

     if JINA_API_KEY == "" and SERPER_API_KEY == "":
         return "[ERROR]: JINA_API_KEY and SERPER_API_KEY are not set, smart_request is not available."

+    IS_MIRO_API = True if "miro" in JINA_BASE_URL else False
+
     # Auto-add https:// if no protocol is specified
     protocol_hint = ""
     if not url.startswith(("http://", "https://")):
@@ -65,21 +68,24 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
         ):
             youtube_hint = "[NOTE]: If you need to get information about its visual or audio content, please use tool 'visual_audio_youtube_analyzing' instead. This tool may not be able to provide visual and audio content of a YouTube Video.\n\n"

-        content, jina_err = await scrape_jina(url, JINA_API_KEY)
+        content, jina_err = await scrape_jina(url, JINA_API_KEY, JINA_BASE_URL)
         if jina_err:
             error_msg += f"Failed to get content from Jina.ai: {jina_err}\n"
         elif content is None or content.strip() == "":
             error_msg += "No content got from Jina.ai.\n"
         else:
             return protocol_hint + youtube_hint + content

-        content, serper_err = await scrape_serper(url, SERPER_API_KEY)
-        if serper_err:
-            error_msg += f"Failed to get content from SERPER: {serper_err}\n"
-        elif content is None or content.strip() == "":
-            error_msg += "No content got from SERPER.\n"
-        else:
-            return protocol_hint + youtube_hint + content
+        if not IS_MIRO_API:
+            # Try Serper API for scraping if not using Miro API
+            # (Miro API does not support caching Serper scraping results)
+            content, serper_err = await scrape_serper(url, SERPER_API_KEY)
+            if serper_err:
+                error_msg += f"Failed to get content from SERPER: {serper_err}\n"
+            elif content is None or content.strip() == "":
+                error_msg += "No content got from SERPER.\n"
+            else:
+                return protocol_hint + youtube_hint + content

         content, request_err = scrape_request(url)
         if request_err:
@@ -99,7 +105,9 @@ async def smart_request(url: str, params: dict = None, env: dict = None) -> str:
         await asyncio.sleep(4**retry_count)


-async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
+async def scrape_jina(
+    url: str, jina_api_key: str, jina_base_url: str
+) -> tuple[str, str]:
     # Use Jina.ai reader API to convert URL to LLM-friendly text
     if jina_api_key == "":
         return (
@@ -116,7 +124,7 @@ async def scrape_jina(url: str, jina_api_key: str) -> tuple[str, str]:
         "X-With-Shadow-Dom": "true",
     }

-    jina_url = f"https://r.jina.ai/{url}"
+    jina_url = f"{jina_base_url}/{url}"
     try:
         response = requests.get(jina_url, headers=jina_headers, timeout=120)
         if response.status_code == 422:
```
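The net effect of this diff is that the Jina reader URL is built from the configured base URL instead of the hard-coded `https://r.jina.ai`, and the Serper scraping fallback is skipped when the base URL points at MiroAPI. A small sketch of that construction and gating under the `.env` from the docs page (illustrative, not a copy of the surrounding file):

```python
import os

# Mirrors the diff: default to the public Jina reader unless overridden in .env.
JINA_BASE_URL = os.getenv("JINA_BASE_URL", "https://r.jina.ai")
IS_MIRO_API = True if "miro" in JINA_BASE_URL else False

url = "https://example.com/page"
jina_url = f"{JINA_BASE_URL}/{url}"

# With JINA_BASE_URL="https://miro-api.miromind.site/jina" this yields
# "https://miro-api.miromind.site/jina/https://example.com/page", and the
# Serper scraping fallback is skipped (MiroAPI does not cache Serper scrapes).
print(jina_url, "skip Serper fallback:", IS_MIRO_API)
```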
> **Review comment:** Use the imported `Dict` type from `typing` instead of the built-in `dict`, for consistency with the function signature and other type annotations in the file.
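Applied to the `payload` annotation in `google_search` (assuming that is the annotation the comment refers to), the suggested style would look like the sketch below; the `build_payload` helper is hypothetical and exists only to keep the example self-contained.

```python
from typing import Any, Dict


def build_payload(q: str, gl: str = "us", hl: str = "en") -> Dict[str, Any]:
    # Annotate with typing.Dict rather than the built-in dict, matching the
    # Dict[str, Any] annotations used elsewhere in serper_search.py.
    payload: Dict[str, Any] = {"q": q.strip(), "gl": gl, "hl": hl}
    return payload


print(build_payload("miroapi caching layer"))
```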