diff --git a/config/tool/tool-audio-os.yaml b/config/tool/tool-audio-os.yaml
new file mode 100644
index 0000000..bf33f6a
--- /dev/null
+++ b/config/tool/tool-audio-os.yaml
@@ -0,0 +1,9 @@
+name: "tool-audio-os"
+tool_command: "python"
+args:
+  - "-m"
+  - "src.tool.mcp_servers.audio_mcp_server_os"
+env:
+  WHISPER_API_KEY: "${oc.env:WHISPER_API_KEY}"
+  WHISPER_BASE_URL: "${oc.env:WHISPER_BASE_URL}"
+  WHISPER_MODEL_NAME: "${oc.env:WHISPER_MODEL_NAME}"
\ No newline at end of file
diff --git a/config/tool/tool-image-video-os.yaml b/config/tool/tool-image-video-os.yaml
new file mode 100644
index 0000000..f9a61df
--- /dev/null
+++ b/config/tool/tool-image-video-os.yaml
@@ -0,0 +1,9 @@
+name: "tool-image-video-os"
+tool_command: "python"
+args:
+  - "-m"
+  - "src.tool.mcp_servers.vision_mcp_server_os"
+env:
+  VISION_API_KEY: "${oc.env:VISION_API_KEY}"
+  VISION_BASE_URL: "${oc.env:VISION_BASE_URL}"
+  VISION_MODEL_NAME: "${oc.env:VISION_MODEL_NAME}"
\ No newline at end of file
diff --git a/config/tool/tool-reasoning-os.yaml b/config/tool/tool-reasoning-os.yaml
new file mode 100644
index 0000000..c845d45
--- /dev/null
+++ b/config/tool/tool-reasoning-os.yaml
@@ -0,0 +1,9 @@
+name: "tool-reasoning-os"
+tool_command: "python"
+args:
+  - "-m"
+  - "src.tool.mcp_servers.reasoning_mcp_server_os"
+env:
+  REASONING_API_KEY: "${oc.env:REASONING_API_KEY}"
+  REASONING_BASE_URL: "${oc.env:REASONING_BASE_URL}"
+  REASONING_MODEL_NAME: "${oc.env:REASONING_MODEL_NAME}"
\ No newline at end of file
diff --git a/docs/mkdocs/docs/tool_audio_os.md b/docs/mkdocs/docs/tool_audio_os.md
new file mode 100644
index 0000000..e803846
--- /dev/null
+++ b/docs/mkdocs/docs/tool_audio_os.md
@@ -0,0 +1,149 @@
+# Audio Tools - Open Source (`audio_mcp_server_os.py`)
+
+The Audio MCP Server (Open Source) enables audio transcription using open-source Whisper models. It provides comprehensive audio-to-text conversion with support for multiple audio formats, local files, and URLs.
+
+!!! info "Available Functions"
+    This MCP server provides the following functions that agents can call:
+
+    - **Audio Transcription**: High-quality speech-to-text conversion
+    - **Multi-Format Support**: MP3, WAV, M4A, AAC, OGG, FLAC, and WMA formats
+    - **Flexible Input**: Local file paths and web URLs
+    - **Open-Source Model Support**: Whisper-Large-v3-Turbo with automatic processing
+
+---
+
+## Environment Variables
+
+!!! warning "Configuration Location"
+    The `audio_mcp_server_os.py` server reads environment variables that are passed through the `tool-audio-os.yaml` configuration file, not directly from the `.env` file.
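+
+The `${oc.env:...}` entries in `tool-audio-os.yaml` are OmegaConf environment-variable interpolations. As a minimal sketch of how they resolve (assuming OmegaConf loads this config and the variables are exported in your shell; the key value below is a placeholder):
+
+```python
+import os
+from omegaconf import OmegaConf
+
+os.environ.setdefault("WHISPER_API_KEY", "your_whisper_key")  # placeholder value
+cfg = OmegaConf.load("config/tool/tool-audio-os.yaml")
+env = OmegaConf.to_container(cfg, resolve=True)["env"]  # resolves ${oc.env:...}
+print(env["WHISPER_API_KEY"])
+```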
+
+**Open-Source Model Configuration:**
+
+- `WHISPER_API_KEY`: Required API key for the open-source Whisper service
+- `WHISPER_BASE_URL`: Base URL for the Whisper service API endpoint
+- `WHISPER_MODEL_NAME`: Model name (default: `openai/whisper-large-v3-turbo`)
+
+**Example Configuration:**
+```bash
+# API for Open-Source Audio Transcription Tool (for benchmark testing)
+WHISPER_MODEL_NAME="openai/whisper-large-v3-turbo"
+WHISPER_API_KEY=your_whisper_key
+WHISPER_BASE_URL="https://your_whisper_base_url/v1"
+```
+
+---
+
+## Local Deployment
+
+### Using vLLM Server
+
+For optimal performance with the Whisper-Large-v3-Turbo model, deploy using vLLM:
+
+```bash
+pip install vllm==0.10.0
+pip install "vllm[audio]"
+vllm serve /path/to/whisper \
+    --served-model-name whisper-large-v3-turbo \
+    --task transcription
+```
+
+### Configuration for Local Deployment
+
+When using local deployment, configure your environment variables:
+
+```bash
+WHISPER_MODEL_NAME="openai/whisper-large-v3-turbo"
+WHISPER_API_KEY="dummy_key"  # Not required for local deployment
+WHISPER_BASE_URL="http://localhost:8000/v1"
+```
+
+---
+
+## Function Reference
+
+The following function is provided by the `audio_mcp_server_os.py` MCP tool and can be called by agents:
+
+### `audio_transcription(audio_path_or_url: str)`
+
+Transcribe audio files to text using open-source Whisper models. Supports both local files and web URLs with automatic format detection and processing.
+
+**Parameters:**
+
+- `audio_path_or_url`: Local file path (accessible to the server) or web URL
+
+**Returns:**
+
+- `str`: The transcription of the audio file
+
+**Supported Audio Formats:**
+
+- MP3 (.mp3)
+- WAV (.wav)
+- M4A (.m4a)
+- AAC (.aac)
+- OGG (.ogg)
+- FLAC (.flac)
+- WMA (.wma)
+
+## Usage Examples
+
+### Local File Transcription
+```python
+# Local file transcription
+result = audio_transcription(
+    audio_path_or_url="/path/to/audio.mp3"
+)
+```
+
+### URL-based Transcription
+```python
+# URL transcription
+result = audio_transcription(
+    audio_path_or_url="https://example.com/audio.wav"
+)
+```
+
+### Meeting Recording Transcription
+```python
+result = audio_transcription(
+    audio_path_or_url="meeting_recording.m4a"
+)
+```
+
+### Podcast Transcription
+```python
+result = audio_transcription(
+    audio_path_or_url="podcast_episode.mp3"
+)
+```
+
+---
+
+## Technical Implementation
+
+### Audio Processing Pipeline
+
+1. **Input Validation**: Checks whether the input is a local file or a URL
+2. **Format Detection**: Determines the audio format from the file extension or content type
+3. **File Handling**: Downloads URL files to temporary storage with proper extensions
+4. **API Request**: Sends the audio file to the Whisper model for transcription
+5. **Cleanup**: Removes temporary files after processing
+6. **Response Processing**: Returns the transcription text
+
+### Error Handling
+
+- **File Access Errors**: Graceful handling of inaccessible local files
+- **Network Errors**: Robust URL fetching with retry logic (up to 3 attempts)
+- **Format Errors**: Automatic format detection and validation
+- **API Errors**: Clear error reporting for service issues
+- **Sandbox Restrictions**: Prevents access to sandbox files with clear error messages
+
+### Retry Logic
+
+- **Maximum Retries**: 3 attempts for failed requests
+- **Exponential Backoff**: Doubling delays between retries (10 seconds, then 20 seconds)
+- **Network Resilience**: Handles temporary network issues and service unavailability
+
+---
+
+!!! info "Documentation Info"
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
diff --git a/docs/mkdocs/docs/tool_reasoning_os.md b/docs/mkdocs/docs/tool_reasoning_os.md
new file mode 100644
index 0000000..6d34aca
--- /dev/null
+++ b/docs/mkdocs/docs/tool_reasoning_os.md
@@ -0,0 +1,135 @@
+# Reasoning Tools - Open Source (`reasoning_mcp_server_os.py`)
+
+The Reasoning MCP Server (Open Source) provides a **pure text-based reasoning engine** using open-source models. It supports logical analysis, problem solving, and planning, with robust retry mechanisms and exponential backoff for reliability.
+
+!!! info "Available Functions"
+    This MCP server provides the following functions that agents can call:
+
+    - **Pure Text Reasoning**: Logical analysis and problem solving using open-source LLM backends
+    - **Step-by-Step Analysis**: Structured reasoning with detailed explanations
+    - **Open-Source Model Support**: Qwen3-235B-A22B-Thinking-2507 with automatic fallback
+    - **Robust Error Handling**: Exponential backoff retry logic (up to 10 attempts)
+
+---
+
+## Environment Variables
+
+!!! warning "Configuration Location"
+    The `reasoning_mcp_server_os.py` server reads environment variables that are passed through the `tool-reasoning-os.yaml` configuration file, not directly from the `.env` file.
+
+**Open-Source Model Configuration:**
+
+- `REASONING_API_KEY`: Required API key for the open-source reasoning service
+- `REASONING_BASE_URL`: Base URL for the reasoning service API endpoint
+- `REASONING_MODEL_NAME`: Model name (default: `Qwen/Qwen3-235B-A22B-Thinking-2507`)
+
+**Example Configuration:**
+```bash
+# API for Open-Source Reasoning Tool (for benchmark testing)
+REASONING_MODEL_NAME="Qwen/Qwen3-235B-A22B-Thinking-2507"
+REASONING_API_KEY=your_reasoning_key
+REASONING_BASE_URL="https://your_reasoning_base_url/v1/chat/completions"
+```
+
+---
+
+## Local Deployment
+
+### Using SGLang Server
+
+For optimal performance with the Qwen3-235B-A22B-Thinking model, deploy using SGLang:
+
+```bash
+python3 -m sglang.launch_server \
+    --model-path /path/to/Qwen3-235B-A22B-Thinking-2507 \
+    --tp 8 --host 0.0.0.0 --port 1234 \
+    --trust-remote-code --enable-metrics \
+    --log-level debug --log-level-http debug \
+    --log-requests --log-requests-level 2 \
+    --show-time-cost --context-length 131072
+```
+
+### Configuration for Local Deployment
+
+When using local deployment, configure your environment variables:
+
+```bash
+REASONING_MODEL_NAME="Qwen/Qwen3-235B-A22B-Thinking-2507"
+REASONING_API_KEY="dummy_key"  # Not required for local deployment
+REASONING_BASE_URL="http://localhost:1234/v1/chat/completions"
+```
+
+---
+
+## Function Reference
+
+The following function is provided by the `reasoning_mcp_server_os.py` MCP tool and can be called by agents:
+
+### `reasoning(question: str)`
+
+Perform step-by-step reasoning, analysis, and planning over a **text-only input**. This tool is specialized for **complex thinking tasks** that require deep analytical reasoning.
+
+!!! note "Text-Only Processing"
+    This tool processes only the provided text input and will not fetch external data or context. Ensure all necessary information is included in the question.
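+
+For example, a self-contained call bundles every fact the model needs into the question itself (a minimal sketch; the arithmetic is illustrative):
+
+```python
+result = reasoning(
+    question=(
+        "Two trains start 300 km apart and drive toward each other "
+        "at 60 km/h and 90 km/h. After how many hours do they meet? "
+        "Show your reasoning step by step."
+    )
+)
+# expected conclusion: 300 / (60 + 90) = 2 hours
+```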
+
+**Parameters:**
+
+- `question`: A detailed, complex question or problem statement that includes all necessary information
+
+**Returns:**
+
+- `str`: A structured, step-by-step reasoned answer
+
+**Features:**
+
+- **Open-Source Model**: Uses Qwen3-235B-A22B-Thinking-2507 for advanced reasoning
+- **Robust Retry Logic**: Exponential backoff retry mechanism (up to 10 attempts)
+- **Thinking Mode Support**: Automatically strips thinking blocks and returns the final answer
+- **Error Handling**: Graceful fallback with informative error messages
+- **Timeout Protection**: 600-second timeout for long-running reasoning tasks
+- **Jittered Backoff**: Prevents thundering-herd problems with randomized retry delays
+
+**Retry Configuration:**
+
+- Maximum retries: 10 attempts
+- Initial backoff: 1.0 seconds
+- Maximum backoff: 30.0 seconds
+- Exponential backoff with jitter (0.8-1.2x multiplier)
+
+---
+
+## Usage Examples
+
+### Complex Mathematical Problems
+```python
+question = """
+Solve this complex optimization problem:
+A company wants to minimize costs while maximizing production.
+Given constraints: 2x + 3y ≤ 100, x + y ≤ 50, x ≥ 0, y ≥ 0
+Cost function: C = 5x + 8y
+Production function: P = 3x + 4y
+Find the optimal values of x and y.
+"""
+result = reasoning(question=question)
+```
+
+### Logical Puzzles
+```python
+question = """
+Three people are in a room: Alice, Bob, and Charlie.
+- Alice says: "Bob is lying"
+- Bob says: "Charlie is lying"
+- Charlie says: "Alice is lying"
+If exactly one person is telling the truth, who is it?
+"""
+result = reasoning(question=question)
+```
+
+### Strategic Planning
+```python
+question = """
+Design a strategy for a startup to enter a competitive market
+with limited resources. Consider market analysis, competitive
+positioning, resource allocation, and risk mitigation.
+"""
+result = reasoning(question=question)
+```
+
+!!! info "Documentation Info"
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
diff --git a/docs/mkdocs/docs/tool_vqa_os.md b/docs/mkdocs/docs/tool_vqa_os.md
new file mode 100644
index 0000000..d308bbc
--- /dev/null
+++ b/docs/mkdocs/docs/tool_vqa_os.md
@@ -0,0 +1,149 @@
+# Vision Tools - Open Source (`vision_mcp_server_os.py`)
+
+The Vision MCP Server (Open Source) enables Visual Question Answering (VQA) over images using open-source vision-language models. It provides comprehensive image analysis with support for local files and URLs.
+
+!!! info "Available Functions"
+    This MCP server provides the following functions that agents can call:
+
+    - **Visual Question Answering**: Comprehensive image analysis and question answering
+    - **Multi-Format Support**: JPEG, PNG, and GIF image formats
+    - **Flexible Input**: Local file paths and web URLs
+    - **Open-Source Model Support**: Qwen2.5-VL-72B-Instruct with automatic encoding
+
+---
+
+## Environment Variables
+
+!!! warning "Configuration Location"
+    The `vision_mcp_server_os.py` server reads environment variables that are passed through the `tool-image-video-os.yaml` configuration file, not directly from the `.env` file.
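+
+Conceptually, the tool launcher spawns the MCP server as a subprocess and injects these variables into its environment, roughly like the following (an illustrative sketch, not the actual launcher code; values are placeholders):
+
+```python
+import os
+import subprocess
+
+subprocess.run(
+    ["python", "-m", "src.tool.mcp_servers.vision_mcp_server_os"],
+    env={**os.environ, "VISION_API_KEY": "your_vision_key"},  # placeholder key
+)
+```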
+
+**Open-Source Model Configuration:**
+
+- `VISION_API_KEY`: Required API key for the open-source vision service
+- `VISION_BASE_URL`: Base URL for the vision service API endpoint
+- `VISION_MODEL_NAME`: Model name (default: `Qwen/Qwen2.5-VL-72B-Instruct`)
+
+**Example Configuration:**
+```bash
+# API for Open-Source VQA Tool (for benchmark testing)
+VISION_MODEL_NAME="Qwen/Qwen2.5-VL-72B-Instruct"
+VISION_API_KEY=your_vision_key
+VISION_BASE_URL="https://your_vision_base_url/v1/chat/completions"
+```
+
+---
+
+## Local Deployment
+
+### Using SGLang Server
+
+For optimal performance with the Qwen2.5-VL-72B-Instruct model, deploy using SGLang (version `0.5.2` is suggested, as lower versions have known issues with this model):
+
+```bash
+python3 -m sglang.launch_server \
+    --model-path /path/to/Qwen2.5-VL-72B-Instruct \
+    --tp 8 --host 0.0.0.0 --port 1234 \
+    --trust-remote-code --enable-metrics \
+    --log-level debug --log-level-http debug \
+    --log-requests --log-requests-level 2 --show-time-cost
+```
+
+### Configuration for Local Deployment
+
+When using local deployment, configure your environment variables:
+
+```bash
+VISION_MODEL_NAME="Qwen/Qwen2.5-VL-72B-Instruct"
+VISION_API_KEY="dummy_key"  # Not required for local deployment
+VISION_BASE_URL="http://localhost:1234/v1/chat/completions"
+```
+
+---
+
+## Function Reference
+
+The following function is provided by the `vision_mcp_server_os.py` MCP tool and can be called by agents:
+
+### `visual_question_answering(image_path_or_url: str, question: str)`
+
+Ask questions about images using open-source vision-language models. Supports both local files and web URLs with automatic format detection and encoding.
+
+**Parameters:**
+
+- `image_path_or_url`: Local file path (accessible to the server) or web URL
+- `question`: The user's question about the image
+
+**Returns:**
+
+- `str`: The model's answer to the image-related question
+
+**Supported Image Formats:**
+
+- JPEG (.jpg, .jpeg)
+- PNG (.png)
+- GIF (.gif)
+- Default fallback to JPEG for unknown formats
+
+## Usage Examples
+
+### Image Analysis
+```python
+# Local file analysis
+result = visual_question_answering(
+    image_path_or_url="/path/to/image.jpg",
+    question="What objects can you see in this image?"
+)
+
+# URL analysis
+result = visual_question_answering(
+    image_path_or_url="https://example.com/image.png",
+    question="Describe the scene in detail."
+)
+```
+
+### OCR and Text Extraction
+```python
+result = visual_question_answering(
+    image_path_or_url="document.jpg",
+    question="Extract all the text from this document."
+)
+```
+
+### Object Detection and Counting
+```python
+result = visual_question_answering(
+    image_path_or_url="scene.jpg",
+    question="Count how many people are in this image and describe their activities."
+)
+```
+
+### Technical Diagram Analysis
+```python
+result = visual_question_answering(
+    image_path_or_url="diagram.png",
+    question="Explain this technical diagram and identify the key components."
+)
+```
+
+---
+
+## Technical Implementation
+
+### Image Processing Pipeline
+
+1. **Input Validation**: Checks whether the input is a local file or a URL
+2. **Format Detection**: Determines the MIME type from the file extension or headers
+3. **Encoding**: Converts images to Base64 for API transmission
+4. **API Request**: Sends a structured request to the vision model
+5. **Response Processing**: Extracts and returns the model response
+
+### Error Handling
+
+- **File Access Errors**: Graceful handling of inaccessible local files
+- **Network Errors**: Robust URL fetching with proper error messages
+- **Format Errors**: Fallback MIME type detection for unknown formats
+- **API Errors**: Clear error reporting for service issues
+
+---
+
+!!! info "Documentation Info"
+    **Last Updated:** October 2025 · **Doc Contributor:** Team @ MiroMind AI
diff --git a/docs/mkdocs/mkdocs.yml b/docs/mkdocs/mkdocs.yml
index 376bf4b..a6db094 100644
--- a/docs/mkdocs/mkdocs.yml
+++ b/docs/mkdocs/mkdocs.yml
@@ -65,7 +65,10 @@ nav:
   - Overview: tool_overview.md
   - Tools:
     - tool-reasoning: tool_reasoning.md
+    - tool-reasoning-os: tool_reasoning_os.md
     - tool-image-video: tool_vqa.md
+    - tool-image-video-os: tool_vqa_os.md
+    - tool-audio-os: tool_audio_os.md
    - tool-searching: tool_searching.md
     - tool-python: tool_python.md
   - Advanced Features:
diff --git a/src/tool/mcp_servers/audio_mcp_server_os.py b/src/tool/mcp_servers/audio_mcp_server_os.py
new file mode 100644
index 0000000..1b59d98
--- /dev/null
+++ b/src/tool/mcp_servers/audio_mcp_server_os.py
@@ -0,0 +1,213 @@
+# Copyright 2025 Miromind.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import asyncio
+import base64
+import contextlib
+import mimetypes
+import os
+import tempfile
+import wave
+from urllib.parse import urlparse
+
+import requests
+from fastmcp import FastMCP
+from mutagen import File as MutagenFile
+from openai import OpenAI
+
+WHISPER_API_KEY = os.environ.get("WHISPER_API_KEY")
+WHISPER_BASE_URL = os.environ.get("WHISPER_BASE_URL")
+WHISPER_MODEL_NAME = os.environ.get("WHISPER_MODEL_NAME")
+
+# Initialize FastMCP server
+mcp = FastMCP("audio-mcp-server-os")
+
+
+def _get_audio_extension(url: str, content_type: str = None) -> str:
+    """
+    Determine the appropriate audio file extension from URL or content type.
+
+    Args:
+        url: The URL of the audio file
+        content_type: The content type from HTTP headers
+
+    Returns:
+        File extension (with dot) to use for temporary file
+    """
+    # First try to get extension from URL
+    parsed_url = urlparse(url)
+    path = parsed_url.path.lower()
+
+    # Common audio extensions
+    audio_extensions = [".mp3", ".wav", ".m4a", ".aac", ".ogg", ".flac", ".wma"]
+    for ext in audio_extensions:
+        if path.endswith(ext):
+            return ext
+
+    # If no extension found in URL, try content type
+    if content_type:
+        content_type = content_type.lower()
+        if "mp3" in content_type or "mpeg" in content_type:
+            return ".mp3"
+        elif "wav" in content_type:
+            return ".wav"
+        elif "m4a" in content_type:
+            return ".m4a"
+        elif "aac" in content_type:
+            return ".aac"
+        elif "ogg" in content_type:
+            return ".ogg"
+        elif "flac" in content_type:
+            return ".flac"
+
+    # Default fallback to mp3
+    return ".mp3"
+
+
+def _get_audio_duration(audio_path: str) -> float:
+    """
+    Get audio duration in seconds.
+
+    Tries to use wave (for .wav), then falls back to mutagen (for mp3, etc).
+    """
+    # Try using wave for .wav files
+    try:
+        with contextlib.closing(wave.open(audio_path, "rb")) as f:
+            frames = f.getnframes()
+            rate = f.getframerate()
+            duration = frames / float(rate)
+            if duration > 0:
+                return duration
+    except Exception:
+        pass  # Not a wav file or failed
+
+    # Try using mutagen for other audio formats (mp3, etc)
+    try:
+        audio = MutagenFile(audio_path)
+        if (
+            audio is not None
+            and hasattr(audio, "info")
+            and hasattr(audio.info, "length")
+        ):
+            duration = float(audio.info.length)
+            if duration > 0:
+                return duration
+    except Exception:
+        pass  # Unsupported format or unreadable file
+
+    # Neither backend could determine a positive duration
+    return 0.0
+
+
+def _encode_audio_file(audio_path: str) -> tuple[str, str]:
+    """Encode audio file to base64 and determine format."""
+    with open(audio_path, "rb") as audio_file:
+        audio_data = audio_file.read()
+        encoded_string = base64.b64encode(audio_data).decode("utf-8")
+
+    # Determine file format from file extension
+    mime_type, _ = mimetypes.guess_type(audio_path)
+    if mime_type and mime_type.startswith("audio/"):
+        mime_format = mime_type.split("/")[-1]
+        # Map MIME type formats to OpenAI supported formats
+        format_mapping = {
+            "mpeg": "mp3",  # audio/mpeg -> mp3
+            "wav": "wav",  # audio/wav -> wav
+            "wave": "wav",  # audio/wave -> wav
+        }
+        file_format = format_mapping.get(mime_format, "mp3")
+    else:
+        # Default to mp3 if we can't determine
+        file_format = "mp3"
+
+    return encoded_string, file_format
+
+
+@mcp.tool()
+async def audio_transcription(audio_path_or_url: str) -> str:
+    """
+    Transcribe an audio file to text and return the transcription.
+
+    Args:
+        audio_path_or_url: The local path of the audio file or its URL. Paths from the sandbox are not supported. YouTube URLs are not supported.
+
+    Returns:
+        The transcription of the audio file.
+    """
+    max_retries = 3
+    retry = 0
+    transcription = None
+
+    while retry < max_retries:
+        try:
+            client = OpenAI(base_url=WHISPER_BASE_URL, api_key=WHISPER_API_KEY)
+            if os.path.exists(audio_path_or_url):  # Check if the file exists locally
+                with open(audio_path_or_url, "rb") as audio_file:
+                    transcription = client.audio.transcriptions.create(
+                        model=WHISPER_MODEL_NAME, file=audio_file
+                    )
+            elif "home/user" in audio_path_or_url:
+                return "[ERROR]: The audio_transcription tool cannot access sandbox files; please use the local path provided by the original instruction"
+            else:
+                # Download the audio file from the URL
+                response = requests.get(audio_path_or_url, timeout=60)
+                response.raise_for_status()  # Raise an exception for bad status codes
+
+                # Basic content validation - check if response has content
+                if not response.content:
+                    return (
+                        "[ERROR]: Audio transcription failed: Downloaded file is empty"
+                    )
+
+                # Check content type if available
+                content_type = response.headers.get("content-type", "").lower()
+                if content_type and not any(
+                    media_type in content_type
+                    for media_type in ["audio", "video", "application/octet-stream"]
+                ):
+                    return f"[ERROR]: Audio transcription failed: Invalid content type '{content_type}'. Expected audio file."
+
+                # Get proper extension for the temporary file
+                file_extension = _get_audio_extension(audio_path_or_url, content_type)
+
+                # Use proper temporary file handling with correct extension
+                with tempfile.NamedTemporaryFile(
+                    delete=False, suffix=file_extension
+                ) as temp_file:
+                    temp_file.write(response.content)
+                    temp_audio_path = temp_file.name
+
+                try:
+                    with open(temp_audio_path, "rb") as audio_file:
+                        transcription = client.audio.transcriptions.create(
+                            model=WHISPER_MODEL_NAME, file=audio_file
+                        )
+                finally:
+                    # Clean up the temp file
+                    if os.path.exists(temp_audio_path):
+                        os.remove(temp_audio_path)
+            break
+
+        except requests.RequestException as e:
+            retry += 1
+            if retry >= max_retries:
+                return f"[ERROR]: Audio transcription failed: Failed to download audio file - {e}.\nNote: Files from the sandbox are not available. You should use the local path given in the instruction.\nURLs must include the proper scheme (e.g., 'https://') and be publicly accessible. The file should be in a common audio format such as MP3, WAV, or M4A.\nNote: YouTube video URLs are not supported."
+            await asyncio.sleep(5 * (2**retry))
+        except Exception as e:
+            retry += 1
+            if retry >= max_retries:
+                return f"[ERROR]: Audio transcription failed: {e}\nNote: Files from the sandbox are not available. You should use the local path given in the instruction. The file should be in a common audio format such as MP3, WAV, or M4A.\nNote: YouTube video URLs are not supported."
+            await asyncio.sleep(5 * (2**retry))
+
+    return transcription.text
+
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
\ No newline at end of file
diff --git a/src/tool/mcp_servers/reasoning_mcp_server_os.py b/src/tool/mcp_servers/reasoning_mcp_server_os.py
new file mode 100644
index 0000000..3e886a7
--- /dev/null
+++ b/src/tool/mcp_servers/reasoning_mcp_server_os.py
@@ -0,0 +1,103 @@
+# Copyright 2025 Miromind.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import logging
+import os
+import random
+import time
+
+import requests
+from fastmcp import FastMCP
+
+logger = logging.getLogger("miroflow")
+
+REASONING_API_KEY = os.environ.get("REASONING_API_KEY")
+REASONING_BASE_URL = os.environ.get("REASONING_BASE_URL")
+REASONING_MODEL_NAME = os.environ.get("REASONING_MODEL_NAME")
+
+# Initialize FastMCP server
+mcp = FastMCP("reasoning-mcp-server-os")
+
+# Retry configuration
+MAX_RETRIES = 10
+BACKOFF_BASE = 1.0  # initial backoff in seconds
+BACKOFF_MAX = 30.0  # maximum backoff in seconds
+
+
+def post_with_retry(url, json, headers):
+    """Send POST request with retry and exponential backoff.
+    Returns the response object on success, otherwise None."""
+    for attempt in range(1, MAX_RETRIES + 1):
+        try:
+            resp = requests.post(url, json=json, headers=headers, timeout=600)
+            if resp.status_code == 200:
+                return resp
+            else:
+                logger.warning(
+                    f"HTTP {resp.status_code} on attempt {attempt}: {resp.text[:200]}"
+                )
+        except requests.exceptions.RequestException as e:
+            logger.warning(f"Request failed on attempt {attempt}: {e}")
+
+        # Backoff before next retry
+        if attempt < MAX_RETRIES:
+            sleep_time = min(BACKOFF_BASE * (2 ** (attempt - 1)), BACKOFF_MAX)
+            # Add jitter to avoid thundering herd
+            sleep_time *= 0.8 + 0.4 * random.random()
+            logger.info(f"Retrying in {sleep_time:.1f}s...")
+            time.sleep(sleep_time)
+
+    logger.warning(f"All {MAX_RETRIES} retries failed for {url}")
+    return None
+
+
+@mcp.tool()
+async def reasoning(question: str) -> str:
+    """Use this tool to solve hard math problems, puzzles, riddles, and IQ-test questions that require extensive chain-of-thought effort.
+    DO NOT use this tool for simple and obvious questions.
+
+    Args:
+        question: The hard question.
+
+    Returns:
+        The answer to the question.
+    """
+    payload = {
+        "model": REASONING_MODEL_NAME,
+        "messages": [{"role": "user", "content": question}],
+        "temperature": 0.6,
+        "top_p": 0.95,
+    }
+    headers = {
+        "Authorization": f"Bearer {REASONING_API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    response = post_with_retry(REASONING_BASE_URL, json=payload, headers=headers)
+    if response is None:
+        return "Reasoning service unavailable. Please try again later."
+
+    json_response = response.json()
+    try:
+        content = json_response["choices"][0]["message"]["content"]
+        # Strip the thinking block emitted by thinking-mode models
+        if "</think>" in content:
+            content = content.split("</think>", 1)[1].strip()
+        return content
+    except Exception:
+        # Some backends return only the thinking content
+        logger.info("Reasoning Error: only thinking content is returned")
+        return json_response["choices"][0]["message"]["reasoning_content"]
+
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
\ No newline at end of file
diff --git a/src/tool/mcp_servers/vision_mcp_server_os.py b/src/tool/mcp_servers/vision_mcp_server_os.py
new file mode 100644
index 0000000..786e5af
--- /dev/null
+++ b/src/tool/mcp_servers/vision_mcp_server_os.py
@@ -0,0 +1,112 @@
+# Copyright 2025 Miromind.ai
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import base64
+import os
+
+import aiohttp
+import requests
+from fastmcp import FastMCP
+
+VISION_API_KEY = os.environ.get("VISION_API_KEY")
+VISION_BASE_URL = os.environ.get("VISION_BASE_URL")
+VISION_MODEL_NAME = os.environ.get("VISION_MODEL_NAME")
+
+# Initialize FastMCP server
+mcp = FastMCP("vision-mcp-server-os")
+
+
+def guess_mime_media_type_from_extension(file_path: str) -> str:
+    """Guess the MIME type based on the file extension."""
+    _, ext = os.path.splitext(file_path)
+    ext = ext.lower()
+    if ext in [".jpg", ".jpeg"]:
+        return "image/jpeg"
+    elif ext == ".png":
+        return "image/png"
+    elif ext == ".gif":
+        return "image/gif"
+    else:
+        return "image/jpeg"  # Default to JPEG if unknown
+
+
+@mcp.tool()
+async def visual_question_answering(image_path_or_url: str, question: str) -> str:
+    """Ask a question about an image and get the answer from a vision-language model.
+
+    Args:
+        image_path_or_url: The local path of the image file or its URL.
+        question: The question to ask about the image.
+
+    Returns:
+        The answer to the image-related question.
+    """
+    messages_for_llm = [
+        {
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": None}},
+                {
+                    "type": "text",
+                    "text": question,
+                },
+            ],
+        }
+    ]
+
+    headers = {
+        "Authorization": f"Bearer {VISION_API_KEY}",
+        "Content-Type": "application/json",
+    }
+
+    try:
+        if os.path.exists(image_path_or_url):  # Check if the file exists locally
+            with open(image_path_or_url, "rb") as image_file:
+                image_data = base64.b64encode(image_file.read()).decode("utf-8")
+            mime_type = guess_mime_media_type_from_extension(image_path_or_url)
+            messages_for_llm[0]["content"][0]["image_url"]["url"] = (
+                f"data:{mime_type};base64,{image_data}"
+            )
+        elif image_path_or_url.startswith(("http://", "https://")):
+            async with aiohttp.ClientSession() as session:
+                async with session.get(image_path_or_url) as resp:
+                    if resp.status == 200:
+                        image_bytes = await resp.read()
+                        mime_type = resp.headers.get(
+                            "Content-Type", "image/png"
+                        )  # fallback MIME type
+                        image_data = base64.b64encode(image_bytes).decode("utf-8")
+                        messages_for_llm[0]["content"][0]["image_url"]["url"] = (
+                            f"data:{mime_type};base64,{image_data}"
+                        )
+                    else:
+                        return f"Failed to fetch image from URL: {image_path_or_url}"
+        else:
+            # Pass anything else (e.g., a data: URI) through as-is
+            messages_for_llm[0]["content"][0]["image_url"]["url"] = image_path_or_url
+
+        payload = {"model": VISION_MODEL_NAME, "messages": messages_for_llm}
+
+        response = requests.post(
+            VISION_BASE_URL, json=payload, headers=headers, timeout=600
+        )
+
+    except Exception as e:
+        return f"Error: {e}"
+
+    try:
+        return response.json()["choices"][0]["message"]["content"]
+    except (KeyError, IndexError, TypeError):
+        # Surface the raw payload if it does not match the expected schema
+        return str(response.json())
+
+
+if __name__ == "__main__":
+    mcp.run(transport="stdio")
\ No newline at end of file