@@ -837,8 +837,9 @@ def parameter_reference_guide() -> str:
837837- **Type**: `Optional[Union[str, Dict[str, Any]]]`
838838- **Purpose**: Define expected output structure
839839- **Formats**:
840- - Dictionary: `{'type': 'object', 'properties': {'title': {'type': 'string'}}}`
841- - JSON string: `'{"type": "object", "properties": {"name": {"type": "string"}}}'`
840+ - Dictionary: `{'type': 'object', 'properties': {'title': {'type': 'string'}}, 'required': []}`
841+ - JSON string: `'{"type": "object", "properties": {"name": {"type": "string"}}, "required": []}'`
842+ - **IMPORTANT**: Must include a `"required"` field (can be empty array `[]` if no fields are required)
842843- **Examples**:
843844 ```json
844845 {
@@ -852,16 +853,20 @@ def parameter_reference_guide() -> str:
852853 "name": {"type": "string"},
853854 "price": {"type": "number"},
854855 "available": {"type": "boolean"}
855- }
856+ },
857+ "required": []
856858 }
857859 }
858- }
860+ },
861+ "required": []
859862 }
860863 ```
861864- **Best Practices**:
865+ - Always include the `"required"` field (use `[]` if no fields are required)
862866 - Use for complex, structured extractions
863867 - Define clear data types
864868 - Consider nested structures for complex data
869+ - Note: If `"required"` field is missing, it will be automatically added as `[]`
865870
866871---
867872
@@ -1432,8 +1437,7 @@ def smartscraper(
14321437 Extract structured data from a webpage, HTML, or markdown using AI-powered extraction.
14331438
14341439 This tool uses advanced AI to understand your natural language prompt and extract specific
1435- structured data from web content. Supports three input modes: URL scraping, local HTML processing,
1436- or local markdown processing. Ideal for extracting product information, contact details,
1440+ structured data from web content. Supports URL scraping as an input mode. Ideal for extracting product information, contact details,
14371441 article metadata, or any structured content. Costs 10 credits per page. Read-only operation.
14381442
14391443 Args:
@@ -1480,10 +1484,13 @@ def smartscraper(
14801484 - Can be provided as a dictionary or JSON string
14811485 - Helps ensure consistent, structured output format
14821486 - Optional but recommended for complex extractions
1487+ - IMPORTANT: Must include a "required" field (can be empty array [] if no fields are required)
14831488 - Examples:
1484- * As dict: {'type': 'object', 'properties': {'title': {'type': 'string'}, 'price': {'type': 'number'}}}
1485- * As JSON string: '{"type": "object", "properties": {"name": {"type": "string"}}}'
1486- * For arrays: {'type': 'array', 'items': {'type': 'object', 'properties': {...}}}
1489+ * As dict: {'type': 'object', 'properties': {'title': {'type': 'string'}, 'price': {'type': 'number'}}, 'required': []}
1490+ * As JSON string: '{"type": "object", "properties": {"name": {"type": "string"}}, "required": []}'
1491+ * For arrays: {'type': 'array', 'items': {'type': 'object', 'properties': {...}, 'required': []}, 'required': []}
1492+ * With required fields: {'type': 'object', 'properties': {'name': {'type': 'string'}, 'email': {'type': 'string'}}, 'required': ['name', 'email']}
1493+ - Note: If "required" field is missing, it will be automatically added as an empty array []
14871494 - Default: None (AI will infer structure from prompt)
14881495
14891496 number_of_scrolls (Optional[int]): Number of infinite scrolls to perform before scraping.
@@ -1564,6 +1571,11 @@ def smartscraper(
15641571 except json .JSONDecodeError as e :
15651572 return {"error" : f"Invalid JSON for output_schema: { str (e )} " }
15661573
1574+ # When an output_schema was provided, make sure it carries a 'required' key (defaulting to an empty list)
1575+ if normalized_schema is not None :
1576+ if "required" not in normalized_schema :
1577+ normalized_schema ["required" ] = []
1578+
15671579 return client .smartscraper (
15681580 user_prompt = user_prompt ,
15691581 website_url = website_url ,
@@ -2099,11 +2111,14 @@ def agentic_scrapper(
20992111 - Can be provided as a dictionary or JSON string
21002112 - Defines the format and structure of the final extracted data
21012113 - Helps ensure consistent, predictable output format
2114+ - IMPORTANT: Must include a "required" field (can be empty array [] if no fields are required)
21022115 - Examples:
2103- * Simple object: {'type': 'object', 'properties': {'title': {'type': 'string'}, 'price': {'type': 'number'}}}
2104- * Array of objects: {'type': 'array', 'items': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'value': {'type': 'string'}}}}
2105- * Complex nested: {'type': 'object', 'properties': {'products': {'type': 'array', 'items': {...}}, 'total_count': {'type': 'number'}}}
2106- * As JSON string: '{"type": "object", "properties": {"results": {"type": "array"}}}'
2116+ * Simple object: {'type': 'object', 'properties': {'title': {'type': 'string'}, 'price': {'type': 'number'}}, 'required': []}
2117+ * Array of objects: {'type': 'array', 'items': {'type': 'object', 'properties': {'name': {'type': 'string'}, 'value': {'type': 'string'}}, 'required': []}, 'required': []}
2118+ * Complex nested: {'type': 'object', 'properties': {'products': {'type': 'array', 'items': {...}}, 'total_count': {'type': 'number'}}, 'required': []}
2119+ * As JSON string: '{"type": "object", "properties": {"results": {"type": "array"}}, "required": []}'
2120+ * With required fields: {'type': 'object', 'properties': {'id': {'type': 'string'}, 'name': {'type': 'string'}}, 'required': ['id']}
2121+ - Note: If "required" field is missing, it will be automatically added as an empty array []
21072122 - Default: None (agent will infer structure from prompt and steps)
21082123
21092124 steps (Optional[Union[str, List[str]]]): Step-by-step instructions for the agent.
@@ -2245,6 +2260,11 @@ def agentic_scrapper(
22452260 except json .JSONDecodeError as e :
22462261 return {"error" : f"Invalid JSON for output_schema: { str (e )} " }
22472262
2263+ # When an output_schema was provided, make sure it carries a 'required' key (defaulting to an empty list)
2264+ if normalized_schema is not None :
2265+ if "required" not in normalized_schema :
2266+ normalized_schema ["required" ] = []
2267+
22482268 try :
22492269 api_key = get_api_key (ctx )
22502270 client = ScapeGraphClient (api_key )
@@ -2280,8 +2300,10 @@ def create_server() -> FastMCP:
22802300def main () -> None :
22812301 """Run the ScapeGraph MCP server."""
22822302 try :
2283- logger .info ("Starting ScapeGraph MCP server!" )
2284- print ("Starting ScapeGraph MCP server!" )
2303+ # Resolve this module's absolute path and log it so it is clear which copy of the server is running
2304+ server_path = os .path .abspath (__file__ )
2305+ logger .info (f"Starting ScapeGraph MCP server from local codebase: { server_path } " )
2306+ print (f"Starting ScapeGraph MCP server (local codebase)" )
22852307 mcp .run (transport = "stdio" )
22862308 except Exception as e :
22872309 logger .error (f"Failed to start MCP server: { e } " )
0 commit comments