Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions sgl-model-gateway/bindings/python/sglang_router/mini_lb.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,16 +341,15 @@ async def get_server_info():
}


@app.get("/get_model_info")
async def get_model_info():
async def _get_model_info_impl():
if not lb or not lb.prefill_urls:
raise HTTPException(
status_code=HTTPStatus.SERVICE_UNAVAILABLE,
detail="There is no server registered",
)

target_server_url = lb.prefill_urls[0]
endpoint_url = f"{target_server_url}/get_model_info"
endpoint_url = f"{target_server_url}/model_info"

async with aiohttp.ClientSession() as session:
try:
Expand All @@ -375,6 +374,16 @@ async def get_model_info():
)


@app.get("/model_info")
async def model_info():
    """Serve ``GET /model_info`` by delegating to ``_get_model_info_impl``."""
    payload = await _get_model_info_impl()
    return payload


@app.get("/get_model_info")
async def get_model_info():
    """Serve ``GET /get_model_info`` through the shared ``_get_model_info_impl`` helper."""
    response = await _get_model_info_impl()
    return response
Comment on lines +377 to +384
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

To improve maintainability and reduce code duplication, you can register multiple paths to a single handler function in FastAPI. This would make it more explicit that both /model_info and /get_model_info are served by the same logic, and it reduces the number of similar functions to maintain.

Suggested change
@app.get("/model_info")
async def model_info():
return await _get_model_info_impl()
@app.get("/get_model_info")
async def get_model_info():
return await _get_model_info_impl()
@app.get("/model_info")
@app.get("/get_model_info")
async def get_model_info():
"""Handle both /model_info and /get_model_info for backward compatibility."""
return await _get_model_info_impl()



@app.post("/generate")
async def handle_generate_request(request_data: dict):
prefill_server, bootstrap_port, decode_server = lb.select_pair()
Expand Down
Loading