Skip to content

Commit c671ac4

Browse files
committed
Add /api/generate endpoint for model loading and unloading
So we can load and unload models.

Signed-off-by: Eric Curtin <[email protected]>
1 parent acab8b5 commit c671ac4

File tree

14 files changed

+297
-15
lines changed

14 files changed

+297
-15
lines changed

cmd/cli/commands/root.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,7 @@ func NewRootCmd(cli *command.DockerCli) *cobra.Command {
113113
newConfigureCmd(),
114114
newPSCmd(),
115115
newDFCmd(),
116+
newStopCmd(),
116117
newUnloadCmd(),
117118
newRequestsCmd(),
118119
)

cmd/cli/commands/run.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -691,8 +691,12 @@ func newRunCmd() *cobra.Command {
691691
}
692692
}
693693

694+
// Check if a prompt was explicitly provided (even if empty string)
695+
// If args length > 1, then a prompt argument was provided (even if it's "")
696+
explicitPromptProvided := len(args) > 1
697+
694698
// Handle --detach flag: just load the model without interaction
695-
if detach {
699+
if detach || (explicitPromptProvided && prompt == "") {
696700
// Make a minimal request to load the model into memory
697701
err := desktopClient.Chat(model, "", nil, func(content string) {
698702
// Silently discard output in detach mode

cmd/cli/commands/stop.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package commands
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/docker/model-runner/cmd/cli/commands/completion"
7+
"github.com/docker/model-runner/cmd/cli/desktop"
8+
"github.com/docker/model-runner/pkg/inference/models"
9+
"github.com/spf13/cobra"
10+
)
11+
12+
// newStopCmd builds the `docker model stop MODEL` command. It normalizes the
// model name given as the first argument, asks the desktop client to unload
// it (optionally restricted to the backend named by --backend), and prints
// how many runners were stopped.
func newStopCmd() *cobra.Command {
13+
var backend string
14+
15+
const cmdArgs = "MODEL"
16+
c := &cobra.Command{
17+
Use: "stop " + cmdArgs,
18+
Short: "Stop a running model",
19+
RunE: func(cmd *cobra.Command, args []string) error {
20+
model := models.NormalizeModelName(args[0])
21+
unloadResp, err := desktopClient.Unload(desktop.UnloadRequest{Backend: backend, Models: []string{model}})
22+
if err != nil {
23+
err = handleClientError(err, "Failed to stop model")
24+
return handleNotRunningError(err)
25+
}
26+
unloaded := unloadResp.UnloadedRunners
27+
if unloaded == 0 {
28+
cmd.Println("No such model running.")
29+
} else {
30+
cmd.Printf("Stopped %d model(s).\n", unloaded)
31+
}
32+
return nil
33+
},
34+
ValidArgsFunction: completion.NoComplete,
35+
}
36+
// The positional-args validator guarantees args[0] exists before RunE runs.
c.Args = func(cmd *cobra.Command, args []string) error {
37+
if len(args) < 1 {
38+
// Use real newline escapes ("\n"); a doubled backslash would print a
// literal backslash followed by 'n' in the usage message.
return fmt.Errorf(
39+
"'docker model stop' requires MODEL.\n\n" +
40+
"Usage: docker model stop " + cmdArgs + "\n\n" +
41+
"See 'docker model stop --help' for more information.",
42+
)
43+
}
44+
return nil
45+
}
46+
c.Flags().StringVar(&backend, "backend", "", "Optional backend to target")
47+
return c
48+
}

cmd/cli/commands/utils.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,14 @@ func handleClientError(err error, message string) error {
3939
return errors.Join(err, errors.New(message))
4040
}
4141

42+
// handleNotRunningError is a placeholder hook for "model not running" errors.
43+
// It currently performs no inspection and returns err unchanged (nil stays nil).
44+
func handleNotRunningError(err error) error {
45+
// For now, just return the error as-is.
46+
// This function can be expanded to map specific "model not running" errors to a friendlier message in the future.
47+
return err
48+
}
49+
4250
// stripDefaultsFromModelName removes the default "ai/" prefix and ":latest" tag for display.
4351
// Examples:
4452
// - "ai/gemma3:latest" -> "gemma3"

cmd/cli/docs/reference/docker_model.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ cname:
2222
- docker model run
2323
- docker model start-runner
2424
- docker model status
25+
- docker model stop
2526
- docker model stop-runner
2627
- docker model tag
2728
- docker model uninstall-runner
@@ -44,6 +45,7 @@ clink:
4445
- docker_model_run.yaml
4546
- docker_model_start-runner.yaml
4647
- docker_model_status.yaml
48+
- docker_model_stop.yaml
4749
- docker_model_stop-runner.yaml
4850
- docker_model_tag.yaml
4951
- docker_model_uninstall-runner.yaml
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
command: docker model stop
2+
short: Stop a running model
3+
long: Stop a running model
4+
usage: docker model stop MODEL
5+
pname: docker model
6+
plink: docker_model.yaml
7+
options:
8+
- option: backend
9+
value_type: string
10+
description: Optional backend to target
11+
deprecated: false
12+
hidden: false
13+
experimental: false
14+
experimentalcli: false
15+
kubernetes: false
16+
swarm: false
17+
deprecated: false
18+
hidden: false
19+
experimental: false
20+
experimentalcli: false
21+
kubernetes: false
22+
swarm: false
23+

cmd/cli/docs/reference/model.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ Docker Model Runner
2323
| [`run`](model_run.md) | Run a model and interact with it using a submitted prompt or chat mode |
2424
| [`start-runner`](model_start-runner.md) | Start Docker Model Runner (Docker Engine only) |
2525
| [`status`](model_status.md) | Check if the Docker Model Runner is running |
26+
| [`stop`](model_stop.md) | Stop a running model |
2627
| [`stop-runner`](model_stop-runner.md) | Stop Docker Model Runner (Docker Engine only) |
2728
| [`tag`](model_tag.md) | Tag a model |
2829
| [`uninstall-runner`](model_uninstall-runner.md) | Uninstall Docker Model Runner (Docker Engine only) |
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# docker model stop
2+
3+
<!---MARKER_GEN_START-->
4+
Stop a running model
5+
6+
### Options
7+
8+
| Name | Type | Default | Description |
9+
|:------------|:---------|:--------|:---------------------------|
10+
| `--backend` | `string` | | Optional backend to target |
11+
12+
13+
<!---MARKER_GEN_END-->
14+

main.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,17 @@ func main() {
179179
// Add /v1 as an alias for /engines/v1
180180
router.Handle("/v1/", &V1AliasHandler{scheduler: scheduler})
181181

182+
// Add API endpoints by creating a custom handler
183+
apiHandler := http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
184+
switch r.URL.Path {
185+
case "/api/generate":
186+
scheduler.HandleGenerate(w, r)
187+
default:
188+
http.NotFound(w, r)
189+
}
190+
})
191+
router.Handle("/api/generate", apiHandler)
192+
182193
// Add metrics endpoint if enabled
183194
if os.Getenv("DISABLE_METRICS") != "1" {
184195
metricsHandler := metrics.NewAggregatedMetricsHandler(

pkg/inference/backends/vllm/vllm.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,9 @@ func (v *vLLM) Run(ctx context.Context, socket, model string, modelRef string, m
119119
return fmt.Errorf("failed to get vLLM arguments: %w", err)
120120
}
121121

122-
// Add served model name
123-
args = append(args, "--served-model-name", model, modelRef)
122+
// Add served model name - sanitize to prevent command injection
123+
sanitizedModelRef := utils.SanitizeModelNameForCommand(modelRef)
124+
args = append(args, "--served-model-name", model, sanitizedModelRef)
124125

125126
// Sanitize args for safe logging
126127
sanitizedArgs := make([]string, len(args))

0 commit comments

Comments
 (0)