mlxrunner fixes (#14247)

* load glm4_moe_lite from the mlxrunner

* fix loading diffusion models

* remove log lines

* fix --imagegen flag
This commit is contained in:
Patrick Devine
2026-02-13 22:30:42 -08:00
committed by GitHub
parent 5f5ef20131
commit d18dcd7775
19 changed files with 764 additions and 281 deletions

View File

@@ -408,10 +408,10 @@ func TestSchedGetRunner(t *testing.T) {
s.getSystemInfoFn = getSystemInfoFn
s.newServerFn = a.newServer
slog.Info("a")
successCh1a, errCh1a := s.GetRunner(a.ctx, a.req.model, a.req.opts, a.req.sessionDuration)
successCh1a, errCh1a := s.GetRunner(a.ctx, a.req.model, a.req.opts, a.req.sessionDuration, false)
require.Len(t, s.pendingReqCh, 1)
slog.Info("b")
successCh1b, errCh1b := s.GetRunner(b.ctx, b.req.model, b.req.opts, b.req.sessionDuration)
successCh1b, errCh1b := s.GetRunner(b.ctx, b.req.model, b.req.opts, b.req.sessionDuration, false)
require.Len(t, s.pendingReqCh, 1)
require.Empty(t, successCh1b)
require.Len(t, errCh1b, 1)
@@ -435,7 +435,7 @@ func TestSchedGetRunner(t *testing.T) {
c.req.model.ModelPath = "bad path"
slog.Info("c")
successCh1c, errCh1c := s.GetRunner(c.ctx, c.req.model, c.req.opts, c.req.sessionDuration)
successCh1c, errCh1c := s.GetRunner(c.ctx, c.req.model, c.req.opts, c.req.sessionDuration, false)
// Starts in pending channel, then should be quickly processed to return an error
time.Sleep(50 * time.Millisecond) // Long enough for the "a" model to expire and unload
require.Empty(t, successCh1c)
@@ -509,7 +509,7 @@ func TestSchedPrematureExpired(t *testing.T) {
s.getGpuFn = getGpuFn
s.getSystemInfoFn = getSystemInfoFn
s.newServerFn = scenario1a.newServer
successCh1a, errCh1a := s.GetRunner(scenario1a.ctx, scenario1a.req.model, scenario1a.req.opts, scenario1a.req.sessionDuration)
successCh1a, errCh1a := s.GetRunner(scenario1a.ctx, scenario1a.req.model, scenario1a.req.opts, scenario1a.req.sessionDuration, false)
require.Len(t, s.pendingReqCh, 1)
s.Run(ctx)
select {