helixml
diff --git a/‎.drone.yml
-14 b/‎.drone.yml
-14
diff --git a/‎api/cmd/helix/qapairs.go
+5-2 b/‎api/cmd/helix/qapairs.go
+5-2
diff --git a/‎api/cmd/helix/root.go
+1-6 b/‎api/cmd/helix/root.go
+1-6
diff --git a/‎api/cmd/helix/runner.go
+15-6 b/‎api/cmd/helix/runner.go
+15-6
diff --git a/‎api/cmd/helix/serve.go
+44-46 b/‎api/cmd/helix/serve.go
+44-46
diff --git a/‎api/pkg/config/config.go
+9 b/‎api/pkg/config/config.go
+9
diff --git a/‎api/pkg/controller/controller.go
+3-36 b/‎api/pkg/controller/controller.go
+3-36
@@ -159,8 +159,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
     - push
@@ -243,8 +241,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
 
@@ -316,8 +312,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
 
@@ -380,8 +374,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
     - push
@@ -416,8 +408,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
     - push
@@ -452,8 +442,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
     - push
@@ -488,8 +476,6 @@ steps:
   - name: dockersocket
     path: /var/run/docker.sock
   when:
-    branch:
-    - main
     event:
     - tag
     - push
 
@@ -24,11 +24,14 @@ func newQapairCommand() *cobra.Command {
 			if err != nil {
 				return fmt.Errorf("failed to load server config: %v", err)
 			}
-			ps, err := pubsub.New(serverConfig.PubSub.StoreDir)
+			ps, err := pubsub.New(&serverConfig)
+			if err != nil {
+				return err
+			}
+			scheduler, err := scheduler.NewScheduler(cmd.Context(), &serverConfig, nil)
 			if err != nil {
 				return err
 			}
-			scheduler := scheduler.NewScheduler(cmd.Context(), &serverConfig, nil)
 			helixInference := openai.NewInternalHelixServer(&serverConfig, ps, scheduler)
 			client, err := createDataPrepOpenAIClient(&serverConfig, helixInference)
 			if err != nil {
 
@@ -3,7 +3,6 @@ package helix
 import (
 	"context"
 	"os"
-	"runtime"
 
 	"github.com/spf13/cobra"
 
@@ -46,11 +45,7 @@ func NewRootCmd() *cobra.Command {
 	RootCmd.AddCommand(newQapairCommand())
 	RootCmd.AddCommand(newEvalsCommand())
 	RootCmd.AddCommand(NewTestCmd()) // Use the NewTestCmd function from the current package
-
-	// Runner only works on Linux
-	if runtime.GOOS == "linux" {
-		RootCmd.AddCommand(newRunnerCmd())
-	}
+	RootCmd.AddCommand(newRunnerCmd())
 
 	return RootCmd
 }
 
@@ -43,6 +43,10 @@ func NewRunnerOptions() *RunnerOptions {
 			AllowMultipleCopies:          getDefaultServeOptionBool("ALLOW_MULTIPLE_COPIES", false),
 			MaxModelInstances:            getDefaultServeOptionInt("MAX_MODEL_INSTANCES", 0),
 			CacheDir:                     getDefaultServeOptionString("CACHE_DIR", "/root/.cache/huggingface"), // TODO: change to maybe just /data
+			WebServer: runner.WebServer{
+				Host: getDefaultServeOptionString("SERVER_HOST", "127.0.0.1"),
+				Port: getDefaultServeOptionInt("SERVER_PORT", 80),
+			},
 		},
 		Janitor: config.Janitor{
 			SentryDsnAPI: getDefaultServeOptionString("SENTRY_DSN_API", ""),
@@ -86,6 +90,16 @@ func newRunnerCmd() *cobra.Command {
 		`The auth token for this runner`,
 	)
 
+	runnerCmd.PersistentFlags().StringVar(
+		&allOptions.Runner.WebServer.Host, "server-host", allOptions.Runner.WebServer.Host,
+		`The host to bind the api server to.`,
+	)
+
+	runnerCmd.PersistentFlags().IntVar(
+		&allOptions.Runner.WebServer.Port, "server-port", allOptions.Runner.WebServer.Port,
+		`The port to bind the api server to.`,
+	)
+
 	runnerCmd.PersistentFlags().Uint64Var(
 		&allOptions.Runner.MemoryBytes, "memory-bytes", allOptions.Runner.MemoryBytes,
 		`The number of bytes of GPU memory available - e.g. 1073741824`,
@@ -288,12 +302,7 @@ func runnerCLI(cmd *cobra.Command, options *RunnerOptions) error {
 		return err
 	}
 
-	err = runnerController.Initialize(ctx)
-	if err != nil {
-		return err
-	}
-
-	go runnerController.Run()
+	go runnerController.Run(ctx)
 
 	<-ctx.Done()
 	return nil
 
@@ -219,9 +219,9 @@ func serve(cmd *cobra.Command, cfg *config.ServerConfig) error {
 		return err
 	}
 
-	ps, err := pubsub.New(cfg.PubSub.StoreDir)
+	ps, err := pubsub.New(cfg)
 	if err != nil {
-		return err
+		return fmt.Errorf("failed to create pubsub provider: %w", err)
 	}
 
 	if cfg.WebServer.RunnerToken == "" {
@@ -265,49 +265,50 @@ func serve(cmd *cobra.Command, cfg *config.ServerConfig) error {
 		return fmt.Errorf("unknown extractor: %s", cfg.TextExtractor.Provider)
 	}
 
-	// Must use the same allocator for both new LLM requests and old sessions
-	scheduler := scheduler.NewScheduler(ctx, cfg, func(work *scheduler.Workload, err error) {
-		// This function describes what happens when errors occur in jobs.
-		// Each request type (session vs. LLM requests) has a differeht code path handling results,
-		// hence for now we need to separate cases to handle errors.
-		switch work.WorkloadType {
-		case scheduler.WorkloadTypeLLMInferenceRequest:
-			log.Warn().Err(err).Str("id", work.ID()).Msg("error scheduling work, removing from queue")
-			req := work.LLMInferenceRequest()
-			resp := &types.RunnerLLMInferenceResponse{
-				RequestID:     req.RequestID,
-				OwnerID:       req.OwnerID,
-				SessionID:     req.SessionID,
-				InteractionID: req.InteractionID,
-				Error:         err.Error(),
-				Done:          true,
-			}
-			bts, err := json.Marshal(resp)
-			if err != nil {
-				log.Error().Err(err).Str("id", work.ID()).Msg("error marshalling runner response")
-			}
+	runnerController, err := scheduler.NewRunnerController(ctx, &scheduler.RunnerControllerConfig{
+		PubSub: ps,
+		FS:     fs,
+	})
+	if err != nil {
+		return err
+	}
 
-			err = ps.Publish(context.Background(), pubsub.GetRunnerResponsesQueue(req.OwnerID, req.RequestID), bts)
-			if err != nil {
-				log.Error().Err(err).Str("id", work.ID()).Msg("error publishing runner response")
-			}
-		case scheduler.WorkloadTypeSession:
-			// If we can't retry, write an error to the request and continue so it takes it off
-			// the queue
-			errSession := work.Session()
-			errSession.Interactions = append(errSession.Interactions, &types.Interaction{
-				Creator: types.CreatorTypeSystem,
-				Error:   err.Error(),
-				Message: "Error scheduling session",
-			})
-			_, err = store.UpdateSession(ctx, *errSession)
-			if err != nil {
-				log.Error().Err(err).Msg("error updating session")
+	var appController *controller.Controller
+
+	scheduler, err := scheduler.NewScheduler(ctx, cfg, &scheduler.Params{
+		RunnerController: runnerController,
+		QueueSize:        100,
+		OnSchedulingErr: func(work *scheduler.Workload, err error) { // presumably invoked by the scheduler when a workload cannot be scheduled — confirm in scheduler.Params
+			if appController != nil { // NOTE(review): while appController is still nil every error is dropped, including LLM ones that never use it
+				switch work.WorkloadType {
+				case scheduler.WorkloadTypeLLMInferenceRequest:
+					request := work.LLMInferenceRequest()
+					response := types.RunnerNatsReplyResponse{
+						OwnerID:   request.OwnerID,
+						RequestID: request.RequestID,
+						Error:     err.Error(),
+						Response:  []byte{},
+					}
+					bts, marshalErr := json.Marshal(response)
+					if marshalErr != nil {
+						log.Error().Err(marshalErr).Msg("error marshalling runner response")
+						return // a failed Marshal yields no payload, so there is nothing to publish
+					}
+					if pubErr := ps.Publish(ctx, pubsub.GetRunnerResponsesQueue(request.OwnerID, request.RequestID), bts); pubErr != nil {
+						log.Error().Err(pubErr).Msg("error publishing runner response")
+					}
+				case scheduler.WorkloadTypeSession:
+					appController.ErrorSession(ctx, work.Session(), err)
+				}
 			}
-		default:
-			log.Error().Str("workload_type", string(work.WorkloadType)).Msg("unknown workload type")
-		}
+		},
+		OnResponseHandler: func(_ context.Context, _ *types.RunnerLLMInferenceResponse) error {
+			return nil
+		},
 	})
+	if err != nil {
+		return err
+	}
 
 	helixInference := openai.NewInternalHelixServer(cfg, ps, scheduler)
 
@@ -354,8 +355,6 @@ func serve(cmd *cobra.Command, cfg *config.ServerConfig) error {
 		return fmt.Errorf("unknown RAG provider: %s", cfg.RAG.DefaultRagProvider)
 	}
 
-	var appController *controller.Controller
-
 	controllerOptions := controller.Options{
 		Config:               cfg,
 		Store:                store,
@@ -369,6 +368,7 @@ func serve(cmd *cobra.Command, cfg *config.ServerConfig) error {
 		ProviderManager:      providerManager,
 		DataprepOpenAIClient: dataprepOpenAIClient,
 		Scheduler:            scheduler,
+		RunnerController:     runnerController,
 	}
 
 	appController, err = controller.NewController(ctx, controllerOptions)
@@ -381,8 +381,6 @@ func serve(cmd *cobra.Command, cfg *config.ServerConfig) error {
 		return err
 	}
 
-	go appController.Start(ctx)
-
 	// Initialize browser pool
 	browserPool, err := browser.New(cfg)
 	if err != nil {
 
@@ -248,6 +248,15 @@ type FileStore struct {
 
 type PubSub struct {
 	StoreDir string `envconfig:"NATS_STORE_DIR" default:"/filestore/nats" description:"The directory to store nats data."`
+	Provider string `envconfig:"PUBSUB_PROVIDER" default:"nats" description:"The pubsub provider to use (nats or inmemory)."`
+	Server   struct { // NATS server settings, populated from the NATS_SERVER_* environment variables tagged below
+		EmbeddedNatsServerEnabled bool   `envconfig:"NATS_SERVER_EMBEDDED_ENABLED" default:"true" description:"Whether to enable the embedded NATS server."`
+		Host                      string `envconfig:"NATS_SERVER_HOST" default:"127.0.0.1" description:"The host to bind the NATS server to."`
+		Port                      int    `envconfig:"NATS_SERVER_PORT" default:"8433" description:"The port to bind the NATS server to."` // NOTE(review): the conventional NATS client port is 4222 — confirm 8433 is intended
+		Token                     string `envconfig:"NATS_SERVER_TOKEN" description:"The authentication token for the NATS server."` // no default tag, so the token is empty unless the variable is set
+		MaxPayload                int    `envconfig:"NATS_SERVER_MAX_PAYLOAD" default:"33554432" description:"The maximum payload size in bytes (default 32MB)."`
+		JetStream                 bool   `envconfig:"NATS_SERVER_JETSTREAM" default:"true" description:"Whether to enable JetStream."`
+	}
 }
 
 type Store struct {
 
@@ -3,8 +3,6 @@ package controller
 import (
 	"context"
 	"fmt"
-	"runtime/debug"
-	"time"
 
 	"github.com/helixml/helix/api/pkg/config"
 	"github.com/helixml/helix/api/pkg/extract"
@@ -21,8 +19,6 @@ import (
 	"github.com/helixml/helix/api/pkg/store"
 	"github.com/helixml/helix/api/pkg/tools"
 	"github.com/helixml/helix/api/pkg/types"
-	"github.com/puzpuzpuz/xsync/v3"
-	"github.com/rs/zerolog/log"
 )
 
 type Options struct {
@@ -38,7 +34,8 @@ type Options struct {
 	// OpenAIClient         openai.Client
 	ProviderManager      manager.ProviderManager
 	DataprepOpenAIClient openai.Client
-	Scheduler            scheduler.Scheduler
+	Scheduler            *scheduler.Scheduler
+	RunnerController     *scheduler.RunnerController
 }
 
 type Controller struct {
@@ -56,14 +53,10 @@ type Controller struct {
 	// the models package looks after instantiating this for us
 	models map[string]model.Model
 
-	// the map of model instances that we have loaded
-	// and are currently running
-	activeRunners *xsync.MapOf[string, *types.RunnerState]
-
 	// the current buffer of scheduling decisions
 	schedulingDecisions []*types.GlobalSchedulingDecision
 
-	scheduler scheduler.Scheduler
+	scheduler *scheduler.Scheduler
 }
 
 func NewController(
@@ -100,7 +93,6 @@ func NewController(
 		newRagClient: func(settings *types.RAGSettings) rag.RAG {
 			return rag.NewLlamaindex(settings)
 		},
-		activeRunners:       xsync.NewMapOf[string, *types.RunnerState](),
 		schedulingDecisions: []*types.GlobalSchedulingDecision{},
 		scheduler:           options.Scheduler,
 	}
@@ -123,28 +115,3 @@ func NewController(
 func (c *Controller) Initialize() error {
 	return nil
 }
-
-// this should be run in a go-routine
-func (c *Controller) Start(ctx context.Context) {
-	for {
-		select {
-		case <-ctx.Done():
-			return
-		case <-time.After(10 * time.Second):
-			err := c.run(c.Ctx)
-			if err != nil {
-				log.Error().Msgf("error in controller loop: %s", err.Error())
-				debug.PrintStack()
-			}
-		}
-	}
-}
-
-func (c *Controller) run(ctx context.Context) error {
-	err := c.cleanOldRunnerMetrics(ctx)
-	if err != nil {
-		log.Error().Msgf("error in controller loop: %s", err.Error())
-		debug.PrintStack()
-	}
-	return nil
-}