Skip to content

Commit

Permalink
[core] test environment's FSM, handling hooks and fix discovered issues
Browse files Browse the repository at this point in the history
The fixes being:
- set run_end_completion_time_ms also at the end of GO_ERROR transition
- document that the ECS does not abort a call's execution upon timeout; it merely passes the timeout parameter to the call
- avoid a crash if the FSM.Event() is called without providing a Transition
  • Loading branch information
knopers8 authored and teo committed Jul 3, 2024
1 parent d59e65e commit e556b5a
Show file tree
Hide file tree
Showing 11 changed files with 711 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ INSTALL_WHAT:=$(patsubst %, install_%, $(WHAT))

GENERATE_DIRS := ./apricot ./coconut/cmd ./common ./common/runtype ./common/system ./core ./core/integration/ccdb ./core/integration/dcs ./core/integration/ddsched ./core/integration/kafka ./core/integration/odc ./executor ./walnut ./core/integration/trg ./core/integration/bookkeeping
SRC_DIRS := ./apricot ./cmd/* ./core ./coconut ./executor ./common ./configuration ./occ/peanut ./walnut
TEST_DIRS := ./apricot/local ./configuration/cfgbackend ./configuration/componentcfg ./configuration/template ./core/task/sm ./core/workflow ./core/integration/odc/fairmq ./core/integration
TEST_DIRS := ./apricot/local ./configuration/cfgbackend ./configuration/componentcfg ./configuration/template ./core/task/sm ./core/workflow ./core/integration/odc/fairmq ./core/integration ./core/environment
GO_TEST_DIRS := ./core/repos ./core/integration/dcs

coverage:COVERAGE_PREFIX := ./coverage_results
Expand Down
25 changes: 25 additions & 0 deletions core/environment/environment.go
Original file line number Diff line number Diff line change
Expand Up @@ -525,6 +525,27 @@ func newEnvironment(userVars map[string]string, newId uid.ID) (env *Environment,
env.workflow.GetVars().Del("runNumber")
// Ensure the auto stop timer is stopped (important for stop transitions NOT triggered by the timer itself)
env.invalidateAutoStopTransition()
} else if e.Event == "GO_ERROR" {
endCompletionTime, ok := env.workflow.GetUserVars().Get("run_end_completion_time_ms")
if ok && endCompletionTime == "" {
runEndCompletionTime := time.Now()
runEndCompletionTimeStr := strconv.FormatInt(runEndCompletionTime.UnixMilli(), 10)
env.workflow.SetRuntimeVar("run_end_completion_time_ms", runEndCompletionTimeStr)

the.EventWriterWithTopic(topic.Run).WriteEventWithTimestamp(&pb.Ev_RunEvent{
EnvironmentId: envId.String(),
RunNumber: env.GetCurrentRunNumber(),
State: env.Sm.Current(),
Error: "",
Transition: e.Event,
TransitionStatus: pb.OpStatus_DONE_OK,
Vars: nil,
}, runEndCompletionTime)

} else {
log.WithField("partition", envId.String()).
Debug("O2 End Completion time already set before after_GO_ERROR")
}
}

errorMsg := ""
Expand Down Expand Up @@ -907,6 +928,10 @@ func (env *Environment) handlerFunc() func(e *fsm.Event) {
"partition": env.id.String(),
}).Debug("environment.sm starting transition")

if len(e.Args) == 0 {
e.Cancel(errors.New("transition missing in FSM event"))
return
}
transition, ok := e.Args[0].(Transition)
if !ok {
e.Cancel(errors.New("transition wrapping error"))
Expand Down
56 changes: 56 additions & 0 deletions core/environment/environment_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package environment

import (
"github.com/AliceO2Group/Control/core/integration"
"github.com/AliceO2Group/Control/core/integration/testplugin"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
"github.com/spf13/viper"
"io"
"os"
"testing"
)

const envTestConfig = "environment_test.yaml"

var tmpDir *string

// Suite-wide setup: creates a temporary working directory, copies the test
// configuration file(s) into it, registers the test integration plugin and
// populates the viper settings used by the specs of this package.
var _ = BeforeSuite(func() {
	var err error
	tmpDir = new(string)
	*tmpDir, err = os.MkdirTemp("", "o2control-core-environment")
	Expect(err).NotTo(HaveOccurred())

	// copyConfig copies a single config file from the package directory into
	// tmpDir. It is a closure so that the deferred Close calls run right after
	// each file is copied rather than piling up until the end of the setup.
	copyConfig := func(configFile string) {
		from, err := os.Open("./" + configFile)
		Expect(err).NotTo(HaveOccurred())
		defer from.Close()

		to, err := os.OpenFile(*tmpDir+"/"+configFile, os.O_RDWR|os.O_CREATE, 0666)
		Expect(err).NotTo(HaveOccurred())
		defer to.Close()

		_, err = io.Copy(to, from)
		Expect(err).NotTo(HaveOccurred())
	}

	// copy config files
	for _, configFile := range []string{envTestConfig} {
		copyConfig(configFile)
	}

	integration.Reset()
	integration.RegisterPlugin("testplugin", "testPluginEndpoint", testplugin.NewPlugin)

	// Reset viper BEFORE setting any key: resetting afterwards would discard
	// the settings made here (coreWorkingDir used to be lost this way).
	viper.Reset()
	viper.Set("coreWorkingDir", *tmpDir) // used by NewRunNumber with YAML backend
	viper.Set("integrationPlugins", []string{"testplugin"})
	viper.Set("testPluginEndpoint", "http://example.com")
	viper.Set("config_endpoint", "file://"+*tmpDir+"/"+envTestConfig)
})

// TestCoreEnvironment is the `go test` entry point that hands control over to
// Ginkgo, which then runs every spec registered in this package.
func TestCoreEnvironment(t *testing.T) {
	const suiteDescription = "Core Environment Test Suite"

	// Route Gomega assertion failures to Ginkgo's Fail handler.
	RegisterFailHandler(Fail)

	RunSpecs(t, suiteDescription)
}

// Suite-wide teardown: removes the temporary working directory referenced by
// tmpDir.
var _ = AfterSuite(func() {
	// tmpDir is nil (or points to an empty path) when the setup did not get as
	// far as creating the directory; there is nothing to clean up in that case
	// and dereferencing a nil pointer would panic.
	if tmpDir == nil || *tmpDir == "" {
		return
	}
	// Report a failed cleanup instead of silently leaving files behind.
	Expect(os.RemoveAll(*tmpDir)).To(Succeed())
})
13 changes: 13 additions & 0 deletions core/environment/environment_test.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# The contents of this file are not actually used by the tests, but the environment test package needs an apricot instance, which in turn requires a non-empty configuration file.
o2:
components:
qc:
TECHNICAL:
any:
entry: "config"
runtime:
aliecs:
defaults:
key1: value1
vars:
key2: value2
120 changes: 120 additions & 0 deletions core/environment/fsm_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
package environment

import (
"github.com/AliceO2Group/Control/common/utils/uid"
. "github.com/onsi/ginkgo/v2"
. "github.com/onsi/gomega"
)

// Specs verifying, for every state of the environment FSM, which transitions
// (events) the state machine accepts and which ones it rejects. Each spec
// forces the FSM into the state under test with SetState and then queries
// Can/Cannot; no transition is actually executed.
var _ = Describe("allowed states and transitions in the environment FSM", func() {
	var env *Environment

	// expectAllowed asserts that each of the given events can be triggered from
	// the FSM's current state. The offset makes a failure point at the calling
	// spec, and the description names the offending event and state.
	expectAllowed := func(events ...string) {
		for _, event := range events {
			ExpectWithOffset(1, env.Sm.Can(event)).To(BeTrue(),
				"%s should be allowed in state %s", event, env.Sm.Current())
		}
	}

	// expectForbidden asserts that none of the given events can be triggered
	// from the FSM's current state.
	expectForbidden := func(events ...string) {
		for _, event := range events {
			ExpectWithOffset(1, env.Sm.Cannot(event)).To(BeTrue(),
				"%s should not be allowed in state %s", event, env.Sm.Current())
		}
	}

	// Every spec starts from a freshly created environment.
	BeforeEach(func() {
		envId, err := uid.FromString("2oDvieFrVTi")
		Expect(err).NotTo(HaveOccurred())

		env, err = newEnvironment(nil, envId)
		Expect(err).NotTo(HaveOccurred())
		Expect(env).NotTo(BeNil())
	})

	When("FSM is created", func() {
		It("should be in STANDBY", func() {
			Expect(env.Sm.Current()).To(Equal("STANDBY"))
		})
	})

	When("FSM is in STANDBY", func() {
		It("should allow for DEPLOY, GO_ERROR and EXIT transitions", func() {
			env.Sm.SetState("STANDBY")
			expectAllowed("DEPLOY", "GO_ERROR", "EXIT")
		})
		It("should not allow for other transitions", func() {
			env.Sm.SetState("STANDBY")
			expectForbidden("CONFIGURE", "RESET", "START_ACTIVITY", "STOP_ACTIVITY", "RECOVER")
		})
	})

	When("FSM is in DEPLOYED", func() {
		// The spec title names the CONFIGURE transition (it previously said
		// "CONFIGURED", which is a state, not a transition).
		It("should allow for CONFIGURE, GO_ERROR and EXIT transitions", func() {
			env.Sm.SetState("DEPLOYED")
			expectAllowed("CONFIGURE", "GO_ERROR", "EXIT")
		})
		It("should not allow for other transitions", func() {
			env.Sm.SetState("DEPLOYED")
			expectForbidden("DEPLOY", "RESET", "START_ACTIVITY", "STOP_ACTIVITY", "RECOVER")
		})
	})

	When("FSM is in CONFIGURED", func() {
		It("should allow for START_ACTIVITY, RESET, GO_ERROR and EXIT transitions", func() {
			env.Sm.SetState("CONFIGURED")
			expectAllowed("START_ACTIVITY", "RESET", "GO_ERROR", "EXIT")
		})
		It("should not allow for other transitions", func() {
			env.Sm.SetState("CONFIGURED")
			expectForbidden("DEPLOY", "CONFIGURE", "STOP_ACTIVITY", "RECOVER")
		})
	})

	When("FSM is in RUNNING", func() {
		It("should allow for STOP_ACTIVITY and GO_ERROR transitions", func() {
			env.Sm.SetState("RUNNING")
			expectAllowed("STOP_ACTIVITY", "GO_ERROR")
		})
		It("should not allow for other transitions", func() {
			env.Sm.SetState("RUNNING")
			expectForbidden("DEPLOY", "RESET", "CONFIGURE", "START_ACTIVITY", "RECOVER", "EXIT")
		})
	})

	When("FSM is in ERROR", func() {
		It("should allow for RECOVER transition", func() {
			env.Sm.SetState("ERROR")
			expectAllowed("RECOVER")
			// We do not include EXIT as possible transition, since anyway we kill tasks not caring about the FSM.
			// There is no known issue which could forbid us from that.
			// TEARDOWN and DESTROY are the artificial transitions which correspond to that.
		})
		It("should not allow for other transitions", func() {
			env.Sm.SetState("ERROR")
			expectForbidden("GO_ERROR", "DEPLOY", "RESET", "CONFIGURE", "START_ACTIVITY", "STOP_ACTIVITY", "EXIT")
		})
	})

	When("FSM is in DONE", func() {
		It("should not allow for any transitions", func() {
			env.Sm.SetState("DONE")
			expectForbidden("GO_ERROR", "DEPLOY", "RESET", "CONFIGURE", "START_ACTIVITY", "STOP_ACTIVITY", "RECOVER", "EXIT")
		})
	})
})
Loading

0 comments on commit e556b5a

Please sign in to comment.