diff --git a/.devcontainer/update.patch b/.devcontainer/update.patch new file mode 100644 index 0000000..3324b78 --- /dev/null +++ b/.devcontainer/update.patch @@ -0,0 +1,20 @@ +diff --git a/resource/traversers/dfu_impl_update.cpp b/resource/traversers/dfu_impl_update.cpp +index f74ea34c..b5dd2210 100644 +--- a/resource/traversers/dfu_impl_update.cpp ++++ b/resource/traversers/dfu_impl_update.cpp +@@ -595,6 +595,7 @@ int dfu_impl_t::mod_dfv (vtx_t u, int64_t jobid, modify_data_t &mod_data) + subsystem_t dom = m_match->dom_subsystem (); + f_out_edg_iterator_t ei, ei_end; + ++ (*m_graph)[u].idata.colors[dom] = m_color.gray (); + if ((rc = mod_idata (u, jobid, dom, mod_data, stop)) != 0 || stop) + goto done; + if ((rc = mod_plan (u, jobid, mod_data)) != 0) +@@ -608,6 +609,7 @@ int dfu_impl_t::mod_dfv (vtx_t u, int64_t jobid, modify_data_t &mod_data) + rc += mod_dfv (tgt, jobid, mod_data); + else + rc += mod_upv (tgt, jobid, mod_data); ++ (*m_graph)[u].idata.colors[subsystem] = m_color.black (); + } + } + done: diff --git a/.github/workflows/build-deploy.yaml b/.github/workflows/build-deploy.yaml index 320a9b5..d43e3dd 100644 --- a/.github/workflows/build-deploy.yaml +++ b/.github/workflows/build-deploy.yaml @@ -20,7 +20,7 @@ jobs: uses: actions/checkout@v4 - uses: actions/setup-go@v4 with: - go-version: ^1.21 + go-version: ^1.22 - name: GHCR Login if: (github.event_name != 'pull_request') uses: docker/login-action@v2 diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index 68ac51a..3399996 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -37,6 +37,13 @@ jobs: go-version: ^1.22 - name: flux-sched build - run: git clone -b grow-api https://github.com/milroy/flux-sched /opt/flux-sched + run: git clone https://github.com/milroy/flux-sched /opt/flux-sched + - name: patch flux sched + run: | + here=$(pwd) + cp .devcontainer/update.patch /opt/flux-sched + cd /opt/flux-sched + git apply update.patch + mkdir build && cd build && cmake ../ && make -j && sudo make install - name: Test - run: LIB_PREFIX=${{ matrix.test[1] }} make test-v \ No newline at end of file + run: LIB_PREFIX=${{ matrix.test[1] }} make test-v diff --git a/Makefile b/Makefile index 58c235b..de06a5e 100644 --- a/Makefile +++ b/Makefile @@ -22,11 +22,11 @@ all: test .PHONY: test test: - $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -ldflags '-w' ./pkg/fluxcli ./pkg/types + $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -count 1 -run TestCancel -ldflags '-w' ./pkg/fluxcli ./pkg/types .PHONY: test-v test-v: - $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -v -ldflags '-w' ./pkg/fluxcli ./pkg/types + $(COMMONENVVAR) $(BUILDENVVAR) LD_LIBRARY_PATH=$(LD_LIBRARY_PATH) go test -count 1 -run TestCancel -v -ldflags '-w' ./pkg/fluxcli ./pkg/types .PHONY: $(LOCALBIN) $(LOCALBIN): diff --git a/pkg/fluxcli/data/cancel/tiny-partial-cancel.json b/pkg/fluxcli/data/cancel/tiny-partial-cancel.json index b874096..0deba66 100644 --- a/pkg/fluxcli/data/cancel/tiny-partial-cancel.json +++ b/pkg/fluxcli/data/cancel/tiny-partial-cancel.json @@ -60,7 +60,7 @@ "name": "node1", "id": 1, "uniq_id": 3, - "rank": 0, + "rank": 1, "exclusive": false, "unit": "", "size": 1, diff --git a/pkg/fluxcli/reapi_cli.go b/pkg/fluxcli/reapi_cli.go index d908f33..8dec4f1 100644 --- a/pkg/fluxcli/reapi_cli.go +++ b/pkg/fluxcli/reapi_cli.go @@ -67,6 +67,7 @@ func (cli *ReapiClient) InitContext(jgf string, options string) (err error) { jobgraph := C.CString(jgf) opts := C.CString(options) + fmt.Println(opts) fluxerr := (int)( C.reapi_cli_initialize( (*C.struct_reapi_cli_ctx)(cli.ctx), jobgraph, (opts), @@ -258,7 +259,7 @@ func (cli *ReapiClient) Cancel(jobid int64, noent_ok bool) (err error) { // bool *full_removal); func (cli *ReapiClient) PartialCancel(jobid int64, r string, noent_ok bool) (bool, error) { - var full_removal bool + full_removal := false var resource = C.CString(r) fluxerr := (int)(C.reapi_cli_partial_cancel((*C.struct_reapi_cli_ctx)(cli.ctx), (C.ulong)(jobid), diff --git a/pkg/fluxcli/reapi_cli_test.go b/pkg/fluxcli/reapi_cli_test.go index d5f90fe..025edc9 100644 --- a/pkg/fluxcli/reapi_cli_test.go +++ b/pkg/fluxcli/reapi_cli_test.go @@ -19,7 +19,7 @@ func TestFluxcliContext(t *testing.T) { // NewClient creates a new client for testing func NewClient(jgf string) (*ReapiClient, error) { cli := NewReapiClient() - err := cli.InitContext(jgf, "{}") + err := cli.InitContext(jgf, "{\"prune-filters\": \"ALL:core,ALL:gpu,ALL:memory\"}") if err != nil { fmt.Printf("Error initializing jobspec context for ReapiClient: %v\n", err) fmt.Printf("Errors so far: %s\n", cli.GetErrMsg()) @@ -94,7 +94,7 @@ func TestCancel(t *testing.T) { // We should be able to request one node t.Log("Match allocate for 1 node should succeed") - reserved, allocated, at, _, jobid, err = cli.MatchAllocate(false, string(cancelJobspec)) + reserved, allocated, at, _, oneNodeJobid, err := cli.MatchAllocate(false, string(cancelJobspec)) if err != nil { t.Logf("Fluxion errors %s\n", cli.GetErrMsg()) t.Errorf("matchAllocate for one node after partial cancel: %v\n", err) @@ -105,13 +105,32 @@ func TestCancel(t *testing.T) { } // Cancel should work of the initial job - // TODO: this doesn't find the job1 id, need to debug. - /*t.Log("cancel for partially cancelled job") + t.Log("cancel for partially cancelled job") err = cli.Cancel(initialJobid, false) if err != nil { t.Logf("Fluxion errors %s\n", cli.GetErrMsg()) t.Errorf("cancel for partially cancelled job: %v\n", err) - }*/ + } + + // And the one node job + t.Log("cancel for one node job") + err = cli.Cancel(oneNodeJobid, false) + if err != nil { + t.Logf("Fluxion errors %s\n", cli.GetErrMsg()) + t.Errorf("cancel for single node job job: %v\n", err) + } + + t.Log("Match allocate for 2 nodes (all of graph resources) should now succeed") + reserved, allocated, at, _, jobid, err = cli.MatchAllocate(false, string(jobspec)) + if err != nil { + t.Logf("Fluxion errors %s\n", cli.GetErrMsg()) + t.Errorf("matchAllocate 2 nodes when graph is empty: %v\n", err) + } + printOutput(reserved, allocated, at, jobid, err) + if allocated == "" { + t.Errorf("matchAllocate should not have failed, we have two nodes again.") + } + } func TestMatchAllocate(t *testing.T) { @@ -199,7 +218,8 @@ func TestMatchAllocate(t *testing.T) { func printOutput(reserved bool, allocated string, at int64, jobid int64, err error) { fmt.Println("\n\t----Match Allocate output---") - fmt.Printf("jobid: %d\nreserved: %t\nallocated: %s\nat: %d\nerror: %v\n", jobid, reserved, allocated, at, err) + isAllocated := allocated != "" + fmt.Printf("jobid: %d\nreserved: %t\nallocated: %v\nat: %d\nerror: %v\n", jobid, reserved, isAllocated, at, err) } func printSatOutput(sat bool, err error) {