From cf236ad6312cbce047dc80fb3802be351b2f8870 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Wed, 9 Oct 2024 23:22:45 -0400 Subject: [PATCH 1/9] feat: add dataset functions Signed-off-by: Grant Linville --- datasets.go | 50 ++++++++++++++ gptscript.go | 170 ++++++++++++++++++++++++++++++++++++++++++++++ gptscript_test.go | 43 ++++++++++++ opts.go | 1 + 4 files changed, 264 insertions(+) create mode 100644 datasets.go diff --git a/datasets.go b/datasets.go new file mode 100644 index 0000000..38ce686 --- /dev/null +++ b/datasets.go @@ -0,0 +1,50 @@ +package gptscript + +type DatasetElementMeta struct { + Name string `json:"name"` + Description string `json:"description"` +} + +type DatasetElement struct { + DatasetElementMeta `json:",inline"` + Contents string `json:"contents"` +} + +type DatasetMeta struct { + ID string `json:"id"` + Name string `json:"name"` + Description string `json:"description"` +} + +type Dataset struct { + DatasetMeta `json:",inline"` + BaseDir string `json:"baseDir,omitempty"` + Elements map[string]DatasetElement `json:"elements"` +} + +type datasetRequest struct { + Input string `json:"input"` + Workspace string `json:"workspace"` + DatasetToolRepo string `json:"datasetToolRepo"` +} + +type createDatasetArgs struct { + Name string `json:"dataset_name"` + Description string `json:"dataset_description"` +} + +type addDatasetElementArgs struct { + DatasetID string `json:"dataset_id"` + ElementName string `json:"element_name"` + ElementDescription string `json:"element_description"` + ElementContent string `json:"element_content"` +} + +type listDatasetElementArgs struct { + DatasetID string `json:"dataset_id"` +} + +type getDatasetElementArgs struct { + DatasetID string `json:"dataset_id"` + Element string `json:"element"` +} diff --git a/gptscript.go b/gptscript.go index 1e30d95..5c4a7a7 100644 --- a/gptscript.go +++ b/gptscript.go @@ -388,6 +388,176 @@ func (g *GPTScript) DeleteCredential(ctx context.Context, credCtx, name string) return err } +// Dataset methods + +func (g *GPTScript) ListDatasets(ctx context.Context, workspace string) ([]DatasetMeta, error) { + if workspace == "" { + workspace = os.Getenv("GPTSCRIPT_WORKSPACE_DIR") + } + + out, err := g.runBasicCommand(ctx, "datasets", datasetRequest{ + Input: "{}", + Workspace: workspace, + DatasetToolRepo: g.globalOpts.DatasetToolRepo, + }) + + if err != nil { + return nil, err + } + + if strings.HasPrefix(out, "ERROR:") { + return nil, fmt.Errorf(out) + } + + var datasets []DatasetMeta + if err = json.Unmarshal([]byte(out), &datasets); err != nil { + return nil, err + } + return datasets, nil +} + +func (g *GPTScript) CreateDataset(ctx context.Context, workspace, name, description string) (Dataset, error) { + if workspace == "" { + workspace = os.Getenv("GPTSCRIPT_WORKSPACE_DIR") + } + + args := createDatasetArgs{ + Name: name, + Description: description, + } + argsJSON, err := json.Marshal(args) + if err != nil { + return Dataset{}, fmt.Errorf("failed to marshal dataset args: %w", err) + } + + out, err := g.runBasicCommand(ctx, "datasets/create", datasetRequest{ + Input: string(argsJSON), + Workspace: workspace, + DatasetToolRepo: g.globalOpts.DatasetToolRepo, + }) + + if err != nil { + return Dataset{}, err + } + + if strings.HasPrefix(out, "ERROR:") { + return Dataset{}, fmt.Errorf(out) + } + + var dataset Dataset + if err = json.Unmarshal([]byte(out), &dataset); err != nil { + return Dataset{}, err + } + return dataset, nil +} + +func (g *GPTScript) AddDatasetElement(ctx context.Context, workspace, datasetID, elementName, elementDescription, elementContent string) (DatasetElementMeta, error) { + if workspace == "" { + workspace = os.Getenv("GPTSCRIPT_WORKSPACE_DIR") + } + + args := addDatasetElementArgs{ + DatasetID: datasetID, + ElementName: elementName, + ElementDescription: elementDescription, + ElementContent: elementContent, + } + argsJSON, err := json.Marshal(args) + if err != nil { + return DatasetElementMeta{}, fmt.Errorf("failed to marshal element args: %w", err) + } + + out, err := g.runBasicCommand(ctx, "datasets/add-element", datasetRequest{ + Input: string(argsJSON), + Workspace: workspace, + DatasetToolRepo: g.globalOpts.DatasetToolRepo, + }) + + if err != nil { + return DatasetElementMeta{}, err + } + + if strings.HasPrefix(out, "ERROR:") { + return DatasetElementMeta{}, fmt.Errorf(out) + } + + var element DatasetElementMeta + if err = json.Unmarshal([]byte(out), &element); err != nil { + return DatasetElementMeta{}, err + } + return element, nil +} + +func (g *GPTScript) ListDatasetElements(ctx context.Context, workspace, datasetID string) ([]DatasetElementMeta, error) { + if workspace == "" { + workspace = os.Getenv("GPTSCRIPT_WORKSPACE_DIR") + } + + args := listDatasetElementArgs{ + DatasetID: datasetID, + } + argsJSON, err := json.Marshal(args) + if err != nil { + return nil, fmt.Errorf("failed to marshal element args: %w", err) + } + + out, err := g.runBasicCommand(ctx, "datasets/list-elements", datasetRequest{ + Input: string(argsJSON), + Workspace: workspace, + DatasetToolRepo: g.globalOpts.DatasetToolRepo, + }) + + if err != nil { + return nil, err + } + + if strings.HasPrefix(out, "ERROR:") { + return nil, fmt.Errorf(out) + } + + var elements []DatasetElementMeta + if err = json.Unmarshal([]byte(out), &elements); err != nil { + return nil, err + } + return elements, nil +} + +func (g *GPTScript) GetDatasetElement(ctx context.Context, workspace, datasetID, elementName string) (DatasetElement, error) { + if workspace == "" { + workspace = os.Getenv("GPTSCRIPT_WORKSPACE_DIR") + } + + args := getDatasetElementArgs{ + DatasetID: datasetID, + Element: elementName, + } + argsJSON, err := json.Marshal(args) + if err != nil { + return DatasetElement{}, fmt.Errorf("failed to marshal element args: %w", err) + } + + out, err := g.runBasicCommand(ctx, "datasets/get-element", datasetRequest{ + Input: string(argsJSON), + Workspace: workspace, + DatasetToolRepo: g.globalOpts.DatasetToolRepo, + }) + + if err != nil { + return DatasetElement{}, err + } + + if strings.HasPrefix(out, "ERROR:") { + return DatasetElement{}, fmt.Errorf(out) + } + + var element DatasetElement + if err = json.Unmarshal([]byte(out), &element); err != nil { + return DatasetElement{}, err + } + + return element, nil +} + func (g *GPTScript) runBasicCommand(ctx context.Context, requestPath string, body any) (string, error) { run := &Run{ url: g.globalOpts.URL, diff --git a/gptscript_test.go b/gptscript_test.go index ec4419c..0ad46a9 100644 --- a/gptscript_test.go +++ b/gptscript_test.go @@ -1560,3 +1560,46 @@ func TestCredentials(t *testing.T) { require.Error(t, err) require.True(t, errors.As(err, &ErrNotFound{})) } + +func TestDatasets(t *testing.T) { + workspace, err := os.MkdirTemp("/tmp", "go-gptscript-test") + require.NoError(t, err) + defer func() { + _ = os.RemoveAll(workspace) + }() + + // Create a dataset + dataset, err := g.CreateDataset(context.Background(), workspace, "test-dataset", "This is a test dataset") + require.NoError(t, err) + require.Equal(t, "test-dataset", dataset.Name) + require.Equal(t, "This is a test dataset", dataset.Description) + require.Equal(t, 0, len(dataset.Elements)) + + // Add an element + elementMeta, err := g.AddDatasetElement(context.Background(), workspace, dataset.ID, "test-element", "This is a test element", "This is the content") + require.NoError(t, err) + require.Equal(t, "test-element", elementMeta.Name) + require.Equal(t, "This is a test element", elementMeta.Description) + + // Get the element + element, err := g.GetDatasetElement(context.Background(), workspace, dataset.ID, "test-element") + require.NoError(t, err) + require.Equal(t, "test-element", element.Name) + require.Equal(t, "This is a test element", element.Description) + require.Equal(t, "This is the content", element.Contents) + + // List elements in the dataset + elements, err := g.ListDatasetElements(context.Background(), workspace, dataset.ID) + require.NoError(t, err) + require.Equal(t, 1, len(elements)) + require.Equal(t, "test-element", elements[0].Name) + require.Equal(t, "This is a test element", elements[0].Description) + + // List datasets + datasets, err := g.ListDatasets(context.Background(), workspace) + require.NoError(t, err) + require.Equal(t, 1, len(datasets)) + require.Equal(t, "test-dataset", datasets[0].Name) + require.Equal(t, "This is a test dataset", datasets[0].Description) + require.Equal(t, dataset.ID, datasets[0].ID) +} diff --git a/opts.go b/opts.go index 779dcb9..18cec91 100644 --- a/opts.go +++ b/opts.go @@ -11,6 +11,7 @@ type GlobalOptions struct { DefaultModelProvider string `json:"DefaultModelProvider"` CacheDir string `json:"CacheDir"` Env []string `json:"env"` + DatasetToolRepo string `json:"DatasetToolRepo"` } func (g GlobalOptions) toEnv() []string { From e1adafa2989fc3fb33a52849948289631e6b9686 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Wed, 9 Oct 2024 23:36:35 -0400 Subject: [PATCH 2/9] fix linter issue Signed-off-by: Grant Linville --- gptscript.go | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/gptscript.go b/gptscript.go index 5c4a7a7..6178c67 100644 --- a/gptscript.go +++ b/gptscript.go @@ -7,6 +7,7 @@ import ( "context" "encoding/base64" "encoding/json" + "errors" "fmt" "io" "log/slog" @@ -406,7 +407,7 @@ func (g *GPTScript) ListDatasets(ctx context.Context, workspace string) ([]Datas } if strings.HasPrefix(out, "ERROR:") { - return nil, fmt.Errorf(out) + return nil, errors.New(out) } var datasets []DatasetMeta @@ -441,7 +442,7 @@ func (g *GPTScript) CreateDataset(ctx context.Context, workspace, name, descript } if strings.HasPrefix(out, "ERROR:") { - return Dataset{}, fmt.Errorf(out) + return Dataset{}, errors.New(out) } var dataset Dataset @@ -478,7 +479,7 @@ func (g *GPTScript) AddDatasetElement(ctx context.Context, workspace, datasetID, } if strings.HasPrefix(out, "ERROR:") { - return DatasetElementMeta{}, fmt.Errorf(out) + return DatasetElementMeta{}, errors.New(out) } var element DatasetElementMeta @@ -512,7 +513,7 @@ func (g *GPTScript) ListDatasetElements(ctx context.Context, workspace, datasetI } if strings.HasPrefix(out, "ERROR:") { - return nil, fmt.Errorf(out) + return nil, errors.New(out) } var elements []DatasetElementMeta @@ -547,7 +548,7 @@ func (g *GPTScript) GetDatasetElement(ctx context.Context, workspace, datasetID, } if strings.HasPrefix(out, "ERROR:") { - return DatasetElement{}, fmt.Errorf(out) + return DatasetElement{}, errors.New(out) } var element DatasetElement From f4ed456319d1c957b7e9c134fc53be2ff11d32e0 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Thu, 10 Oct 2024 12:43:04 -0400 Subject: [PATCH 3/9] options fix Signed-off-by: Grant Linville --- opts.go | 1 + 1 file changed, 1 insertion(+) diff --git a/opts.go b/opts.go index 18cec91..283b4ec 100644 --- a/opts.go +++ b/opts.go @@ -42,6 +42,7 @@ func completeGlobalOptions(opts ...GlobalOptions) GlobalOptions { result.OpenAIBaseURL = firstSet(opt.OpenAIBaseURL, result.OpenAIBaseURL) result.DefaultModel = firstSet(opt.DefaultModel, result.DefaultModel) result.DefaultModelProvider = firstSet(opt.DefaultModelProvider, result.DefaultModelProvider) + result.DatasetToolRepo = firstSet(opt.DatasetToolRepo, result.DatasetToolRepo) result.Env = append(result.Env, opt.Env...) } return result From be538570b2c8adf1de542d66409c7d8947f0afcd Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Thu, 10 Oct 2024 14:08:14 -0400 Subject: [PATCH 4/9] use snake case Signed-off-by: Grant Linville --- datasets.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datasets.go b/datasets.go index 38ce686..00935d1 100644 --- a/datasets.go +++ b/datasets.go @@ -25,7 +25,7 @@ type Dataset struct { type datasetRequest struct { Input string `json:"input"` Workspace string `json:"workspace"` - DatasetToolRepo string `json:"datasetToolRepo"` + DatasetToolRepo string `json:"dataset_tool_repo"` } type createDatasetArgs struct { From 8f0fa05fc0f54e5d9d11fd8e5fca21bae828f8d1 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Thu, 10 Oct 2024 14:36:47 -0400 Subject: [PATCH 5/9] fix dataset type Signed-off-by: Grant Linville --- datasets.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/datasets.go b/datasets.go index 00935d1..e85c87a 100644 --- a/datasets.go +++ b/datasets.go @@ -18,8 +18,8 @@ type DatasetMeta struct { type Dataset struct { DatasetMeta `json:",inline"` - BaseDir string `json:"baseDir,omitempty"` - Elements map[string]DatasetElement `json:"elements"` + BaseDir string `json:"baseDir,omitempty"` + Elements map[string]DatasetElementMeta `json:"elements"` } type datasetRequest struct { From db10ae9d003ca370ca16fdc39632b3dbf8ba3b46 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Thu, 10 Oct 2024 16:10:12 -0400 Subject: [PATCH 6/9] use camelCase Signed-off-by: Grant Linville --- datasets.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/datasets.go b/datasets.go index e85c87a..45b665b 100644 --- a/datasets.go +++ b/datasets.go @@ -25,26 +25,26 @@ type Dataset struct { type datasetRequest struct { Input string `json:"input"` Workspace string `json:"workspace"` - DatasetToolRepo string `json:"dataset_tool_repo"` + DatasetToolRepo string `json:"datasetToolRepo"` } type createDatasetArgs struct { - Name string `json:"dataset_name"` - Description string `json:"dataset_description"` + Name string `json:"datasetName"` + Description string `json:"datasetDescription"` } type addDatasetElementArgs struct { - DatasetID string `json:"dataset_id"` - ElementName string `json:"element_name"` - ElementDescription string `json:"element_description"` - ElementContent string `json:"element_content"` + DatasetID string `json:"datasetID"` + ElementName string `json:"elementName"` + ElementDescription string `json:"elementDescription"` + ElementContent string `json:"elementContent"` } type listDatasetElementArgs struct { - DatasetID string `json:"dataset_id"` + DatasetID string `json:"datasetID"` } type getDatasetElementArgs struct { - DatasetID string `json:"dataset_id"` + DatasetID string `json:"datasetID"` Element string `json:"element"` } From cbf6d34ca96afcf842c0faa2c8e8269fc80ad798 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Fri, 11 Oct 2024 09:30:36 -0400 Subject: [PATCH 7/9] fix test Signed-off-by: Grant Linville --- gptscript_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptscript_test.go b/gptscript_test.go index 0ad46a9..2c203ee 100644 --- a/gptscript_test.go +++ b/gptscript_test.go @@ -670,7 +670,7 @@ func TestParseToolWithTextNode(t *testing.T) { t.Fatalf("No text node found") } - if tools[1].TextNode.Text != "hello\n" { + if strings.TrimSpace(tools[1].TextNode.Text) != "hello" { t.Errorf("Unexpected text: %s", tools[1].TextNode.Text) } if tools[1].TextNode.Fmt != "markdown" { From eb67973a6441c28f630b42267290bf3a1fddd7c3 Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Fri, 11 Oct 2024 09:34:28 -0400 Subject: [PATCH 8/9] fix temp dir Signed-off-by: Grant Linville --- gptscript_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptscript_test.go b/gptscript_test.go index 2c203ee..2a19e60 100644 --- a/gptscript_test.go +++ b/gptscript_test.go @@ -1562,7 +1562,7 @@ func TestCredentials(t *testing.T) { } func TestDatasets(t *testing.T) { - workspace, err := os.MkdirTemp("/tmp", "go-gptscript-test") + workspace, err := os.MkdirTemp("", "go-gptscript-test") require.NoError(t, err) defer func() { _ = os.RemoveAll(workspace) From 789a04122e10e436f3ed5eec4966cbf86226150c Mon Sep 17 00:00:00 2001 From: Grant Linville Date: Fri, 11 Oct 2024 09:41:31 -0400 Subject: [PATCH 9/9] make confirm test more lenient Signed-off-by: Grant Linville --- gptscript_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gptscript_test.go b/gptscript_test.go index 2a19e60..eb471c8 100644 --- a/gptscript_test.go +++ b/gptscript_test.go @@ -1047,7 +1047,7 @@ func TestConfirmDeny(t *testing.T) { return } - if !strings.Contains(confirmCallEvent.Input, "\"ls\"") { + if !strings.Contains(confirmCallEvent.Input, "ls") { t.Errorf("unexpected confirm input: %s", confirmCallEvent.Input) }