Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add unit testcases for files in Text format. #2274

Merged
merged 10 commits into from
Mar 12, 2024
Original file line number Diff line number Diff line change
Expand Up @@ -23,22 +23,23 @@ import (
"testing"
"time"

"github.com/google/go-cmp/cmp"
commonv1beta1 "github.com/kubeflow/katib/pkg/apis/controller/common/v1beta1"
v1beta1 "github.com/kubeflow/katib/pkg/apis/manager/v1beta1"
"github.com/kubeflow/katib/pkg/controller.v1beta1/consts"
)

var testJsonDataPath = filepath.Join("testdata", "JSON")

func TestCollectObservationLog(t *testing.T) {
var (
testJsonDataPath = filepath.Join("testdata", "JSON")
testTextDataPath = filepath.Join("testdata", "TEXT")
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
)

if err := generateTestFiles(); err != nil {
func TestCollectJsonObservationLog(t *testing.T) {
if err := generateJsonTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testJsonDataPath))

Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
// TODO (tenzen-y): We should add tests for logs in TEXT format.
// Ref: https://github.com/kubeflow/katib/issues/1756
testCases := []struct {
description string
filePath string
Expand Down Expand Up @@ -167,7 +168,131 @@ func TestCollectObservationLog(t *testing.T) {
}
}

func generateTestFiles() error {
func TestCollectTextObservationLog(t *testing.T) {
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
if err := generateTextTestFiles(); err != nil {
t.Fatal(err)
}
defer os.RemoveAll(filepath.Dir(testTextDataPath))

testCases := map[string]struct {
filePath string
metrics []string
filters []string
fileFormat commonv1beta1.FileFormat
err bool
expected *v1beta1.ObservationLog
}{
"Positive case for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As we mentioned in the issue, our motivation is to verify if this filter works fine to avoid bugs like this: #1754

We should add unit test for our File Metrics Collector to verify log parsing.
That will help to avoid issues similar to: #1754

So, could you add more than valid and invalid cases?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, I'll do it later.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Some new test cases similar to bugs in #1754 have been added to the test file.

fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.8078",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.5183",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.8671",
},
},
},
},
},
"Invalid file name": {
filePath: "invalid",
err: true,
},
"Invalid file format": {
filePath: filepath.Join(testTextDataPath, "good.log"),
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
fileFormat: "invalid",
err: true,
},
"Invalid timestamp for logs in TEXT format": {
filePath: filepath.Join(testTextDataPath, "invalid-timestamp.log"),
metrics: []string{"accuracy", "loss"},
filters: []string{"{metricName: ([\\w|-]+), metricValue: ((-?\\d+)(\\.\\d+)?)}"},
fileFormat: commonv1beta1.TextFormat,
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: "2024-03-04T17:55:08Z",
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: "0.6752",
},
},
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "loss",
Value: "0.3634",
},
},
},
},
},
"Missing objective metric in training logs": {
filePath: filepath.Join(testTextDataPath, "missing-objective-metric.log"),
fileFormat: commonv1beta1.TextFormat,
metrics: []string{"accuracy", "loss"},
expected: &v1beta1.ObservationLog{
MetricLogs: []*v1beta1.MetricLog{
{
TimeStamp: time.Time{}.UTC().Format(time.RFC3339),
Metric: &v1beta1.Metric{
Name: "accuracy",
Value: consts.UnavailableMetricValue,
},
},
},
},
},
}

for name, test := range testCases {
t.Run(name, func(t *testing.T) {
actual, err := CollectObservationLog(test.filePath, test.metrics, test.filters, test.fileFormat)
if (err != nil) != test.err {
t.Errorf("\nGOT: \n%v\nWANT: %v\n", err, test.err)
} else {
if cmp.Diff(actual, test.expected) != "" {
t.Errorf("Expected %v\n got %v", test.expected, actual)
}
}
})
}
}

func generateJsonTestFiles() error {
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
if _, err := os.Stat(testJsonDataPath); err != nil {
if err = os.MkdirAll(testJsonDataPath, 0700); err != nil {
return err
Expand Down Expand Up @@ -215,3 +340,43 @@ func generateTestFiles() error {

return nil
}

func generateTextTestFiles() error {
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
if _, err := os.Stat(testTextDataPath); err != nil {
if err = os.MkdirAll(testTextDataPath, 0700); err != nil {
return err
}
}
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we move directory creation outside of these functions and create it before we invoke them ?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both are okay. But keeping them in generateXXXTestFiles will make the code look more tidy.

Copy link
Member

@andreyvelich andreyvelich Mar 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's true, but since we always run these functions sequentially why do we need to have duplicate code to create directories in each of them ?
Maybe we could have another function called: generateTestDirectory which will create testdata dir.
WDYT @Electronic-Waste @tenzen-y ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich That sounds reasonable. Having separate function wouldn't be wroth it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I meant that consolidating 2 generators into 1 would be better.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, that sounds reasonable. I'll modify it soon.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@andreyvelich @tenzen-y I also integrate os.RemoveAll operations into the func deleteTestDirs(). plz cc👀


testData := []struct {
fileName string
data string
}{
{
fileName: "good.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.8078};{metricName: loss, metricValue: 0.5183}
2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
{metricName: loss, metricValue: 0.8671}`,
Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
},
{
fileName: "invalid-timestamp.log",
data: `2024-03-04T17:55:08Z INFO {metricName: accuracy, metricValue: 0.6752}
invalid INFO {metricName: loss, metricValue: 0.3634}`,
},
{
fileName: "missing-objective-metric.log",
data: `2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.3634}
2024-03-04T17:55:08Z INFO {metricName: loss, metricValue: 0.8671}`,
},
}

for _, td := range testData {
filePath := filepath.Join(testTextDataPath, td.fileName)
if err := os.WriteFile(filePath, []byte(td.data), 0600); err != nil {
return err
}
}

Electronic-Waste marked this conversation as resolved.
Show resolved Hide resolved
return nil
}
Loading