-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[stability] Survive vips sigabrt #75
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
package main | ||
|
||
import ( | ||
datago "datago/pkg" | ||
"flag" | ||
"fmt" | ||
"os" | ||
"runtime/pprof" | ||
"runtime/trace" | ||
"time" | ||
) | ||
|
||
func main() { | ||
|
||
cropAndResize := flag.Bool("crop_and_resize", false, "Whether to crop and resize the images and masks") | ||
itemFetchBuffer := flag.Int("item_fetch_buffer", 256, "The number of items to pre-load") | ||
itemReadyBuffer := flag.Int("item_ready_buffer", 128, "The number of items ready to be served") | ||
limit := flag.Int("limit", 2000, "The number of items to fetch") | ||
profile := flag.Bool("profile", false, "Whether to profile the code") | ||
source := flag.String("source", os.Getenv("DATAGO_TEST_DB"), "The data source to select on") | ||
|
||
// Parse the flags before setting the configuration values | ||
flag.Parse() | ||
|
||
// Initialize the configuration | ||
config := datago.GetDatagoConfig() | ||
|
||
sourceConfig := datago.GetDefaultSourceDBConfig() | ||
sourceConfig.Sources = *source | ||
|
||
config.ImageConfig = datago.GetDefaultImageTransformConfig() | ||
config.ImageConfig.CropAndResize = *cropAndResize | ||
|
||
config.SourceConfig = sourceConfig | ||
config.PrefetchBufferSize = int32(*itemFetchBuffer) | ||
config.SamplesBufferSize = int32(*itemReadyBuffer) | ||
config.Limit = *limit | ||
|
||
dataroom_client := datago.GetClient(config) | ||
|
||
// Go-routine which will feed the sample data to the workers | ||
// and fetch the next page | ||
startTime := time.Now() // Record the start time | ||
|
||
if *profile { | ||
fmt.Println("Profiling the code") | ||
{ | ||
f, _ := os.Create("trace.out") | ||
// read with go tool trace trace.out | ||
|
||
err := trace.Start(f) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer trace.Stop() | ||
} | ||
{ | ||
f, _ := os.Create("cpu.prof") | ||
// read with go tool pprof cpu.prof | ||
err := pprof.StartCPUProfile(f) | ||
if err != nil { | ||
panic(err) | ||
} | ||
defer pprof.StopCPUProfile() | ||
} | ||
} | ||
|
||
dataroom_client.Start() | ||
|
||
// Fetch all of the binary payloads as they become available | ||
// NOTE: This is useless, just making sure that we empty the payloads channel | ||
n_samples := 0 | ||
for { | ||
sample := dataroom_client.GetSample() | ||
if sample.ID == "" { | ||
fmt.Println("No more samples") | ||
break | ||
} | ||
n_samples++ | ||
} | ||
|
||
// Cancel the context to kill the goroutines | ||
dataroom_client.Stop() | ||
|
||
// Calculate the elapsed time | ||
elapsedTime := time.Since(startTime) | ||
fps := float64(config.Limit) / elapsedTime.Seconds() | ||
fmt.Printf("Total execution time: %.2f seconds. Samples %d \n", elapsedTime.Seconds(), n_samples) | ||
fmt.Printf("Average throughput: %.2f samples per second \n", fps) | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -29,11 +29,9 @@ func main() { | |
sourceConfig.Rank = 0 | ||
sourceConfig.WorldSize = 1 | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. a binary to stress test the lib in the filesystem path |
||
config.ImageConfig = datago.ImageTransformConfig{ | ||
DefaultImageSize: 1024, | ||
DownsamplingRatio: 32, | ||
CropAndResize: *cropAndResize, | ||
} | ||
config.ImageConfig = datago.GetDefaultImageTransformConfig() | ||
config.ImageConfig.CropAndResize = *cropAndResize | ||
|
||
config.SourceConfig = sourceConfig | ||
config.PrefetchBufferSize = int32(*itemFetchBuffer) | ||
config.SamplesBufferSize = int32(*itemReadyBuffer) | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -51,6 +51,12 @@ func (c *ImageTransformConfig) setDefaults() { | |
c.PreEncodeImages = false | ||
} | ||
|
||
func GetDefaultImageTransformConfig() ImageTransformConfig { | ||
config := ImageTransformConfig{} | ||
config.setDefaults() | ||
return config | ||
} | ||
|
||
// DatagoConfig is the main configuration structure for the datago client | ||
type DatagoConfig struct { | ||
SourceType DatagoSourceType `json:"source_type"` | ||
|
@@ -161,12 +167,8 @@ type DatagoClient struct { | |
|
||
// GetClient is a constructor for the DatagoClient, given a DatagoConfig | ||
func GetClient(config DatagoConfig) *DatagoClient { | ||
// Make sure that the GC is run more often than usual | ||
// VIPS will allocate a lot of memory and we want to make sure that it's released as soon as possible | ||
os.Setenv("GOGC", "10") // Default is 100, we're running it when heap is 10% larger than the last GC | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this was ugly, not required when we properly release the buffers (and it doesn't crash on us) |
||
|
||
// Initialize the vips library | ||
err := os.Setenv("VIPS_DISC_THRESHOLD", "5g") | ||
err := os.Setenv("VIPS_DISC_THRESHOLD", "10g") | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. In other terms: never go to disk. Could be changed depending on where this runs, 2TB servers for Photoroom so RAM is fine |
||
if err != nil { | ||
log.Panicf("Error setting VIPS_DISC_THRESHOLD: %v", err) | ||
} | ||
|
@@ -255,7 +257,7 @@ func (c *DatagoClient) Start() { | |
if c.imageConfig.CropAndResize { | ||
fmt.Println("Cropping and resizing images") | ||
fmt.Println("Base image size | downsampling ratio | min | max:", c.imageConfig.DefaultImageSize, c.imageConfig.DownsamplingRatio, c.imageConfig.MinAspectRatio, c.imageConfig.MaxAspectRatio) | ||
arAwareTransform = newARAwareTransform(c.imageConfig) | ||
arAwareTransform = GetArAwareTransform(c.imageConfig) | ||
} | ||
|
||
if c.imageConfig.PreEncodeImages { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
a binary to stress test the lib in the vectorDB path