Skip to content

Commit

Permalink
Modify sort algorithm to treat unresolvable IP addresses as random
Browse files Browse the repository at this point in the history
The underlying issue relates to the "Kansas" problem, where valid IP addresses that don't
have a known GeoIP resolution are assigned the default Kansas lat/long.

This approach is _slightly_ different when compared to the GH issue, but I think it resolves
the issue in a simpler fashion. Rather than incorporate logic that says "if Kansas AND
large accuracy radius", this only looks at accuracy radius because a high radius, set to
900 km here, is as good as "I don't know where this IP comes from".

When we see such a radius, we set the lat/long to null (0,0) -- and this happens for both
client IPs AND server registrations.

The sort logic was modified to detect any null lat/long and create a random, negative
distance weight for these. The result is we sort these to the end of our list. If the
client IP is null, everything gets sorted to the end of the list, so we either follow
a load-aware sorting only (if sort method is distanceAndLoad/adaptive) or completely
random (if sort method is distance). When this happens with a server IP but not the
client IP, that server is sorted behind other servers with a valid distance.

Finally, the Director logs errors when either of these situations occur. On startup,
a director run in OSDF mode spits out the origins/caches that currently have no
GeoIP resolution as a WARNING that mentions the accuracy radius, and does the same for
client requests. This is handled differently for IP addresses that are invalid (perhaps
because they come from a private IP range), with an error that indicates a true null.
  • Loading branch information
jhiemstrawisc committed Aug 8, 2024
1 parent ca1739f commit 9491fce
Show file tree
Hide file tree
Showing 5 changed files with 121 additions and 37 deletions.
2 changes: 2 additions & 0 deletions director/cache_ads.go
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,8 @@ func updateLatLong(ad *server_structs.ServerAd) error {
if !ok {
return errors.New("Failed to create address object from IP")
}
// NOTE: If GeoIP resolution of this address fails, lat/long are set to 0.0 (the null lat/long)
// This causes the server to be sorted to the end of the list whenever the Director requires distance-aware sorting.
lat, long, err := getLatLong(addr)
if err != nil {
return err
Expand Down
80 changes: 56 additions & 24 deletions director/sort.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,9 +152,24 @@ func getLatLong(addr netip.Addr) (lat float64, long float64, err error) {
lat = record.Location.Latitude
long = record.Location.Longitude

// If the lat/long results in null _before_ we've had a chance to potentially set it to null, log a warning.
// There's likely a problem with the GeoIP database or the IP address. Usually this just means the IP address
// comes from a private range.
if lat == 0 && long == 0 {
log.Infof("GeoIP Resolution of the address %s resulted in the nul lat/long.", ip.String())
log.Warningf("GeoIP Resolution of the address %s resulted in the null lat/long. This will result in random server sorting.", ip.String())
}

// MaxMind provides an accuracy radius in kilometers. When it actually has no clue how to resolve a valid, public
// IP, it sets the radius to 1000. If we get a radius of 900 or more (probably even much less than this...), we
// should be very suspicious of the data, and mark it as appearing at the null lat/long (and provide a warning in
// the Director), which also triggers random weighting in our sort algorithms.
if record.Location.AccuracyRadius >= 900 {
log.Warningf("GeoIP resolution of the address %s resulted in a suspiciously large accuracy radius of %d km. "+
"This will be treated as GeoIP resolution failure and result in random server sorting. Setting lat/long to null.", ip.String(), record.Location.AccuracyRadius)
lat = 0
long = 0
}

return
}

Expand All @@ -168,6 +183,15 @@ func getClientLatLong(addr netip.Addr) (coord Coordinate, ok bool) {
return
}

// Any time we end up with a random distance, we flip the weights negative. When this happens,
// we want a multiplier that should double a servers rank to multiply the weight by 0.5, not 2.0
func invertWeightIfNeeded(isRand bool, weight float64) float64 {
if isRand {
return 1 / weight
}
return weight
}

// The all-in-one method to sort serverAds based on Dirtector.CacheSortMethod configuration parameter
// - distance: sort serverAds by the distance between the geolocation of the servers and the client
// - distanceAndLoad: sort serverAds by the distance with gated halving factor (see details in the adaptive method)
Expand Down Expand Up @@ -197,56 +221,64 @@ func sortServerAds(clientAddr netip.Addr, ads []server_structs.ServerAd, availab
for idx, ad := range ads {
switch server_structs.SortType(sortMethod) {
case server_structs.DistanceType:
// If we can't get the client coordinates, then simply use random sort
if !clientCoordOk {
weights[idx] = SwapMap{rand.Float64(), idx}
continue
}
if ad.Latitude == 0 && ad.Longitude == 0 {
// If server lat and long are 0, the server geolocation is unknown
// Below we sort weights in descending order, so we assign negative value here,
// causing them to always be at the end of the sorted list.
weights[idx] = SwapMap{0 - rand.Float64(), idx}
// If either client or ad coordinates are null, the underlying distanceWeight function will return a random weight
weight, isRand := distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, false)
if isRand {
// Guarantee randomly-weighted servers are sorted to the bottom
weights[idx] = SwapMap{0 - weight, idx}
} else {
weights[idx] = SwapMap{distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, false),
idx}
weights[idx] = SwapMap{weight, idx}
}
case server_structs.DistanceAndLoadType:
weight := 1.0
// Distance weight
var distance float64
var isRand bool
if clientCoordOk {
distance := distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, true)
dWeighted := gatedHalvingMultiplier(distance, distanceHalvingThreshold, distanceHalvingFactor)
weight *= dWeighted
distance, isRand = distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, true)
if isRand {
// We'll set weight to negative, but still perform the load weight calculation
// This way, if the client or multiple servers result in null lat/long, we'll still
// prefer whichever server has the lowest load.
weight = 0 - rand.Float64()
} else {
dWeighted := gatedHalvingMultiplier(distance, distanceHalvingThreshold, distanceHalvingFactor)
weight *= dWeighted
}
}

// Load weight
lWeighted := gatedHalvingMultiplier(ad.IOLoad, loadHalvingThreshold, loadHalvingFactor)
weight *= lWeighted

weight *= invertWeightIfNeeded(isRand, lWeighted)
weights[idx] = SwapMap{weight, idx}
case server_structs.AdaptiveType:
weight := 1.0
// Distance weight
var distance float64
var isRand bool
if clientCoordOk {
distance := distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, true)
dWeighted := gatedHalvingMultiplier(distance, distanceHalvingThreshold, distanceHalvingFactor)
weight *= dWeighted
distance, isRand = distanceWeight(clientCoord.Lat, clientCoord.Long, ad.Latitude, ad.Longitude, true)
if isRand {
weight = 0 - rand.Float64()
} else {
dWeighted := gatedHalvingMultiplier(distance, distanceHalvingThreshold, distanceHalvingFactor)
weight *= dWeighted
}
}
// Availability weight
if availabilityMap == nil {
weight *= 1.0
} else if hasObj, ok := availabilityMap[ad.URL.String()]; ok && hasObj {
weight *= 2.0
weight *= invertWeightIfNeeded(isRand, 2.0)
} else if !ok {
weight *= 1.0
} else { // ok but does not have the object
weight *= 0.5
weight *= invertWeightIfNeeded(isRand, 0.5)
}

// Load weight
lWeighted := gatedHalvingMultiplier(ad.IOLoad, loadHalvingThreshold, loadHalvingFactor)
weight *= lWeighted
weight *= invertWeightIfNeeded(isRand, lWeighted)

weights[idx] = SwapMap{weight, idx}
case server_structs.RandomType:
Expand Down
14 changes: 10 additions & 4 deletions director/sort_algorithms.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,18 @@ func distanceOnSphere(lat1 float64, long1 float64, lat2 float64, long2 float64)
// with priority (higher weight is higher priority is lower distance)
//
// Is realD set to true, then return the distance between the two coordinates in miles
func distanceWeight(lat1 float64, long1 float64, lat2 float64, long2 float64, realD bool) float64 {
if realD {
return distanceOnSphere(lat1, long1, lat2, long2) * earthRadiusToMilesFactor
func distanceWeight(lat1 float64, long1 float64, lat2 float64, long2 float64, realD bool) (weight float64, isRand bool) {
// If either coordinate is sitting at null island, return a random weight
isRand = false
if (lat1 == 0.0 && long1 == 0.0) || (lat2 == 0.0 && long2 == 0.0) {
isRand = true
weight = rand.Float64() // technically this returns [0,1)
} else if realD {
weight = distanceOnSphere(lat1, long1, lat2, long2) * earthRadiusToMilesFactor
} else {
return 1 - (distanceOnSphere(lat1, long1, lat2, long2) / math.Pi)
weight = 1 - (distanceOnSphere(lat1, long1, lat2, long2) / math.Pi)
}
return
}

// Given the input value, return a weight [0, 1.0] based on the gated havling of the base weight 1.0.
Expand Down
28 changes: 25 additions & 3 deletions director/sort_algorithms_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,31 @@ import (

func TestDistanceWeight(t *testing.T) {
// Some basic values to test and ensure it returns miles, not radian
assert.Equal(t, 0.0, distanceWeight(43.0753, -89.4114, 43.0753, -89.4114, true))
assert.Equal(t, 69.0, math.Round(distanceWeight(42.0753, -89.4114, 43.0753, -89.4114, true)))
assert.Equal(t, 50.0, math.Round(distanceWeight(43.0753, -90.4114, 43.0753, -89.4114, true)))
d, isRand := distanceWeight(43.0753, -89.4114, 43.0753, -89.4114, true)
assert.False(t, isRand)
assert.Equal(t, 0.0, d)

d, isRand = distanceWeight(42.0753, -89.4114, 43.0753, -89.4114, true)
assert.False(t, isRand)
assert.Equal(t, 69.0, math.Round(d))

d, isRand = distanceWeight(43.0753, -90.4114, 43.0753, -89.4114, true)
assert.False(t, isRand)
assert.Equal(t, 50.0, math.Round(d))

// Test passing null lat long
_, isRand = distanceWeight(0, 0, 0, 0, true)
assert.True(t, isRand)

_, isRand = distanceWeight(42.0753, -89.4114, 0, 0, true)
assert.True(t, isRand)

_, isRand = distanceWeight(0, 0, 43.0753, -89.4114, true)
assert.True(t, isRand)

// Make sure a 0 in both is not mistaken for null
_, isRand = distanceWeight(43.0753, 0, 0, -89.4114, true)
assert.False(t, isRand)
}

func TestGatedHalvingMultiplier(t *testing.T) {
Expand Down
34 changes: 28 additions & 6 deletions director/sort_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,12 @@ func TestSortServerAds(t *testing.T) {
Latitude: -77.8500,
Longitude: 166.6666,
}
nullIslandServer := server_structs.ServerAd{
Name: "Null Island Server",
URL: url.URL{Scheme: "https", Host: "null-cache.org"},
Latitude: 0.0,
Longitude: 0.0,
}

// Mock servers with same geolocation but different loads
serverLoad1 := server_structs.ServerAd{
Expand Down Expand Up @@ -269,6 +275,20 @@ func TestSortServerAds(t *testing.T) {
IOLoad: 60.3,
}

serverLoad5NullLoc := server_structs.ServerAd{
Name: "load5NullLoc",
Latitude: 0.0,
Longitude: 0.0,
IOLoad: 10.0,
}

serverLoad6NullLoc := server_structs.ServerAd{
Name: "load6NullLoc",
Latitude: 0.0,
Longitude: 0.0,
IOLoad: 99.0,
}

// These are listed in order of increasing distance from the clientIP
// However, madison server is overloaded and bigBenServer has very high load
madisonServerHighLoad := server_structs.ServerAd{
Expand All @@ -293,9 +313,9 @@ func TestSortServerAds(t *testing.T) {
}

randAds := []server_structs.ServerAd{madisonServer, sdscServer, bigBenServer, kremlinServer,
daejeonServer, mcMurdoServer}
daejeonServer, mcMurdoServer, nullIslandServer}

randLoadAds := []server_structs.ServerAd{serverLoad4, serverLoad1, serverLoad3, serverLoad2}
randLoadAds := []server_structs.ServerAd{serverLoad6NullLoc, serverLoad4, serverLoad1, serverLoad3, serverLoad2}

randDistanceLoadAds := []server_structs.ServerAd{
madisonServerHighLoad,
Expand All @@ -305,6 +325,8 @@ func TestSortServerAds(t *testing.T) {
kremlinServer,
daejeonServer,
mcMurdoServer,
serverLoad5NullLoc,
serverLoad6NullLoc,
}

// Shuffle so that we don't give the sort function an already-sorted slice!
Expand All @@ -315,7 +337,7 @@ func TestSortServerAds(t *testing.T) {
t.Run("test-distance-sort", func(t *testing.T) {
viper.Set("Director.CacheSortMethod", "distance")
expected := []server_structs.ServerAd{madisonServer, sdscServer, bigBenServer, kremlinServer,
daejeonServer, mcMurdoServer}
daejeonServer, mcMurdoServer, nullIslandServer}
sorted, err := sortServerAds(clientIP, randAds, nil)
require.NoError(t, err)
assert.EqualValues(t, expected, sorted)
Expand All @@ -325,15 +347,15 @@ func TestSortServerAds(t *testing.T) {
// Should return the same ordering as the distance test
viper.Set("Director.CacheSortMethod", "distanceAndLoad")
expected := []server_structs.ServerAd{madisonServer, sdscServer, bigBenServer, kremlinServer,
daejeonServer, mcMurdoServer}
daejeonServer, mcMurdoServer, nullIslandServer}
sorted, err := sortServerAds(clientIP, randAds, nil)
require.NoError(t, err)
assert.EqualValues(t, expected, sorted)
})

t.Run("test-distanceAndLoad-sort-load-only", func(t *testing.T) {
viper.Set("Director.CacheSortMethod", "distanceAndLoad")
expected := []server_structs.ServerAd{serverLoad1, serverLoad2, serverLoad3, serverLoad4}
expected := []server_structs.ServerAd{serverLoad1, serverLoad2, serverLoad3, serverLoad4, serverLoad6NullLoc}
sorted, err := sortServerAds(clientIP, randLoadAds, nil)
require.NoError(t, err)
assert.EqualValues(t, expected, sorted)
Expand All @@ -342,7 +364,7 @@ func TestSortServerAds(t *testing.T) {
t.Run("test-distanceAndLoad-sort-distance-and-load", func(t *testing.T) {
viper.Set("Director.CacheSortMethod", "distanceAndLoad")
expected := []server_structs.ServerAd{chicagoLowload, sdscServer, madisonServerHighLoad, kremlinServer,
daejeonServer, mcMurdoServer, bigBenServerHighLoad}
daejeonServer, mcMurdoServer, bigBenServerHighLoad, serverLoad5NullLoc, serverLoad6NullLoc}
sorted, err := sortServerAds(clientIP, randDistanceLoadAds, nil)
require.NoError(t, err)
assert.EqualValues(t, expected, sorted)
Expand Down

0 comments on commit 9491fce

Please sign in to comment.