-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Core: Unimplement Map from CharSequenceMap to obey contract
`CharSequenceMap` did not follow `Map` contract fully - `Map.keySet` is supposed to be a view, but `CharSequenceMap` returned a copy. Fixing this one can be hard given how `CharSequenceMap` is implemented internally. - `Map.equals` should compare maps by value. I.e. different map implementations with same key/value pairs are supposed to be equal. `CharSequenceMap` could certainly implement this correctly, but it does not today. The benefit of doing so is unclear and time necessary to do so correctly is non-negligible (we would need to define semantics what to when `CharSequenceMap` is compared to a map with more keys, but such that each key compared as char sequences is equal to some key in the `CharSequenceMap`). Also, having a Map implementation that does not compare the keys by equality is error-prone: - it's likely that new default Map interface methods assume equality-based comparison semantics. JDK maintains non-equality based Map implementation (Comparator-based and Identity-based), and it surely doesn't go without maintenance burden (and not without bugs either, e.g. https://bugs.openjdk.org/browse/JDK-8178355) - it's not unlikely someone tries to make a defensive copy of a map with `ImmutableMap.copyOf` or `Map.copyOf`, resulting in a map with different semantics. Instead of fixing the `CharSequenceMap` to obey the `Map` contract, it seems more pragmatic to just stop implementing the `Map` interface, which this commit does, resulting in a simple to understand class contract and behavior. This commit provides a copy of `CharSequenceMap`, `PathMap`, with `Map` interface removed. Additional positive side-effects of the change - less code and easier to understand contracts - strongly typed `get` and `containsKey` methods (the need to accept `Object` in these was forced by `Map` interface). `CharSequenceSet` has similar issues, to be addressed in a follow-up. Rename CharSequenceMap to PathMap Restore original CharSequenceMap Deprecate CharSequenceMap
- Loading branch information
Showing
7 changed files
with
422 additions
and
18 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.iceberg.util; | ||
|
||
import java.io.Serializable; | ||
import java.util.Collection; | ||
import java.util.List; | ||
import java.util.Map; | ||
import java.util.Map.Entry; | ||
import java.util.Objects; | ||
import java.util.function.Supplier; | ||
import java.util.stream.Collectors; | ||
import java.util.stream.Stream; | ||
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects; | ||
import org.apache.iceberg.relocated.com.google.common.collect.Maps; | ||
|
||
/** | ||
* A map-like data structure that uses char sequences as keys and compares them by char sequence | ||
* value they represent. | ||
* | ||
* <p>This implementation wraps provided keys into {@link CharSequenceWrapper} for consistent | ||
* hashing and equals behavior. This ensures that objects of different types that represent the same | ||
* sequence of characters are treated as equal keys in the map. | ||
* | ||
* <p>Note: CharSequenceMap is not designed for concurrent modification by multiple threads. | ||
* However, it supports safe concurrent reads, assuming there are no concurrent writes. | ||
* | ||
* <p>Note: CharSequenceMap does not support null keys. | ||
* | ||
* @param <V> the type of values | ||
*/ | ||
public class PathMap<V> implements Serializable { | ||
|
||
private static final long serialVersionUID = 1L; | ||
private static final ThreadLocal<CharSequenceWrapper> WRAPPERS = | ||
ThreadLocal.withInitial(() -> CharSequenceWrapper.wrap(null)); | ||
|
||
private final Map<CharSequenceWrapper, V> wrapperMap; | ||
|
||
private PathMap() { | ||
this.wrapperMap = Maps.newHashMap(); | ||
} | ||
|
||
public static <T> PathMap<T> create() { | ||
return new PathMap<>(); | ||
} | ||
|
||
public int size() { | ||
return wrapperMap.size(); | ||
} | ||
|
||
public boolean isEmpty() { | ||
return wrapperMap.isEmpty(); | ||
} | ||
|
||
public boolean containsKey(CharSequence key) { | ||
CharSequenceWrapper wrapper = WRAPPERS.get(); | ||
boolean result = wrapperMap.containsKey(wrapper.set(key)); | ||
wrapper.set(null); // don't hold a reference to the key | ||
return result; | ||
} | ||
|
||
public V get(CharSequence key) { | ||
CharSequenceWrapper wrapper = WRAPPERS.get(); | ||
V result = wrapperMap.get(wrapper.set(key)); | ||
wrapper.set(null); // don't hold a reference to the value | ||
return result; | ||
} | ||
|
||
public V getOrDefault(CharSequence filePath, V defaultValue) { | ||
return MoreObjects.firstNonNull(get(filePath), defaultValue); | ||
} | ||
|
||
public V put(CharSequence key, V value) { | ||
return wrapperMap.put(CharSequenceWrapper.wrap(key), value); | ||
} | ||
|
||
public V remove(CharSequence key) { | ||
CharSequenceWrapper wrapper = WRAPPERS.get(); | ||
V result = wrapperMap.remove(wrapper.set(key)); | ||
wrapper.set(null); // don't hold a reference to the value | ||
return result; | ||
} | ||
|
||
public void clear() { | ||
wrapperMap.clear(); | ||
} | ||
|
||
public List<CharSequence> keys() { | ||
return wrapperMap.keySet().stream().map(CharSequenceWrapper::get).collect(Collectors.toList()); | ||
} | ||
|
||
public Collection<V> values() { | ||
return wrapperMap.values(); | ||
} | ||
|
||
public Stream<Entry<CharSequence, V>> entries() { | ||
return wrapperMap.entrySet().stream() | ||
.map(entry -> Maps.immutableEntry(entry.getKey().get(), entry.getValue())); | ||
} | ||
|
||
public V computeIfAbsent(CharSequence key, Supplier<V> valueSupplier) { | ||
return wrapperMap.computeIfAbsent( | ||
CharSequenceWrapper.wrap(key), ignored -> valueSupplier.get()); | ||
} | ||
|
||
@Override | ||
public boolean equals(Object other) { | ||
if (this == other) { | ||
return true; | ||
} else if (other == null || getClass() != other.getClass()) { | ||
return false; | ||
} | ||
|
||
PathMap<?> that = (PathMap<?>) other; | ||
return Objects.equals(wrapperMap, that.wrapperMap); | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
return Objects.hashCode(wrapperMap); | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return entries().map(this::toString).collect(Collectors.joining(", ", "{", "}")); | ||
} | ||
|
||
private String toString(Entry<CharSequence, V> entry) { | ||
CharSequence key = entry.getKey(); | ||
V value = entry.getValue(); | ||
return key + "=" + (value == this ? "(this Map)" : value); | ||
} | ||
} |
214 changes: 214 additions & 0 deletions
214
api/src/test/java/org/apache/iceberg/util/TestPathMap.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,214 @@ | ||
/* | ||
* Licensed to the Apache Software Foundation (ASF) under one | ||
* or more contributor license agreements. See the NOTICE file | ||
* distributed with this work for additional information | ||
* regarding copyright ownership. The ASF licenses this file | ||
* to you under the Apache License, Version 2.0 (the | ||
* "License"); you may not use this file except in compliance | ||
* with the License. You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, | ||
* software distributed under the License is distributed on an | ||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||
* KIND, either express or implied. See the License for the | ||
* specific language governing permissions and limitations | ||
* under the License. | ||
*/ | ||
package org.apache.iceberg.util; | ||
|
||
import static org.assertj.core.api.Assertions.assertThat; | ||
|
||
import java.util.concurrent.ExecutorService; | ||
import java.util.concurrent.Executors; | ||
import java.util.concurrent.TimeUnit; | ||
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; | ||
import org.assertj.core.api.AbstractObjectAssert; | ||
import org.junit.jupiter.api.Test; | ||
|
||
public class TestPathMap { | ||
|
||
@Test | ||
public void nullString() { | ||
assertMap(PathMap.create()).doesNotContainKey(null); | ||
assertMap(PathMap.create()).doesNotContainValue(null); | ||
} | ||
|
||
@Test | ||
public void testEmptyMap() { | ||
PathMap<String> map = PathMap.create(); | ||
assertMap(map).isEmpty(); | ||
assertMap(map).hasSize(0); | ||
assertMap(map).doesNotContainKey("key"); | ||
assertMap(map).doesNotContainValue("value"); | ||
assertThat(map.values()).isEmpty(); | ||
assertThat(map.keys()).isEmpty(); | ||
assertThat(map.entries()).isEmpty(); | ||
} | ||
|
||
@Test | ||
public void testDifferentCharSequenceImplementations() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("abc", "value1"); | ||
map.put(new StringBuffer("def"), "value2"); | ||
assertMap(map).containsEntry(new StringBuilder("abc"), "value1"); | ||
assertMap(map).containsEntry("def", "value2"); | ||
} | ||
|
||
@Test | ||
public void testPutAndGet() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
assertMap(map).containsEntry("key1", "value1"); | ||
} | ||
|
||
@Test | ||
public void testRemove() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
map.remove(new StringBuilder("key1")); | ||
assertMap(map).doesNotContainKey("key1"); | ||
assertMap(map).isEmpty(); | ||
} | ||
|
||
@Test | ||
public void testClear() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
map.clear(); | ||
assertMap(map).isEmpty(); | ||
} | ||
|
||
@Test | ||
public void testValues() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
map.put("key2", "value2"); | ||
assertThat(map.values()).containsAll(ImmutableList.of("value1", "value2")); | ||
} | ||
|
||
@Test | ||
public void testEntrySet() { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
map.put(new StringBuilder("key2"), "value2"); | ||
assertThat(map.entries()).hasSize(2); | ||
} | ||
|
||
@Test | ||
public void testEquals() { | ||
PathMap<String> map1 = PathMap.create(); | ||
map1.put(new StringBuilder("key"), "value"); | ||
|
||
PathMap<String> map2 = PathMap.create(); | ||
map2.put("key", "value"); | ||
|
||
assertThat(map1).isEqualTo(map2); | ||
} | ||
|
||
@Test | ||
public void testHashCode() { | ||
PathMap<String> map1 = PathMap.create(); | ||
map1.put(new StringBuilder("key"), "value"); | ||
|
||
PathMap<String> map2 = PathMap.create(); | ||
map2.put("key", "value"); | ||
|
||
assertThat(map1.hashCode()).isEqualTo(map2.hashCode()); | ||
} | ||
|
||
@Test | ||
public void testToString() { | ||
PathMap<String> map = PathMap.create(); | ||
|
||
// empty map | ||
assertThat(map.toString()).isEqualTo("{}"); | ||
|
||
// single entry | ||
map.put("key1", "value1"); | ||
assertThat(map.toString()).isEqualTo("{key1=value1}"); | ||
|
||
// multiple entries | ||
map.put("key2", "value2"); | ||
map.put("key3", "value3"); | ||
String toStringResult = map.toString(); | ||
assertThat(toStringResult).contains("key1=value1", "key2=value2", "key3=value3"); | ||
} | ||
|
||
@Test | ||
public void testComputeIfAbsent() { | ||
PathMap<String> map = PathMap.create(); | ||
|
||
String result1 = map.computeIfAbsent("key1", () -> "computedValue1"); | ||
assertThat(result1).isEqualTo("computedValue1"); | ||
assertMap(map).containsEntry("key1", "computedValue1"); | ||
|
||
// verify existing key is not affected | ||
String result2 = map.computeIfAbsent("key1", () -> "newValue"); | ||
assertThat(result2).isEqualTo("computedValue1"); | ||
assertMap(map).containsEntry("key1", "computedValue1"); | ||
} | ||
|
||
@Test | ||
public void testConcurrentReadAccess() throws InterruptedException { | ||
PathMap<String> map = PathMap.create(); | ||
map.put("key1", "value1"); | ||
map.put("key2", "value2"); | ||
map.put("key3", "value3"); | ||
|
||
int numThreads = 10; | ||
ExecutorService executorService = Executors.newFixedThreadPool(numThreads); | ||
|
||
// read the map from multiple threads to ensure thread-local wrappers are used | ||
for (int i = 0; i < numThreads; i++) { | ||
executorService.submit( | ||
() -> { | ||
assertThat(map.get("key1")).isEqualTo("value1"); | ||
assertThat(map.get("key2")).isEqualTo("value2"); | ||
assertThat(map.get("key3")).isEqualTo("value3"); | ||
}); | ||
} | ||
|
||
executorService.shutdown(); | ||
assertThat(executorService.awaitTermination(1, TimeUnit.MINUTES)).isTrue(); | ||
} | ||
|
||
private static <T> CharSequenceMapAssert<T> assertMap(PathMap<T> pathMap) { | ||
return new CharSequenceMapAssert<>(pathMap); | ||
} | ||
|
||
private static class CharSequenceMapAssert<T> | ||
extends AbstractObjectAssert<CharSequenceMapAssert<T>, PathMap<T>> { | ||
CharSequenceMapAssert(PathMap<T> pathMap) { | ||
super(pathMap, CharSequenceMapAssert.class); | ||
} | ||
|
||
void doesNotContainKey(CharSequence key) { | ||
assertThat(actual.keys()).doesNotContain(key); | ||
} | ||
|
||
void doesNotContainValue(T value) { | ||
assertThat(actual.values()).doesNotContain(value); | ||
} | ||
|
||
void isEmpty() { | ||
assertThat(actual.isEmpty()).isTrue(); | ||
} | ||
|
||
void hasSize(int size) { | ||
assertThat(actual.size()).isEqualTo(size); | ||
} | ||
|
||
void containsEntry(CharSequence key, T value) { | ||
assertThat(actual.get(key)).isEqualTo(value); | ||
assertThat(actual.entries()) | ||
.satisfiesOnlyOnce( | ||
entry -> { | ||
assertThat(CharSequenceWrapper.wrap(entry.getKey())) | ||
.isEqualTo(CharSequenceWrapper.wrap(key)); | ||
assertThat(entry.getValue()).isEqualTo(value); | ||
}); | ||
} | ||
} | ||
} |
Oops, something went wrong.