Skip to content

Commit

Permalink
Add an IDN helper (#2217)
Browse files Browse the repository at this point in the history
* Add an IDN helper

Add a helper that checks the validity of labels in IDNs.
All organizes TLDs according to the IDNs they support.
  • Loading branch information
weiminyu committed Nov 11, 2023
1 parent cf9c1ec commit 9e3c589
Show file tree
Hide file tree
Showing 5 changed files with 233 additions and 4 deletions.
108 changes: 108 additions & 0 deletions core/src/main/java/google/registry/bsa/IdnChecker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright 2023 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.bsa;

import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static com.google.common.collect.Maps.transformValues;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.collect.Sets.SetView;
import google.registry.model.tld.Tld;
import google.registry.model.tld.Tld.TldType;
import google.registry.model.tld.Tlds;
import google.registry.tldconfig.idn.IdnLabelValidator;
import google.registry.tldconfig.idn.IdnTableEnum;
import google.registry.util.Clock;
import javax.inject.Inject;
import org.joda.time.DateTime;

/**
* Checks labels' validity wrt Idns in TLDs enrolled with BSA.
*
* <p>Each instance takes a snapshot of the TLDs at instantiation time, and should be limited to the
* Request scope.
*/
public class IdnChecker {
private static final IdnLabelValidator IDN_LABEL_VALIDATOR = new IdnLabelValidator();

private final ImmutableMap<IdnTableEnum, ImmutableSet<Tld>> idnToTlds;
private final ImmutableSet<Tld> allTlds;

@Inject
IdnChecker(Clock clock) {
this.idnToTlds = getIdnToTldMap(clock.nowUtc());
allTlds = idnToTlds.values().stream().flatMap(ImmutableSet::stream).collect(toImmutableSet());
}

// TODO(11/30/2023): Remove below when new Tld schema is deployed and the `getBsaEnrollStartTime`
// method is no longer hardcoded.
@VisibleForTesting
IdnChecker(ImmutableMap<IdnTableEnum, ImmutableSet<Tld>> idnToTlds) {
this.idnToTlds = idnToTlds;
allTlds = idnToTlds.values().stream().flatMap(ImmutableSet::stream).collect(toImmutableSet());
}

/** Returns all IDNs in which the {@code label} is valid. */
ImmutableSet<IdnTableEnum> getAllValidIdns(String label) {
return idnToTlds.keySet().stream()
.filter(idnTable -> idnTable.getTable().isValidLabel(label))
.collect(toImmutableSet());
}

/**
* Returns the TLDs that support at least one IDN in the {@code idnTables}.
*
* @param idnTables String names of {@link IdnTableEnum} values
*/
public ImmutableSet<Tld> getSupportingTlds(ImmutableSet<String> idnTables) {
return idnTables.stream()
.map(IdnTableEnum::valueOf)
.filter(idnToTlds::containsKey)
.map(idnToTlds::get)
.flatMap(ImmutableSet::stream)
.collect(toImmutableSet());
}

/**
* Returns the TLDs that do not support any IDN in the {@code idnTables}.
*
* @param idnTables String names of {@link IdnTableEnum} values
*/
public SetView<Tld> getForbiddingTlds(ImmutableSet<String> idnTables) {
return Sets.difference(allTlds, getSupportingTlds(idnTables));
}

private static boolean isEnrolledWithBsa(Tld tld, DateTime now) {
DateTime enrollTime = tld.getBsaEnrollStartTime();
return enrollTime != null && enrollTime.isBefore(now);
}

private static ImmutableMap<IdnTableEnum, ImmutableSet<Tld>> getIdnToTldMap(DateTime now) {
ImmutableMultimap.Builder<IdnTableEnum, Tld> idnToTldMap = new ImmutableMultimap.Builder();
Tlds.getTldEntitiesOfType(TldType.REAL).stream()
.filter(tld -> isEnrolledWithBsa(tld, now))
.forEach(
tld -> {
for (IdnTableEnum idn : IDN_LABEL_VALIDATOR.getIdnTablesForTld(tld)) {
idnToTldMap.put(idn, tld);
}
});
return ImmutableMap.copyOf(transformValues(idnToTldMap.build().asMap(), ImmutableSet::copyOf));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -38,14 +38,18 @@ public final class IdnLabelValidator {
public Optional<String> findValidIdnTableForTld(String label, String tldStr) {
String unicodeString = Idn.toUnicode(label);
Tld tld = Tld.get(tldStr); // uses the cache
ImmutableSet<IdnTableEnum> idnTablesForTld = tld.getIdnTables();
ImmutableSet<IdnTableEnum> idnTables =
idnTablesForTld.isEmpty() ? DEFAULT_IDN_TABLES : idnTablesForTld;
ImmutableSet<IdnTableEnum> idnTables = getIdnTablesForTld(tld);
for (IdnTableEnum idnTable : idnTables) {
if (idnTable.getTable().isValidLabel(unicodeString)) {
return Optional.of(idnTable.getTable().getName());
}
}
return Optional.empty();
}

/** Returns the names of the IDN tables supported by a {@code tld}. */
public ImmutableSet<IdnTableEnum> getIdnTablesForTld(Tld tld) {
ImmutableSet<IdnTableEnum> idnTablesForTld = tld.getIdnTables();
return idnTablesForTld.isEmpty() ? DEFAULT_IDN_TABLES : idnTablesForTld;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public URI getPolicy() {
* Returns true if the given label is valid for this IDN table. A label is considered valid if all
* of its codepoints are in the IDN table.
*/
boolean isValidLabel(String label) {
public boolean isValidLabel(String label) {
final int length = label.length();
for (int i = 0; i < length; ) {
int codepoint = label.codePointAt(i);
Expand Down
94 changes: 94 additions & 0 deletions core/src/test/java/google/registry/bsa/IdnCheckerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
// Copyright 2023 The Nomulus Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package google.registry.bsa;

import static com.google.common.truth.Truth.assertThat;
import static google.registry.tldconfig.idn.IdnTableEnum.EXTENDED_LATIN;
import static google.registry.tldconfig.idn.IdnTableEnum.JA;
import static google.registry.tldconfig.idn.IdnTableEnum.UNCONFUSABLE_LATIN;

import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import google.registry.model.tld.Tld;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;

@ExtendWith(MockitoExtension.class)
public class IdnCheckerTest {

@Mock Tld jaonly;
@Mock Tld jandelatin;
@Mock Tld strictlatin;
IdnChecker idnChecker;

@BeforeEach
void setup() {
idnChecker =
new IdnChecker(
ImmutableMap.of(
JA,
ImmutableSet.of(jandelatin, jaonly),
EXTENDED_LATIN,
ImmutableSet.of(jandelatin),
UNCONFUSABLE_LATIN,
ImmutableSet.of(strictlatin)));
}

@Test
void getAllValidIdns_allTlds() {
assertThat(idnChecker.getAllValidIdns("all"))
.containsExactly(EXTENDED_LATIN, JA, UNCONFUSABLE_LATIN);
}

@Test
void getAllValidIdns_notJa() {
assertThat(idnChecker.getAllValidIdns("à")).containsExactly(EXTENDED_LATIN, UNCONFUSABLE_LATIN);
}

@Test
void getAllValidIdns_extendedLatinOnly() {
assertThat(idnChecker.getAllValidIdns("á")).containsExactly(EXTENDED_LATIN);
}

@Test
void getAllValidIdns_jaOnly() {
assertThat(idnChecker.getAllValidIdns("っ")).containsExactly(JA);
}

@Test
void getAllValidIdns_none() {
assertThat(idnChecker.getAllValidIdns("д")).isEmpty();
}

@Test
void getSupportingTlds_singleTld_success() {
assertThat(idnChecker.getSupportingTlds(ImmutableSet.of("EXTENDED_LATIN")))
.containsExactly(jandelatin);
}

@Test
void getSupportingTlds_multiTld_success() {
assertThat(idnChecker.getSupportingTlds(ImmutableSet.of("JA")))
.containsExactly(jandelatin, jaonly);
}

@Test
void getForbiddingTlds_success() {
assertThat(idnChecker.getForbiddingTlds(ImmutableSet.of("JA"))).containsExactly(strictlatin);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,14 @@

package google.registry.tldconfig.idn;

import static com.google.common.truth.Truth.assertThat;
import static com.google.common.truth.Truth8.assertThat;
import static google.registry.persistence.transaction.TransactionManagerFactory.tm;
import static google.registry.testing.DatabaseHelper.createTld;
import static google.registry.testing.DatabaseHelper.persistResource;

import com.google.common.collect.ImmutableSet;
import google.registry.model.tld.Tld;
import google.registry.persistence.transaction.JpaTestExtensions;
import google.registry.persistence.transaction.JpaTestExtensions.JpaIntegrationTestExtension;
import org.junit.jupiter.api.Test;
Expand Down Expand Up @@ -118,4 +121,24 @@ void testPerTldConfig() {
// Extended Latin shouldn't include Japanese characters
assertThat(idnLabelValidator.findValidIdnTableForTld("みんな", "tld")).isEmpty();
}

@Test
void testGetIdnTablesForTld_custom() {
persistResource(
createTld("tld")
.asBuilder()
.setIdnTables(ImmutableSet.of(IdnTableEnum.EXTENDED_LATIN))
.build());
Tld tld = tm().transact(() -> tm().loadByKey(Tld.createVKey("tld")));
assertThat(idnLabelValidator.getIdnTablesForTld(tld))
.containsExactly(IdnTableEnum.EXTENDED_LATIN);
}

@Test
void testGetIdnTablesForTld_default() {
persistResource(createTld("tld").asBuilder().build());
Tld tld = tm().transact(() -> tm().loadByKey(Tld.createVKey("tld")));
assertThat(idnLabelValidator.getIdnTablesForTld(tld))
.containsExactly(IdnTableEnum.EXTENDED_LATIN, IdnTableEnum.JA);
}
}

0 comments on commit 9e3c589

Please sign in to comment.