Skip to content

Commit

Permalink
Fix more Unicode bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
fmeum committed Nov 5, 2024
1 parent c728269 commit d539093
Show file tree
Hide file tree
Showing 31 changed files with 207 additions and 104 deletions.
2 changes: 2 additions & 0 deletions src/main/java/com/google/devtools/build/lib/analysis/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1485,6 +1485,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/actions:commandline_item",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/net/starlark/java/eval",
"//third_party:jsr305",
Expand All @@ -1501,6 +1502,7 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/actions:artifact_expander",
"//src/main/java/com/google/devtools/build/lib/actions:artifacts",
"//src/main/java/com/google/devtools/build/lib/collect/nestedset",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//third_party:guava",
"//third_party:jsr305",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
import com.google.devtools.build.lib.packages.RuleTransitionData;
import com.google.devtools.build.lib.packages.WorkspaceFactory;
import com.google.devtools.build.lib.starlarkbuildapi.core.Bootstrap;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.vfs.DigestHashFunction;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
Expand Down Expand Up @@ -131,7 +132,10 @@ protected synchronized byte[] getFastDigest(PathFragment path) {

@Override
protected synchronized byte[] getDigest(PathFragment path) {
return getDigestFunction().getHashFunction().hashString(path.toString(), UTF_8).asBytes();
return getDigestFunction()
.getHashFunction()
.hashBytes(StringUnsafe.getInstance().getInternalStringBytes(path.getPathString()))
.asBytes();
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSortedMap.toImmutableSortedMap;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.Comparator.comparing;

import com.github.benmanes.caffeine.cache.Caffeine;
Expand Down Expand Up @@ -146,7 +145,7 @@ protected void computeKey(
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
newDeterministicWriter().writeOutputFile(stream);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

import static com.google.common.collect.ImmutableList.toImmutableList;
import static java.nio.charset.StandardCharsets.ISO_8859_1;
import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
Expand Down Expand Up @@ -213,7 +212,7 @@ public void writeOutputFile(OutputStream out, @Nullable EventHandler eventHandle
public String getFileContents(@Nullable EventHandler eventHandler) throws IOException {
ByteArrayOutputStream stream = new ByteArrayOutputStream();
writeOutputFile(stream, eventHandler);
return stream.toString(UTF_8);
return stream.toString(ISO_8859_1);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.devtools.build.lib.actions.ActionExecutionContext;
import com.google.devtools.build.lib.actions.ActionKeyContext;
Expand All @@ -25,6 +24,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import javax.annotation.Nullable;
import net.starlark.java.eval.Tuple;
Expand All @@ -49,7 +49,8 @@ public LazyWriteNestedSetOfTupleAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents(delimiter).getBytes(UTF_8));
return out ->
out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents(delimiter)));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.actions.ActionExecutionContext;
Expand All @@ -26,6 +25,7 @@
import com.google.devtools.build.lib.collect.nestedset.NestedSet;
import com.google.devtools.build.lib.collect.nestedset.NestedSetBuilder;
import com.google.devtools.build.lib.collect.nestedset.Order;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.Fingerprint;
import java.util.function.Function;
import javax.annotation.Nullable;
Expand Down Expand Up @@ -71,7 +71,7 @@ public LazyWritePathsFileAction(

@Override
public DeterministicWriter newDeterministicWriter(ActionExecutionContext ctx) {
return out -> out.write(getContents().getBytes(UTF_8));
return out -> out.write(StringUnsafe.getInstance().getInternalStringBytes(getContents()));
}

/** Computes the Action key for this action by computing the fingerprint for the file contents. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.AbstractAction;
Expand Down Expand Up @@ -47,7 +47,8 @@ public ImmutableList<SpawnResult> expandTemplate(
final String expandedTemplate =
getExpandedTemplateUnsafe(
templateMetadata.template(), templateMetadata.substitutions(), ctx.getPathResolver());
DeterministicWriter deterministicWriter = out -> out.write(expandedTemplate.getBytes(UTF_8));
DeterministicWriter deterministicWriter =
out -> out.write(expandedTemplate.getBytes(ISO_8859_1));
return ctx.getContext(FileWriteActionContext.class)
.writeOutputToFile(
action,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
// limitations under the License.
package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.UTF_8;
import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -349,10 +349,13 @@ protected void afterExecute(
for (Artifact input : allInputs.toList()) {
usedInputsByMappedPath.put(pathMapper.getMappedExecPathString(input), input);
}
// Bazel encodes file system paths as raw bytes stored in a Latin-1 encoded string, so we need
// to make sure to also decode the unused input list as Latin-1.
try (BufferedReader br =
new BufferedReader(
new InputStreamReader(
getUnusedInputListInputStream(actionExecutionContext, spawnResults), UTF_8))) {
getUnusedInputListInputStream(actionExecutionContext, spawnResults),
ISO_8859_1))) {
String line;
while ((line = br.readLine()) != null) {
line = line.trim();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

package com.google.devtools.build.lib.analysis.actions;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.annotations.VisibleForTesting;
import com.google.devtools.build.lib.actions.Artifact;
import com.google.devtools.build.lib.actions.ArtifactPathResolver;
Expand All @@ -22,16 +24,12 @@
import com.google.devtools.build.lib.vfs.FileSystemUtils;
import com.google.devtools.build.lib.vfs.Path;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;

/** A template that contains text content, or alternatively throws an {@link IOException}. */
@Immutable // all subclasses are immutable
public abstract class Template {

static final Charset DEFAULT_CHARSET = StandardCharsets.UTF_8;

/** We only allow subclasses in this file. */
private Template() {}

Expand Down Expand Up @@ -105,7 +103,8 @@ private static final class ArtifactTemplate extends Template {
public String getContent(ArtifactPathResolver resolver) throws IOException {
Path templatePath = resolver.toPath(templateArtifact);
try {
return FileSystemUtils.readContent(templatePath, DEFAULT_CHARSET);
// Bazel's internal encoding for strings is raw bytes as Latin-1
return FileSystemUtils.readContent(templatePath, ISO_8859_1);
} catch (IOException e) {
throw new IOException(
"failed to load template file '"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -977,13 +977,8 @@ public void expandTemplate(
ImmutableMap.Builder<String, Substitution> substitutionsBuilder = ImmutableMap.builder();
for (Map.Entry<String, String> substitution :
Dict.cast(substitutionsUnchecked, String.class, String.class, "substitutions").entrySet()) {
// Blaze calls ParserInput.fromLatin1 when reading BUILD files, which might
// contain UTF-8 encoded symbols as part of template substitution.
// As a quick fix, the substitution values are corrected before being passed on.
// In the long term, avoiding ParserInput.fromLatin would be a better approach.
substitutionsBuilder.put(
substitution.getKey(),
Substitution.of(substitution.getKey(), convertLatin1ToUtf8(substitution.getValue())));
substitution.getKey(), Substitution.of(substitution.getKey(), substitution.getValue()));
}
if (!Starlark.UNBOUND.equals(computedSubstitutions)) {
for (Substitution substitution : ((TemplateDict) computedSubstitutions).getAll()) {
Expand All @@ -1007,16 +1002,6 @@ public void expandTemplate(
registerAction(action);
}

/**
* Returns the proper UTF-8 representation of a String that was erroneously read using Latin1.
*
* @param latin1 Input string
* @return The input string, UTF8 encoded
*/
private static String convertLatin1ToUtf8(String latin1) {
return new String(latin1.getBytes(StandardCharsets.ISO_8859_1), StandardCharsets.UTF_8);
}

@Override
public Args args(StarlarkThread thread) {
return Args.newArgs(thread.mutability(), getSemantics());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import com.google.common.base.Strings;
import java.nio.charset.Charset;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Base64;

/**
Expand All @@ -26,16 +26,9 @@ public final class BasicHttpAuthenticationEncoder {

private BasicHttpAuthenticationEncoder() {}

/** Encode username and password into a token with given {@link Charset}. */
public static String encode(String username, String password, Charset charset) {
StringBuilder sb = new StringBuilder();
if (!Strings.isNullOrEmpty(username)) {
sb.append(username);
}
sb.append(":");
if (!Strings.isNullOrEmpty(password)) {
sb.append(password);
}
return "Basic " + Base64.getEncoder().encodeToString(sb.toString().getBytes(charset));
/** Encode username and password into a token, encoded using UTF-8. */
public static String encode(String username, String password) {
return "Basic "
+ Base64.getEncoder().encodeToString((username + ":" + password).getBytes(UTF_8));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.authandtls;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.auth.Credentials;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
Expand Down Expand Up @@ -58,7 +56,7 @@ public Map<String, List<String>> getRequestMetadata(URI uri) throws IOException
Credential credential = netrc.getCredential(uri.getHost());
if (credential != null) {
String token =
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password(), UTF_8);
BasicHttpAuthenticationEncoder.encode(credential.login(), credential.password());
return ImmutableMap.of("Authorization", ImmutableList.of(token));
} else {
return ImmutableMap.of();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@

package com.google.devtools.build.lib.bazel.bzlmod;

import static java.nio.charset.StandardCharsets.UTF_8;

import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.actions.FileValue;
Expand All @@ -35,6 +34,7 @@
import com.google.devtools.build.skyframe.SkyKey;
import com.google.devtools.build.skyframe.SkyValue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import javax.annotation.Nullable;
import net.starlark.java.eval.EvalException;
import net.starlark.java.eval.Mutability;
Expand Down Expand Up @@ -138,8 +138,7 @@ private void createVendorFile(Path vendorPath, Path vendorFilePath)
throws VendorFileFunctionException {
try {
vendorPath.createDirectoryAndParents();
byte[] vendorFileContents = VENDOR_FILE_HEADER.getBytes(UTF_8);
FileSystemUtils.writeContent(vendorFilePath, vendorFileContents);
FileSystemUtils.writeContent(vendorFilePath, StandardCharsets.UTF_8, VENDOR_FILE_HEADER);
} catch (IOException e) {
throw new VendorFileFunctionException(
new IOException("error creating VENDOR.bazel file", e), Transience.TRANSIENT);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.
package com.google.devtools.build.lib.bazel.repository;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.base.Strings;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
Expand All @@ -29,7 +31,6 @@
import java.io.File;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.LinkedHashMap;
import java.util.Map;
import net.starlark.java.eval.Printer;
Expand Down Expand Up @@ -81,7 +82,7 @@ public void afterCommand() {
for (Object resolved : resolvedValues.values()) {
resultBuilder.add(resolved);
}
try (Writer writer = Files.newWriter(new File(resolvedFile), StandardCharsets.UTF_8)) {
try (Writer writer = Files.newWriter(new File(resolvedFile), ISO_8859_1)) {
writer.write(EXPORTED_NAME + " = " + new ValuePrinter().repr(resultBuilder.build()));
} catch (IOException e) {
logger.atWarning().withCause(e).log("IO Error writing to file %s", resolvedFile);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,10 @@ java_library(
"//src/main/java/com/google/devtools/build/lib/skyframe:precomputed_value",
"//src/main/java/com/google/devtools/build/lib/skyframe:repository_mapping_value",
"//src/main/java/com/google/devtools/build/lib/starlarkbuildapi/repository",
"//src/main/java/com/google/devtools/build/lib/unsafe:string",
"//src/main/java/com/google/devtools/build/lib/util",
"//src/main/java/com/google/devtools/build/lib/util:string",
"//src/main/java/com/google/devtools/build/lib/util:string_encoding",
"//src/main/java/com/google/devtools/build/lib/util/io:out-err",
"//src/main/java/com/google/devtools/build/lib/vfs",
"//src/main/java/com/google/devtools/build/lib/vfs:pathfragment",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor;
import com.google.devtools.build.lib.runtime.RepositoryRemoteExecutor.ExecutionResult;
import com.google.devtools.build.lib.skyframe.ActionEnvironmentFunction;
import com.google.devtools.build.lib.unsafe.StringUnsafe;
import com.google.devtools.build.lib.util.OsUtils;
import com.google.devtools.build.lib.util.io.OutErr;
import com.google.devtools.build.lib.vfs.FileSystemUtils;
Expand Down Expand Up @@ -1321,23 +1322,17 @@ private static String renamedStripPrefix(String method, String stripPrefix, Stri
@Param(
name = "legacy_utf8",
named = true,
defaultValue = "True",
defaultValue = "False",
doc =
"""
Encode file content to UTF-8, true by default. Future versions will change \
the default and remove this parameter.
No-op. This parameter is deprecated and will be removed in a future version of \
Bazel.
"""),
})
public void createFile(
Object path, String content, Boolean executable, Boolean legacyUtf8, StarlarkThread thread)
throws RepositoryFunctionException, EvalException, InterruptedException {
StarlarkPath p = getPath(path);
byte[] contentBytes;
if (legacyUtf8) {
contentBytes = content.getBytes(UTF_8);
} else {
contentBytes = content.getBytes(ISO_8859_1);
}
WorkspaceRuleEvent w =
WorkspaceRuleEvent.newFileEvent(
p.toString(),
Expand All @@ -1351,7 +1346,7 @@ public void createFile(
makeDirectories(p.getPath());
p.getPath().delete();
try (OutputStream stream = p.getPath().getOutputStream()) {
stream.write(contentBytes);
stream.write(StringUnsafe.getInstance().getInternalStringBytes(content));
}
if (executable) {
p.getPath().setExecutable(true);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ Builder setQuiet(boolean quiet) {

private static String toString(ByteArrayOutputStream stream) {
try {
return new String(stream.toByteArray(), UTF_8);
return stream.toString(UTF_8);
} catch (IllegalStateException e) {
return "";
}
Expand Down
Loading

0 comments on commit d539093

Please sign in to comment.