Skip to content

Commit

Permalink
Unicode-normalize manifest and file paths before comparing (#71)
Browse files Browse the repository at this point in the history
* Unicode-normalize manifest and file paths before comparing; fixes issue #70

Signed-off-by: David Moles <[email protected]>

* add developer certificate for David Moles

Signed-off-by: David Moles <[email protected]>
  • Loading branch information
dmoles authored Jan 11, 2023
1 parent b549ab9 commit dae9538
Show file tree
Hide file tree
Showing 10 changed files with 133 additions and 59 deletions.
38 changes: 38 additions & 0 deletions developer certificates/David_Moles.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
I, David Moles ([email protected]) freely assign copyright for
my contributions to this project over as described below:


Developer Certificate of Origin
Version 1.1

Copyright (C) 2004, 2006 The Linux Foundation and its contributors.

Everyone is permitted to copy and distribute verbatim copies of this
license document, but changing it is not allowed.


Developer's Certificate of Origin 1.1

By making a contribution to this project, I certify that:

(a) The contribution was created in whole or in part by me and I
have the right to submit it under the open source license
indicated in the file; or

(b) The contribution is based upon previous work that, to the best
of my knowledge, is covered under an appropriate open source
license and I have the right under that license to submit that
work with modifications, whether created in whole or in part
by me, under the same open source license (unless I am
permitted to submit under a different license), as indicated
in the file; or

(c) The contribution was provided directly to me by some other
person who certified (a), (b) or (c) and I have not modified
it.

(d) I understand and agree that this project and the contribution
are public and that a record of the contribution (including all
personal information I submit with it, including my sign-off) is
maintained indefinitely and may be redistributed consistent with
this project or the open source license(s) involved.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.ResourceBundle;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand All @@ -26,23 +27,36 @@ abstract public class AbstractPayloadFileExistsInManifestsVistor extends SimpleF

/**
 * Creates the visitor and records how hidden files are to be treated.
 * Subclasses must invoke this constructor before the visitor is used.
 *
 * @param ignoreHiddenFiles Should hidden files be ignored
 */
public AbstractPayloadFileExistsInManifestsVistor(final boolean ignoreHiddenFiles) {
    // The superclass no-argument constructor is invoked implicitly.
    this.ignoreHiddenFiles = ignoreHiddenFiles;
}

@Override
public FileVisitResult preVisitDirectory(final Path dir, final BasicFileAttributes attrs) throws IOException {
    // Walk into the directory unless hidden files are being ignored and this directory is hidden.
    if (!ignoreHiddenFiles || !PathUtils.isHidden(dir)) {
        return FileVisitResult.CONTINUE;
    }

    // Hidden directory while ignoring hidden files: log it and skip everything beneath it.
    logger.debug(messages.getString("skipping_hidden_file"), dir);
    return FileVisitResult.SKIP_SUBTREE;
}

/**
 * Returns true if the path exists in the provided set of manifest paths, false otherwise.
 * A path matches when it is equal to a manifest path, or when the two have the same
 * string form after normalization by {@code ManifestVerifier.toNormalizedString}.
 *
 * @param path The file path.
 * @param manifestPaths The manifest paths.
 * @return true if the path exists, false otherwise.
 */
protected static boolean inManifest(final Path path, final Set<Path> manifestPaths) {
    // Cheap exact lookup first: when the path is already an element of the set there is
    // no need to normalize every manifest entry for this file.
    if (manifestPaths.contains(path)) {
        return true;
    }

    // Fall back to comparing normalized strings, so that paths which differ only in
    // their Unicode normalization form are still treated as the same file.
    final String normalizedPath = ManifestVerifier.toNormalizedString(path);
    for (final Path manifestPath : manifestPaths) {
        if (ManifestVerifier.toNormalizedString(manifestPath).equals(normalizedPath)) {
            return true;
        }
    }
    return false;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,19 +31,19 @@ public enum ManifestVerifier {; //using enum to enforce singleton
private static final ResourceBundle messages = ResourceBundle.getBundle("MessageBundle");

/**
* Verify that all the files in the payload directory are listed in the payload manifest and
* Verify that all the files in the payload directory are listed in the payload manifest and
* all files listed in all manifests exist.
*
*
* @param bag the bag which contains the manifests to check
* @param ignoreHiddenFiles whether hidden files should be ignored when checking
*
*
* @throws IOException if there is an error while reading a file from the filesystem
* @throws MaliciousPathException if a path is outside the bag
* @throws InvalidBagitFileFormatException if a manifest is not formatted correctly
* @throws FileNotInPayloadDirectoryException if a file listed in a manifest is not in the payload directory
*/
public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFiles)throws IOException{

final Set<Path> allFilesListedInManifests = getAllFilesListedInManifests(bag);
checkAllFilesListedInManifestExist(allFilesListedInManifests);

Expand All @@ -54,12 +54,21 @@ public static void verifyManifests(final Bag bag, final boolean ignoreHiddenFile
}
}

/**
 * Converts the path to its string form and applies canonical decomposition
 * ({@link java.text.Normalizer.Form#NFD}) to that string.
 *
 * @param path the path to normalize
 * @return the NFD-normalized string form of the path
 */
static String toNormalizedString(final Path path) {
    final String rawPath = path.toString();
    return Normalizer.normalize(rawPath, Normalizer.Form.NFD);
}

/*
* get the full path (absolute) of all the files listed in all the manifests
*/
private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOException {
logger.debug(messages.getString("all_files_in_manifests"));

final Set<Path> filesListedInManifests = new HashSet<>();

try(DirectoryStream<Path> directoryStream = Files.newDirectoryStream(bag.getTagFileDir(), new ManifestFilter())){
Expand All @@ -78,7 +87,7 @@ private static Set<Path> getAllFilesListedInManifests(final Bag bag) throws IOEx
*/
private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
logger.info(messages.getString("check_all_files_in_manifests_exist"));

for (final Path file : files) {
if(!Files.exists(file)){
if(existsNormalized(file)){
Expand All @@ -91,21 +100,21 @@ private static void checkAllFilesListedInManifestExist(final Set<Path> files) {
}
}
}

/**
* If a file is partially normalized, or uses a different normalization than the manifest specifies, it will fail the existence test.
* This method checks for that by normalizing the names found on disk and comparing them with the normalized filename.
*
*
* @return true if the normalized filename matches one on disk in the specified folder
*/
private static boolean existsNormalized(final Path file){
boolean existsNormalized = false;
final String normalizedFile = Normalizer.normalize(file.toString(), Normalizer.Form.NFD);
final String normalizedFile = toNormalizedString(file);
final Path parent = file.getParent();
if(parent != null){
try(DirectoryStream<Path> files = Files.newDirectoryStream(parent)){
for(final Path fileToCheck : files){
final String normalizedFileToCheck = Normalizer.normalize(fileToCheck.toString(), Normalizer.Form.NFD);
final String normalizedFileToCheck = toNormalizedString(fileToCheck);
if(normalizedFile.equals(normalizedFileToCheck)){
existsNormalized = true;
break;
Expand All @@ -116,7 +125,7 @@ private static boolean existsNormalized(final Path file){
logger.error(messages.getString("error_reading_normalized_file"), parent, normalizedFile, e);
}
}

return existsNormalized;
}

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package com.github.jscancella.verify.internal;

import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
Expand Down Expand Up @@ -29,7 +30,7 @@ public final class PayloadFileExistsInAllManifestsVistor extends AbstractPayload

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is listed in every one of the manifests.
*
*
* @param manifests the set of manifests to check
* @param rootDir the root directory of the bag
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
Expand All @@ -41,16 +42,16 @@ public PayloadFileExistsInAllManifestsVistor(final Set<Manifest> manifests, fina
}

@Override
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs){
public FileVisitResult visitFile(final Path path, final BasicFileAttributes attrs) throws IOException {
if(Files.isRegularFile(path)){
for(final Manifest manifest : manifests){
final Set<Path> relativePaths = manifest
.getEntries().stream()
.map(entry -> entry.getRelativeLocation())
.collect(Collectors.toSet());
final Path relativePath = rootDir.relativize(path);
if(!relativePaths.contains(relativePath)){

if(!inManifest(relativePath, relativePaths)){
final String formattedMessage = messages.getString("file_not_in_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path, manifest.getBagitAlgorithmName()).getMessage());
}
Expand All @@ -59,4 +60,5 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("file_in_all_manifests"), path);
return FileVisitResult.CONTINUE;
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public final class PayloadFileExistsInAtLeastOneManifestVistor extends AbstractP

/**
* Implements {@link SimpleFileVisitor} to ensure that the encountered file is in one of the manifests.
*
*
* @param filesListedInManifests the set of files listed in all the manifests
* @param ignoreHiddenFiles if the checker should ignore hidden files or not
*/
Expand All @@ -42,7 +42,7 @@ public FileVisitResult visitFile(final Path path, final BasicFileAttributes attr
logger.debug(messages.getString("skipping_hidden_file"), path);
}
else {
if(Files.isRegularFile(path) && !filesListedInManifests.contains(path.toAbsolutePath())){
if(Files.isRegularFile(path) && !inManifest(path.toAbsolutePath(), filesListedInManifests)){
final String formattedMessage = messages.getString("file_not_in_any_manifest_error");
throw new FileNotInManifestException(MessageFormatter.format(formattedMessage, path).getMessage());
}
Expand Down
Loading

0 comments on commit dae9538

Please sign in to comment.