Skip to content

Commit

Permalink
Update the bloom filter from hashed files
Browse files Browse the repository at this point in the history
  • Loading branch information
zapek committed Feb 29, 2024
1 parent 44ff857 commit cbf29b9
Show file tree
Hide file tree
Showing 7 changed files with 77 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package io.xeres.app.database.repository;

import io.xeres.app.database.model.file.File;
import io.xeres.common.id.Sha1Sum;
import jakarta.transaction.Transactional;
import org.springframework.data.jpa.repository.JpaRepository;

Expand All @@ -35,4 +36,6 @@ public interface FileRepository extends JpaRepository<File, Long>
Optional<File> findByNameAndParent(String name, File parent);

Optional<File> findByNameAndParentName(String name, String parentName);

Optional<File> findByHash(Sha1Sum hash);
}
36 changes: 33 additions & 3 deletions app/src/main/java/io/xeres/app/service/file/FileService.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package io.xeres.app.service.file;

import io.xeres.app.configuration.DataDirConfiguration;
import io.xeres.app.crypto.hash.sha1.Sha1MessageDigest;
import io.xeres.app.database.model.file.File;
import io.xeres.app.database.model.share.Share;
Expand Down Expand Up @@ -60,6 +61,8 @@ public class FileService

private final FileRepository fileRepository;

private HashBloomFilter bloomFilter;

private static final String[] ignoredSuffixes = {
".bak",
".sys",
Expand All @@ -78,11 +81,13 @@ public class FileService
"temp."
};

public FileService(FileNotificationService fileNotificationService, ShareRepository shareRepository, FileRepository fileRepository)
/**
 * Creates the file service and prepares its hash bloom filter.
 * <p>
 * The bloom filter is created using the data directory as its base directory and is then
 * immediately filled with the hashes of the files already known to the repository.
 *
 * @param fileNotificationService the service used to report scanning progress
 * @param shareRepository         the repository of shares
 * @param fileRepository          the repository of files; it is queried right away to fill the bloom filter
 * @param dataDirConfiguration    the configuration providing the data directory handed to the bloom filter
 */
public FileService(FileNotificationService fileNotificationService, ShareRepository shareRepository, FileRepository fileRepository, DataDirConfiguration dataDirConfiguration)
{
this.fileNotificationService = fileNotificationService;
this.shareRepository = shareRepository;
this.fileRepository = fileRepository;
bloomFilter = new HashBloomFilter(dataDirConfiguration.getDataDir(), 10_000, 0.01d); // XXX: parameters will need experimenting, especially the max files (yes it can be extended, but not reduced)
// Must run after both the repository and the filter have been assigned, as it reads from one to fill the other.
updateBloomFilter();
}

public void addShare(Share share)
Expand Down Expand Up @@ -149,6 +154,17 @@ public Map<Long, String> getFilesMapFromShares(List<Share> shares)
.collect(Collectors.joining(java.io.File.separator))));
}

/**
 * Looks up a file by its hash.
 * <p>
 * The bloom filter is consulted first, and the database is only queried when the filter
 * reports that the hash might be known. As the filter can report false positives, the
 * result can still be empty after the database lookup.
 *
 * @param hash the hash of the file to find, must not be {@code null}
 * @return the file with that hash, or an empty optional if none was found
 * @throws NullPointerException if {@code hash} is {@code null}
 */
public Optional<File> findFile(Sha1Sum hash)
{
	Objects.requireNonNull(hash);

	// Cheap rejection: skip the database round trip when the filter rules the hash out.
	if (!bloomFilter.mightContain(hash))
	{
		return Optional.empty();
	}
	return fileRepository.findByHash(hash);
}

private void saveFullPath(File file)
{
var tree = getFullPath(file);
Expand Down Expand Up @@ -177,7 +193,7 @@ void scanShare(Share share)
fileNotificationService.startScanning(share);
File directory = share.getFile();
var directoryPath = getFilePath(directory);
Files.walkFileTree(directoryPath, new TrackingFileVisitor(fileRepository, directory)
var visitor = new TrackingFileVisitor(fileRepository, directory)
{
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
Expand All @@ -196,6 +212,7 @@ public FileVisitResult visitFile(Path file, BasicFileAttributes attrs)
currentFile.setHash(hash);
currentFile.setModified(lastModified);
fileRepository.save(currentFile);
setChanged();
}
}
return FileVisitResult.CONTINUE;
Expand Down Expand Up @@ -242,9 +259,15 @@ public FileVisitResult visitFileFailed(Path file, IOException exc)
log.debug("Visiting file {} failed: {}", file, exc.getMessage());
return FileVisitResult.CONTINUE;
}
});
};
Files.walkFileTree(directoryPath, visitor);
directory.setModified(Files.getLastModifiedTime(directoryPath).toInstant());
fileRepository.save(directory);

if (visitor.foundChanges())
{
updateBloomFilter();
}
}
catch (IOException e)
{
Expand Down Expand Up @@ -354,4 +377,11 @@ Sha1Sum calculateFileHash(Path path)
fileNotificationService.stopScanningFile();
}
}

/**
 * Rebuilds the bloom filter from scratch using the hashes of all the entries stored in the file repository.
 * <p>
 * Entries without a hash are skipped: the filter turns a hash into bytes by dereferencing it,
 * so adding a {@code null} hash would throw a {@link NullPointerException} and abort the rebuild.
 */
private void updateBloomFilter()
{
	// XXX: extend the bloom filter if needed
	bloomFilter.clear();
	for (var file : fileRepository.findAll())
	{
		var hash = file.getHash();
		// NOTE(review): presumably directories and files that were not hashed yet have no hash; confirm against the File entity
		if (hash != null)
		{
			bloomFilter.add(hash);
		}
	}
}
}
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023 by David Gerber - https://zapek.com
* Copyright (c) 2023-2024 by David Gerber - https://zapek.com
*
* This file is part of Xeres.
*
Expand All @@ -17,7 +17,7 @@
* along with Xeres. If not, see <http://www.gnu.org/licenses/>.
*/

package io.xeres.app.xrs.service.turtle;
package io.xeres.app.service.file;

import com.sangupta.bloomfilter.AbstractBloomFilter;
import com.sangupta.bloomfilter.core.BitArray;
Expand All @@ -39,13 +39,13 @@
* <p>
* The entries are persisted to disk.
*/
public class TurtleBloomFilter
public class HashBloomFilter
{
private static final String PERSISTENT_FILE = "turtle_bf";
private final AbstractBloomFilter<Sha1Sum> bFilter;
private BitArray bArray;

public TurtleBloomFilter(String baseDir, int expectedInsertions, double falsePositiveProbability)
public HashBloomFilter(String baseDir, int expectedInsertions, double falsePositiveProbability)
{
bFilter = new AbstractBloomFilter<>(expectedInsertions, falsePositiveProbability, (sha1Sum, byteSink) -> byteSink.putBytes(sha1Sum.getBytes()))
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ public class TrackingFileVisitor implements FileVisitor<Path>
private final FileRepository fileRepository;
private boolean skipFirst; // XXX: lame hack, find something better (this is because the first entered directory is already the root directory)
private final List<File> directories = new ArrayList<>();
private boolean foundChanges;

public TrackingFileVisitor(FileRepository fileRepository, File rootDirectory)
{
Expand Down Expand Up @@ -78,4 +79,14 @@ public File getCurrentDirectory()
{
return directories.getLast();
}

/**
 * Tells whether a change was flagged on this visitor.
 *
 * @return {@code true} if {@code setChanged()} was called at least once, {@code false} otherwise
 */
public boolean foundChanges()
{
	return this.foundChanges;
}

/**
 * Flags that a change was detected during the visit. The flag is never reset by this method.
 */
void setChanged()
{
	this.foundChanges = true;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@

package io.xeres.app.xrs.service.turtle;

import io.xeres.app.configuration.DataDirConfiguration;
import io.xeres.app.net.peer.PeerConnection;
import io.xeres.app.net.peer.PeerConnectionManager;
import io.xeres.app.service.file.FileService;
import io.xeres.app.xrs.item.Item;
import io.xeres.app.xrs.service.RsService;
import io.xeres.app.xrs.service.RsServiceRegistry;
Expand Down Expand Up @@ -56,15 +56,15 @@ public class TurtleRsService extends RsService

private final TunnelProbability tunnelProbability = new TunnelProbability();

private final TurtleBloomFilter bloomFilter;

private final PeerConnectionManager peerConnectionManager;

protected TurtleRsService(RsServiceRegistry rsServiceRegistry, PeerConnectionManager peerConnectionManager, DataDirConfiguration dataDirConfiguration)
private final FileService fileService;

protected TurtleRsService(RsServiceRegistry rsServiceRegistry, PeerConnectionManager peerConnectionManager, FileService fileService)
{
super(rsServiceRegistry);
this.peerConnectionManager = peerConnectionManager;
bloomFilter = new TurtleBloomFilter(dataDirConfiguration.getDataDir(), 10_000, 0.01d); // XXX: parameters will need experimenting, especially the max files
this.fileService = fileService;
}

@Override
Expand Down Expand Up @@ -118,10 +118,11 @@ private void handleTunnelRequest(PeerConnection sender, TurtleTunnelRequestItem
}

// XXX: if the request is not from us, perform a local search and send result back if found (otherwise forward)
if (bloomFilter.mightContain(item.getFileHash()))
var file = fileService.findFile(item.getFileHash());
if (file.isPresent())
{
log.debug("filehash is in the bloom filter");
// XXX: remember that it might be a false positive from the bloom filter
// XXX: return the file back!
return;
}

if (tunnelProbability.isForwardable(item)) // XXX: this is different there! needs the number of peers and speed...
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package io.xeres.app.xrs.service.turtle;

import io.xeres.app.service.file.HashBloomFilter;
import io.xeres.testutils.Sha1SumFakes;
import org.junit.jupiter.api.Test;

Expand All @@ -27,12 +28,12 @@
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertTrue;

class TurtleBloomFilterTest
class HashBloomFilterTest
{
@Test
void TurtleBloomFilter_OK()
{
var filter = new TurtleBloomFilter(null, 10_000, 0.01d);
var filter = new HashBloomFilter(null, 10_000, 0.01d);

var s1 = Sha1SumFakes.createSha1Sum();
var s2 = Sha1SumFakes.createSha1Sum();
Expand All @@ -52,7 +53,7 @@ void TurtleBloomFilter_OK()
@Test
void TurtleBloomFilter_Multiple_OK()
{
var filter = new TurtleBloomFilter(null, 10_000, 0.01d);
var filter = new HashBloomFilter(null, 10_000, 0.01d);

var s1 = Sha1SumFakes.createSha1Sum();
var s2 = Sha1SumFakes.createSha1Sum();
Expand Down
13 changes: 13 additions & 0 deletions docs/database.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Using H2

Xeres uses the H2 database. The following is a list of guidelines:

## Data types

### Instant

`TIMESTAMP(9)`

By default, the precision is 6, which only goes down to microseconds instead of nanoseconds. Truncation will occur and make comparisons with a fresh `Instant` fail.
So make sure to always specify 9 as precision.

0 comments on commit cbf29b9

Please sign in to comment.