diff --git a/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs b/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs index d12f362..6e6722b 100644 --- a/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs +++ b/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs @@ -1,22 +1,22 @@ using System; using System.Collections.Generic; -using System.Text; -using System.Linq; namespace MimeDetective.Analyzers { public sealed class ArrayBasedTrie : IFileAnalyzer { - public const int NullStandInValue = 256; - public const int MaxNodeSize = 257; + private const int NullStandInValue = 256; + private const int MaxNodeSize = 257; - private List Nodes = new List(10); + private OffsetNode[] OffsetNodes = new OffsetNode[10]; + private int offsetNodesLength = 1; /// /// Constructs an empty ArrayBasedTrie, to add definitions /// public ArrayBasedTrie() { + OffsetNodes[0] = new OffsetNode(0); } /// @@ -28,67 +28,55 @@ public ArrayBasedTrie(IEnumerable types) if (types is null) throw new ArgumentNullException(nameof(types)); + OffsetNodes[0] = new OffsetNode(0); + foreach (var type in types) { if ((object)type != null) Insert(type); } - - Nodes = Nodes.OrderBy(x => x.Offset).ToList(); } + //TODO need tests for highestmatching count behavior public FileType Search(in ReadResult readResult) { FileType match = null; + int highestMatchingCount = 0; //iterate through offset nodes - for (int offsetNodeIndex = 0; offsetNodeIndex < Nodes.Count; offsetNodeIndex++) + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) { - //get offset node - var offsetNode = Nodes[offsetNodeIndex]; - + OffsetNode offsetNode = OffsetNodes[offsetNodeIndex]; int i = offsetNode.Offset; - byte value = readResult.Array[i]; - - var node = offsetNode.Children[value]; - - if (node is null) - { - node = offsetNode.Children[NullStandInValue]; - - if (node is null) - break; - } - - if ((object)node.Record != null) - match = node.Record; + Node[] prevNode = offsetNode.Children; - i++; - - //iterate through the current trie - for (; i < readResult.ReadLength; i++) + while (i < readResult.ReadLength) { - value = readResult.Array[i]; - - var prevNode = node; - node = node.Children[value]; + int currentVal = readResult.Array[i]; + Node node = prevNode[currentVal]; - if (node is null) + if (node.Children == null) { - node = prevNode.Children[NullStandInValue]; + node = prevNode[NullStandInValue]; - if (node is null) + if (node.Children is null) break; } - if ((object)node.Record != null) + //increment here + i++; + + //collect the record + if (i > highestMatchingCount && (object)node.Record != null) + { match = node.Record; - } + highestMatchingCount = i; + } - if ((object)match != null) - break; + prevNode = node.Children; + } } - + return match; } @@ -97,90 +85,83 @@ public void Insert(FileType type) if (type is null) throw new ArgumentNullException(nameof(type)); - OffsetNode match = null; + ref OffsetNode match = ref OffsetNodes[0]; + bool matchFound = false; - foreach (var offsetNode in Nodes) + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) { - if (offsetNode.Offset == type.HeaderOffset) + ref var currentNode = ref OffsetNodes[offsetNodeIndex]; + + if (currentNode.Offset == type.HeaderOffset) { - match = offsetNode; + match = ref currentNode; + matchFound = true; break; } } - if (match is null) + if (!matchFound) { - match = new OffsetNode(type.HeaderOffset); - Nodes.Add(match); - } - - match.Insert(type); - } - - private sealed class OffsetNode - { - public readonly ushort Offset; - public 
readonly Node[] Children; + int newNodePos = offsetNodesLength; - public OffsetNode(ushort offset) - { - if (offset > (MimeTypes.MaxHeaderSize - 1)) - throw new ArgumentException("Offset cannot be greater than MaxHeaderSize - 1"); + if (newNodePos >= OffsetNodes.Length) + { + int newOffsetNodeCount = OffsetNodes.Length * 2 + 1; + var newOffsetNodes = new OffsetNode[newOffsetNodeCount]; + Array.Copy(OffsetNodes, newOffsetNodes, offsetNodesLength); + OffsetNodes = newOffsetNodes; + } - Offset = offset; - Children = new Node[MaxNodeSize]; + match = ref OffsetNodes[newNodePos]; + match = new OffsetNode(type.HeaderOffset); + offsetNodesLength++; } - public void Insert(FileType type) + Node[] prevNode = match.Children; + + for (int i = 0; i < type.Header.Length; i++) { - int i = 0; byte? value = type.Header[i]; int arrayPos = value ?? NullStandInValue; + ref Node node = ref prevNode[arrayPos]; - var node = Children[arrayPos]; - - if (node is null) + if (node.Children is null) { - node = new Node(value); - Children[arrayPos] = node; - } + FileType record = null; - i++; + if (i == type.Header.Length - 1) + record = type; - for (; i < type.Header.Length; i++) - { - value = type.Header[i]; - arrayPos = value ?? NullStandInValue; - var prevNode = node; - node = node.Children[arrayPos]; - - if (node is null) - { - var newNode = new Node(value); + node = new Node(record); + } - if (i == type.Header.Length - 1) - newNode.Record = type; + prevNode = node.Children; + } + } + + private readonly struct OffsetNode + { + public readonly ushort Offset; + public readonly Node[] Children; - node = prevNode.Children[arrayPos] = newNode; - } - } + public OffsetNode(ushort offset) + { + Offset = offset; + Children = new Node[MaxNodeSize]; } } - private sealed class Node + private struct Node { - public readonly Node[] Children; + public Node[] Children; //if complete node then this not null public FileType Record; - public readonly byte? Value; - - public Node(byte? value) + public Node(FileType record) { - Value = value; Children = new Node[MaxNodeSize]; - Record = null; + Record = record; } } } diff --git a/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs b/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs index a37909c..884d787 100644 --- a/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs +++ b/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs @@ -38,12 +38,14 @@ public FileType Search(in ReadResult readResult) { FileType match = null; var enumerator = Nodes.GetEnumerator(); + int highestMatchingCount = 0; - while (match is null && enumerator.MoveNext()) + while (enumerator.MoveNext()) { Node node = enumerator.Current.Value; + int i = node.Value; - for (int i = node.Value; i < readResult.ReadLength; i++) + while (i < readResult.ReadLength) { Node prevNode = node; @@ -51,12 +53,14 @@ public FileType Search(in ReadResult readResult) && !prevNode.Children.TryGetValue(NullStandInValue, out node)) break; - if ((object)node.Record != null) + i++; + + if (i > highestMatchingCount && (object)node.Record != null) + { match = node.Record; + highestMatchingCount = i; + } } - - if ((object)match != null) - break; } return match; @@ -73,50 +77,45 @@ public void Insert(FileType type) Nodes.Add(type.HeaderOffset, offsetNode); } - offsetNode.Insert(type); - } - - private sealed class Node - { - public readonly Dictionary Children = new Dictionary(); + int i = 0; + ushort value = type.Header[i] ?? 
NullStandInValue; - //if complete node then this not null - public FileType Record; - - public readonly ushort Value; - - public Node(ushort value) + if (!offsetNode.Children.TryGetValue(value, out Node node)) { - Value = value; + node = new Node(value); + offsetNode.Children.Add(value, node); } - public void Insert(FileType type) + i++; + + for (; i < type.Header.Length; i++) { - int i = 0; - ushort value = type.Header[i] ?? NullStandInValue; + value = type.Header[i] ?? NullStandInValue; - if (!Children.TryGetValue(value, out Node node)) + if (!node.Children.ContainsKey(value)) { - node = new Node(value); - Children.Add(value, node); + Node newNode = new Node(value); + node.Children.Add(value, newNode); } - i++; + node = node.Children[value]; + } - for (; i < type.Header.Length; i++) - { - value = type.Header[i] ?? NullStandInValue; + node.Record = type; + } - if (!node.Children.ContainsKey(value)) - { - Node newNode = new Node(value); - node.Children.Add(value, newNode); - } + private sealed class Node + { + public Dictionary Children = new Dictionary(); - node = node.Children[value]; - } + //if complete node then this not null + public FileType Record; + + public ushort Value; - node.Record = type; + public Node(ushort value) + { + Value = value; } } } diff --git a/src/Mime-Detective/Analyzers/Helpers.cs b/src/Mime-Detective/Analyzers/Helpers.cs new file mode 100644 index 0000000..66dda53 --- /dev/null +++ b/src/Mime-Detective/Analyzers/Helpers.cs @@ -0,0 +1,14 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace MimeDetective.Analyzers +{ + internal static class ThrowHelpers + { + public static void GreaterThanMaxHeaderSize() + { + throw new ArgumentException("Offset cannot be greater than MaxHeaderSize - 1"); + } + } +} diff --git a/src/Mime-Detective/Analyzers/HybridTrie.cs b/src/Mime-Detective/Analyzers/HybridTrie.cs new file mode 100644 index 0000000..c9d1b0e --- /dev/null +++ b/src/Mime-Detective/Analyzers/HybridTrie.cs @@ -0,0 +1,292 @@ +using System; +using System.Collections.Generic; + +namespace MimeDetective.Analyzers +{ + public sealed class HybridTrie : IFileAnalyzer + { + private const int DefaultSize = 7; + private const ushort NullStandInValue = 256; + private const int MaxNodeSize = 257; + + private OffsetNode[] OffsetNodes = new OffsetNode[10]; + private int offsetNodesLength = 1; + + private readonly struct OffsetNode + { + public readonly ushort Offset; + public readonly Node[] Children; + + public OffsetNode(ushort offset) + { + Offset = offset; + Children = new Node[MaxNodeSize]; + } + } + + /// + /// Constructs an empty DictionaryBasedTrie + /// + public HybridTrie() + { + OffsetNodes[0] = new OffsetNode(0); + } + + /// + /// Constructs a DictionaryBasedTrie from an Enumerable of FileTypes + /// + /// + public HybridTrie(IEnumerable types) + { + if (types is null) + throw new ArgumentNullException(nameof(types)); + + OffsetNodes[0] = new OffsetNode(0); + + foreach (var type in types) + { + Insert(type); + } + } + + public FileType Search(in ReadResult readResult) + { + FileType match = null; + int highestMatchingCount = 0; + + //iterate through offset nodes + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) + { + //get offset node + var offsetNode = OffsetNodes[offsetNodeIndex]; + int i = offsetNode.Offset; + + if (!(i < readResult.ReadLength)) + continue; + + Node node = offsetNode.Children[readResult.Array[i]]; + + if (node == null) + { + node = offsetNode.Children[NullStandInValue]; + + if 
(node is null) + continue; + } + + i++; + + if (i > highestMatchingCount && (object)node.Record != null) + { + match = node.Record; + highestMatchingCount = i; + } + + while (i < readResult.ReadLength) + { + Node prevNode = node; + + if (!prevNode.TryGetValue(readResult.Array[i], out node) + && !prevNode.TryGetValue(NullStandInValue, out node)) + break; + + i++; + + if (i > highestMatchingCount && (object)node.Record != null) + { + match = node.Record; + highestMatchingCount = i; + } + } + } + + return match; + } + + public void Insert(FileType type) + { + if (type is null) + throw new ArgumentNullException(nameof(type)); + + ref OffsetNode match = ref OffsetNodes[0]; + bool matchFound = false; + + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) + { + ref var currentNode = ref OffsetNodes[offsetNodeIndex]; + + if (currentNode.Offset == type.HeaderOffset) + { + match = ref currentNode; + matchFound = true; + break; + } + } + + //handle expanding collection + if (!matchFound) + { + int newNodePos = offsetNodesLength; + + if (newNodePos >= OffsetNodes.Length) + { + int newOffsetNodeCount = OffsetNodes.Length * 2 + 1; + var newOffsetNodes = new OffsetNode[newOffsetNodeCount]; + Array.Copy(OffsetNodes, newOffsetNodes, offsetNodesLength); + OffsetNodes = newOffsetNodes; + } + + match = ref OffsetNodes[newNodePos]; + match = new OffsetNode(type.HeaderOffset); + offsetNodesLength++; + } + + int i = 0; + byte? value = type.Header[i]; + int arrayPos = value ?? NullStandInValue; + + var node = match.Children[arrayPos]; + + if (node is null) + { + node = new Node((ushort)arrayPos); + match.Children[arrayPos] = node; + } + + i++; + + for (; i < type.Header.Length; i++) + { + value = type.Header[i]; + arrayPos = value ?? NullStandInValue; + var prevNode = node; + + if (!node.TryGetValue((ushort)arrayPos, out node)) + { + node = new Node((ushort)arrayPos); + + //if (i == type.Header.Length - 1) + // node.Record = type; + + prevNode.Add((ushort)arrayPos, node); + } + } + + node.Record = type; + } + + private sealed class Node + { + //if complete node then this not null + public FileType Record; + + public ushort Value; + + private sealed class Entry + { + //public ushort _key; + public Node _value; + public Entry _next; + } + + private Entry[] _buckets; + private int _numEntries; + + public Node(ushort value) + { + Value = value; + Clear(DefaultSize); + } + + public bool TryGetValue(ushort key, out Node value) + { + Entry entry = Find(key); + + if (entry != null) + { + value = entry._value; + return true; + } + + value = null; + return false; + } + + public void Add(ushort key, Node value) + { + Entry entry = Find(key); + + if (entry != null) + throw new ArgumentException("entry already added"); + + UncheckedAdd(key, value); + } + + public void Clear(int capacity = DefaultSize) + { + _buckets = new Entry[capacity]; + _numEntries = 0; + } + + private Entry Find(ushort key) + { + int bucket = GetBucket(key); + Entry entry = _buckets[bucket]; + while (entry != null) + { + if (key == entry._value.Value) + return entry; + + entry = entry._next; + } + return null; + } + + private Entry UncheckedAdd(ushort key, Node value) + { + Entry entry = new Entry + { + _value = value + }; + + int bucket = GetBucket(key); + entry._next = _buckets[bucket]; + _buckets[bucket] = entry; + + _numEntries++; + if (_numEntries > (_buckets.Length * 2)) + ExpandBuckets(); + + return entry; + } + + private void ExpandBuckets() + { + int newNumBuckets = _buckets.Length * 2 + 1; + Entry[] newBuckets 
= new Entry[newNumBuckets]; + for (int i = 0; i < _buckets.Length; i++) + { + Entry entry = _buckets[i]; + while (entry != null) + { + Entry nextEntry = entry._next; + + int bucket = GetBucket(entry._value.Value, newNumBuckets); + entry._next = newBuckets[bucket]; + newBuckets[bucket] = entry; + + entry = nextEntry; + } + } + _buckets = newBuckets; + } + + private int GetBucket(ushort key, int numBuckets = 0) + { + int h = key; + h &= 0x7fffffff; + return (h % (numBuckets == 0 ? _buckets.Length : numBuckets)); + } + } + } +} \ No newline at end of file diff --git a/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs b/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs index 7bba089..441d39e 100644 --- a/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs +++ b/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs @@ -57,18 +57,19 @@ public FileType Search(in ReadResult readResult) uint matchingCount = 0; int iOffset = type.HeaderOffset; - int readLength = iOffset + type.Header.Length; + int readEnd = iOffset + type.Header.Length; - if (readLength > readResult.ReadLength) + if (readEnd > readResult.ReadLength) continue; - for (int i = 0; iOffset < readLength; i++, iOffset++) + for (int i = 0; iOffset < readEnd; i++, iOffset++) { if (type.Header[i] is null || type.Header[i].Value == readResult.Array[iOffset]) matchingCount++; } - if (type.Header.Length == matchingCount && matchingCount > highestMatchingCount) + //TODO should this be default behavior + if (type.Header.Length == matchingCount && matchingCount >= highestMatchingCount) { highestMatchingType = type; highestMatchingCount = matchingCount; diff --git a/src/Mime-Detective/FileType.cs b/src/Mime-Detective/FileType.cs index bd2cdee..8c6ad80 100644 --- a/src/Mime-Detective/FileType.cs +++ b/src/Mime-Detective/FileType.cs @@ -32,7 +32,7 @@ public FileType(byte?[] header, string extension, string mime, ushort offset = 0 Header = header ?? 
throw new ArgumentNullException(nameof(header), $"cannot be null, {nameof(FileType)} needs file header data"); if (offset > (MimeTypes.MaxHeaderSize - 1)) - throw new ArgumentException("Header Offset cannot exceed Max Header Size - 1"); + throw new ArgumentException($"Header Offset cannot exceed Max Header Size {MimeTypes.MaxHeaderSize} - 1"); HeaderOffset = offset; Extension = extension; diff --git a/src/Mime-Detective/Mime-Detective.csproj b/src/Mime-Detective/Mime-Detective.csproj index 4b4f737..93369c9 100644 --- a/src/Mime-Detective/Mime-Detective.csproj +++ b/src/Mime-Detective/Mime-Detective.csproj @@ -18,8 +18,8 @@ 0.0.6.0 0.0.6.0 - See beta2 PR - 0.0.6.0-beta2 + See beta3 PR + 0.0.6.0-beta3 true diff --git a/test/Mime-Detective.Benchmarks/Data/Images/Thumbs.db b/test/Mime-Detective.Benchmarks/Data/Images/Thumbs.db new file mode 100644 index 0000000..d41bb0c Binary files /dev/null and b/test/Mime-Detective.Benchmarks/Data/Images/Thumbs.db differ diff --git a/test/Mime-Detective.Benchmarks/Program.cs b/test/Mime-Detective.Benchmarks/Program.cs index a32aa79..da79e7d 100644 --- a/test/Mime-Detective.Benchmarks/Program.cs +++ b/test/Mime-Detective.Benchmarks/Program.cs @@ -20,11 +20,12 @@ public class MyConfig : ManualConfig { public MyConfig() { + /* Add(Job.Default.With(Runtime.Clr) .With(CsProjClassicNetToolchain.Net47) .With(Jit.RyuJit) .With(Platform.X64) - .WithId("Net47")); + .WithId("Net47"));*/ Add(Job.Default.With(Runtime.Core) .With(CsProjCoreToolchain.NetCoreApp11) @@ -62,6 +63,7 @@ public class TypeLookup const int OpsPerInvoke = 6; static readonly LinearCountingAnalyzer linear = new LinearCountingAnalyzer(MimeTypes.Types); static readonly DictionaryBasedTrie trie2 = new DictionaryBasedTrie(MimeTypes.Types); + static readonly HybridTrie trie3 = new HybridTrie(MimeTypes.Types); static readonly ArrayBasedTrie trie5 = new ArrayBasedTrie(MimeTypes.Types); static byte[] ReadFile(FileInfo info) @@ -74,8 +76,32 @@ static byte[] ReadFile(FileInfo info) return bytes; } - [Benchmark(OperationsPerInvoke = OpsPerInvoke, Baseline = true)] - public FileType LinearCountingAnalyzer() + //[Benchmark] + public LinearCountingAnalyzer LinearCountingAnalyzerInsertAll() + { + return new LinearCountingAnalyzer(MimeTypes.Types); + } + + //[Benchmark] + public DictionaryBasedTrie DictTrieInsertAll() + { + return new DictionaryBasedTrie(MimeTypes.Types); + } + + //[Benchmark] + public ArrayBasedTrie ArrayTrieInsertAll() + { + return new ArrayBasedTrie(MimeTypes.Types); + } + + //[Benchmark] + public HybridTrie HybridTrieInsertAll() + { + return new HybridTrie(MimeTypes.Types); + } + + [Benchmark(OperationsPerInvoke = OpsPerInvoke)] + public FileType LinearCountingAnalyzerSearch() { FileType result = null; foreach (var array in files) @@ -89,7 +115,7 @@ public FileType LinearCountingAnalyzer() } [Benchmark(OperationsPerInvoke = OpsPerInvoke)] - public FileType DictionaryBasedTrie() + public FileType DictionaryTrieSearch() { FileType result = null; foreach (var array in files) @@ -102,8 +128,24 @@ public FileType DictionaryBasedTrie() return result; } + + [Benchmark(OperationsPerInvoke = OpsPerInvoke)] + public FileType HybridTrieSearch() + { + FileType result = null; + foreach (var array in files) + { + using (ReadResult readResult = new ReadResult(array, MimeTypes.MaxHeaderSize)) + { + result = trie3.Search(in readResult); + } + } + return result; + } + + [Benchmark(OperationsPerInvoke = OpsPerInvoke)] - public FileType ArrayBasedTrie() + public FileType ArrayTrieSearch() { FileType result = null; 
foreach (var array in files) diff --git a/test/Mime-Detective.Tests/Data/Images/Thumbs.db b/test/Mime-Detective.Tests/Data/Images/Thumbs.db new file mode 100644 index 0000000..d41bb0c Binary files /dev/null and b/test/Mime-Detective.Tests/Data/Images/Thumbs.db differ diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs index bdd4886..6013cfb 100644 --- a/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs +++ b/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs @@ -56,7 +56,16 @@ public void Insert() [InlineData("./Data/Zip/images.zip", "zip")] [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] [InlineData("./Data/images/test.gif", "gif")] + [InlineData("./Data/images/test.jpg", "jpg")] + [InlineData("./Data/images/test.ico", "ico")] + [InlineData("./Data/images/test.png", "png")] + [InlineData("./Data/images/test.bmp", "bmp")] [InlineData("./Data/Audio/wavVLC.wav", "wav")] + [InlineData("./Data/Audio/flacVLC.flac", "flac")] + [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] public async Task Search(string path, string ext) { var analyzer = new ArrayBasedTrie(MimeTypes.Types); @@ -71,5 +80,90 @@ public async Task Search(string path, string ext) Assert.NotNull(type); Assert.Contains(ext, type.Extension); } + + [Fact] + public void InsertZeroOffsetFirstWildCard() + { + var analyzer = new ArrayBasedTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1], 1); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(0, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCard() + { + var analyzer = new ArrayBasedTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[560], 560); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCardFull() + { + var analyzer = new ArrayBasedTrie(); + FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1120], 1120); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void IncrementalInsertSearchBoundries() + { + var analyzer = new ArrayBasedTrie(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + + var bytes1 = new byte[i+1]; + ReadResult readResult = new ReadResult(bytes1, bytes1.Length); + FileType type = analyzer.Search(in readResult); + + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(i, type.HeaderOffset); + } + } + + [Fact] + public void InsertSearchBoundries() + { + var analyzer = new ArrayBasedTrie(); + List fileTypes = new List(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + 
analyzer.Insert(fileType); + fileTypes.Add(fileType); + } + + for (int i = 0; i < 560; i++) + { + var bytes = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes, bytes.Length); + FileType type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileTypes[i], type); + Assert.Equal(i, type.HeaderOffset); + } + } } } diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs index b3f90f3..56a5fee 100644 --- a/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs +++ b/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs @@ -28,7 +28,6 @@ public void EnumerableConstructor() //assertion here just to have Assert.NotNull(analyzer); - Assert.Throws(() => new DictionaryBasedTrie(null)); analyzer.Insert(MimeTypes.WORD); @@ -57,7 +56,16 @@ public void Insert() [InlineData("./Data/Zip/images.zip", "zip")] [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] [InlineData("./Data/images/test.gif", "gif")] + [InlineData("./Data/images/test.jpg", "jpg")] + [InlineData("./Data/images/test.ico", "ico")] + [InlineData("./Data/images/test.png", "png")] + [InlineData("./Data/images/test.bmp", "bmp")] [InlineData("./Data/Audio/wavVLC.wav", "wav")] + [InlineData("./Data/Audio/flacVLC.flac", "flac")] + [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] public async Task Search(string path, string ext) { var analyzer = new DictionaryBasedTrie(MimeTypes.Types); @@ -72,5 +80,90 @@ public async Task Search(string path, string ext) Assert.NotNull(type); Assert.Contains(ext, type.Extension); } + + [Fact] + public void InsertZeroOffsetFirstWildCard() + { + var analyzer = new DictionaryBasedTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1], 1); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(0, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCard() + { + var analyzer = new DictionaryBasedTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[560], 560); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCardFull() + { + var analyzer = new DictionaryBasedTrie(); + FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1120], 1120); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void IncrementalInsertSearchBoundries() + { + var analyzer = new DictionaryBasedTrie(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + + var bytes1 = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes1, bytes1.Length); + FileType type = analyzer.Search(in readResult); + + Assert.NotNull(type); + Assert.Same(fileType, type); + 
Assert.Equal(i, type.HeaderOffset); + } + } + + [Fact] + public void InsertSearchBoundries() + { + var analyzer = new DictionaryBasedTrie(); + List fileTypes = new List(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + fileTypes.Add(fileType); + } + + for (int i = 0; i < 560; i++) + { + var bytes = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes, bytes.Length); + FileType type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileTypes[i], type); + Assert.Equal(i, type.HeaderOffset); + } + } } } diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs new file mode 100644 index 0000000..d16aa67 --- /dev/null +++ b/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs @@ -0,0 +1,169 @@ +using MimeDetective.Analyzers; +using System; +using System.Collections.Generic; +using System.IO; +using System.Text; +using System.Threading.Tasks; +using Xunit; + +namespace MimeDetective.Tests.Analyzers +{ + public class HybridTrieTests + { + [Fact] + public void DefaultConstructor() + { + var analyzer = new HybridTrie(); + + //assertion here just to have + Assert.NotNull(analyzer); + + analyzer.Insert(MimeTypes.ZIP); + } + + [Fact] + public void EnumerableConstructor() + { + var analyzer = new HybridTrie(MimeTypes.Types); + + //assertion here just to have + Assert.NotNull(analyzer); + Assert.Throws(() => new HybridTrie(null)); + + analyzer.Insert(MimeTypes.WORD); + } + + [Fact] + public void Insert() + { + var analyzer = new HybridTrie(); + Assert.Throws(() => analyzer.Insert(null)); + + foreach (var fileType in MimeTypes.Types) + { + analyzer.Insert(fileType); + } + + analyzer.Insert(MimeTypes.WORD); + } + + [Theory] + [InlineData("./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData("./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData("./Data/Documents/DocWord2016.doc", "doc")] + [InlineData("./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData("./Data/Zip/empty.zip", "zip")] + [InlineData("./Data/Zip/images.zip", "zip")] + [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData("./Data/images/test.gif", "gif")] + [InlineData("./Data/images/test.jpg", "jpg")] + [InlineData("./Data/images/test.ico", "ico")] + [InlineData("./Data/images/test.png", "png")] + [InlineData("./Data/images/test.bmp", "bmp")] + [InlineData("./Data/Audio/wavVLC.wav", "wav")] + [InlineData("./Data/Audio/flacVLC.flac", "flac")] + [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] + public async Task Search(string path, string ext) + { + var analyzer = new HybridTrie(MimeTypes.Types); + FileInfo file = new FileInfo(path); + FileType type = null; + + using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) + { + type = analyzer.Search(in result); + } + + Assert.NotNull(type); + Assert.Contains(ext, type.Extension); + } + + [Fact] + public void InsertZeroOffsetFirstWildCard() + { + var analyzer = new HybridTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1], 1); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + 
Assert.Equal(0, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCard() + { + var analyzer = new HybridTrie(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[560], 560); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCardFull() + { + var analyzer = new HybridTrie(); + FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1120], 1120); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void IncrementalInsertSearchBoundries() + { + var analyzer = new HybridTrie(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + + var bytes1 = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes1, bytes1.Length); + FileType type = analyzer.Search(in readResult); + + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(i, type.HeaderOffset); + } + } + + [Fact] + public void InsertSearchBoundries() + { + var analyzer = new HybridTrie(); + List fileTypes = new List(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + fileTypes.Add(fileType); + } + + for (int i = 0; i < 560; i++) + { + var bytes = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes, bytes.Length); + FileType type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileTypes[i], type); + Assert.Equal(i, type.HeaderOffset); + } + } + } +} diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs index f742ca5..8a2d868 100644 --- a/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs +++ b/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs @@ -13,7 +13,7 @@ public class LinearCountingAnalyzerTests [Fact] public void DefaultConstructor() { - LinearCountingAnalyzer analyzer = new LinearCountingAnalyzer(); + var analyzer = new LinearCountingAnalyzer(); //assertion here just to have Assert.NotNull(analyzer); @@ -24,7 +24,7 @@ public void DefaultConstructor() [Fact] public void EnumerableConstructor() { - LinearCountingAnalyzer analyzer = new LinearCountingAnalyzer(MimeTypes.Types); + var analyzer = new LinearCountingAnalyzer(MimeTypes.Types); //assertion here just to have Assert.NotNull(analyzer); @@ -36,7 +36,7 @@ public void EnumerableConstructor() [Fact] public void Insert() { - LinearCountingAnalyzer analyzer = new LinearCountingAnalyzer(); + var analyzer = new LinearCountingAnalyzer(); Assert.Throws(() => analyzer.Insert(null)); foreach (var fileType in MimeTypes.Types) @@ -56,10 +56,19 @@ public void Insert() [InlineData("./Data/Zip/images.zip", "zip")] [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] [InlineData("./Data/images/test.gif", "gif")] + [InlineData("./Data/images/test.jpg", "jpg")] + [InlineData("./Data/images/test.ico", "ico")] + [InlineData("./Data/images/test.png", "png")] + [InlineData("./Data/images/test.bmp", 
"bmp")] [InlineData("./Data/Audio/wavVLC.wav", "wav")] + [InlineData("./Data/Audio/flacVLC.flac", "flac")] + [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] public async Task Search(string path, string ext) { - LinearCountingAnalyzer analyzer = new LinearCountingAnalyzer(MimeTypes.Types); + var analyzer = new LinearCountingAnalyzer(MimeTypes.Types); FileInfo file = new FileInfo(path); FileType type = null; @@ -71,5 +80,90 @@ public async Task Search(string path, string ext) Assert.NotNull(type); Assert.Contains(ext, type.Extension); } + + [Fact] + public void InsertZeroOffsetFirstWildCard() + { + var analyzer = new LinearCountingAnalyzer(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1], 1); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(0, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCard() + { + var analyzer = new LinearCountingAnalyzer(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[560], 560); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void InsertLastOffsetWildCardFull() + { + var analyzer = new LinearCountingAnalyzer(); + FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1120], 1120); + var type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(559, type.HeaderOffset); + } + + [Fact] + public void IncrementalInsertSearchBoundries() + { + var analyzer = new LinearCountingAnalyzer(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + i, (ushort)i); + analyzer.Insert(fileType); + + var bytes1 = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes1, bytes1.Length); + FileType type = analyzer.Search(in readResult); + + Assert.NotNull(type); + Assert.Same(fileType, type); + Assert.Equal(i, type.HeaderOffset); + } + } + + [Fact] + public void InsertSearchBoundries() + { + var analyzer = new LinearCountingAnalyzer(); + List fileTypes = new List(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + i, (ushort)i); + analyzer.Insert(fileType); + fileTypes.Add(fileType); + } + + for (int i = 0; i < 560; i++) + { + var bytes = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes, bytes.Length); + FileType type = analyzer.Search(in readResult); + Assert.NotNull(type); + Assert.Same(fileTypes[i], type); + Assert.Equal(i, type.HeaderOffset); + } + } } }