diff --git a/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs b/src/Mime-Detective/Analyzers/ArrayTrie.cs similarity index 78% rename from src/Mime-Detective/Analyzers/ArrayBasedTrie.cs rename to src/Mime-Detective/Analyzers/ArrayTrie.cs index 6e6722b..bca7c5e 100644 --- a/src/Mime-Detective/Analyzers/ArrayBasedTrie.cs +++ b/src/Mime-Detective/Analyzers/ArrayTrie.cs @@ -3,7 +3,7 @@ namespace MimeDetective.Analyzers { - public sealed class ArrayBasedTrie : IFileAnalyzer + public sealed class ArrayTrie : IFileAnalyzer { private const int NullStandInValue = 256; private const int MaxNodeSize = 257; @@ -14,7 +14,7 @@ public sealed class ArrayBasedTrie : IFileAnalyzer /// /// Constructs an empty ArrayBasedTrie, to add definitions /// - public ArrayBasedTrie() + public ArrayTrie() { OffsetNodes[0] = new OffsetNode(0); } @@ -23,10 +23,10 @@ public ArrayBasedTrie() /// Constructs an ArrayBasedTrie from an Enumerable of FileTypes, to add more definitions /// /// - public ArrayBasedTrie(IEnumerable types) + public ArrayTrie(IEnumerable types) { if (types is null) - throw new ArgumentNullException(nameof(types)); + ThrowHelpers.FileTypeEnumerableIsNull(); OffsetNodes[0] = new OffsetNode(0); @@ -55,7 +55,7 @@ public FileType Search(in ReadResult readResult) int currentVal = readResult.Array[i]; Node node = prevNode[currentVal]; - if (node.Children == null) + if (node.Children is null) { node = prevNode[NullStandInValue]; @@ -83,7 +83,7 @@ public FileType Search(in ReadResult readResult) public void Insert(FileType type) { if (type is null) - throw new ArgumentNullException(nameof(type)); + ThrowHelpers.FileTypeArgumentIsNull(); ref OffsetNode match = ref OffsetNodes[0]; bool matchFound = false; @@ -102,17 +102,17 @@ public void Insert(FileType type) if (!matchFound) { - int newNodePos = offsetNodesLength; - - if (newNodePos >= OffsetNodes.Length) + if (offsetNodesLength >= OffsetNodes.Length) { - int newOffsetNodeCount = OffsetNodes.Length * 2 + 1; + //TODO put max size check + 
int newOffsetNodeCalc = OffsetNodes.Length * 2; + int newOffsetNodeCount = newOffsetNodeCalc > 560 ? 560 : newOffsetNodeCalc; var newOffsetNodes = new OffsetNode[newOffsetNodeCount]; Array.Copy(OffsetNodes, newOffsetNodes, offsetNodesLength); OffsetNodes = newOffsetNodes; } - match = ref OffsetNodes[newNodePos]; + match = ref OffsetNodes[offsetNodesLength]; match = new OffsetNode(type.HeaderOffset); offsetNodesLength++; } @@ -121,19 +121,15 @@ public void Insert(FileType type) for (int i = 0; i < type.Header.Length; i++) { - byte? value = type.Header[i]; - int arrayPos = value ?? NullStandInValue; + int arrayPos = type.Header[i] ?? NullStandInValue; ref Node node = ref prevNode[arrayPos]; - if (node.Children is null) - { - FileType record = null; + //TODO maybe short circuit it + if (i == type.Header.Length - 1) + node.Record = type; - if (i == type.Header.Length - 1) - record = type; - - node = new Node(record); - } + if (node.Children is null) + node.Children = new Node[MaxNodeSize]; prevNode = node.Children; } @@ -155,14 +151,7 @@ private struct Node { public Node[] Children; - //if complete node then this not null public FileType Record; - - public Node(FileType record) - { - Children = new Node[MaxNodeSize]; - Record = record; - } } } } \ No newline at end of file diff --git a/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs b/src/Mime-Detective/Analyzers/DictionaryTrie.cs similarity index 90% rename from src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs rename to src/Mime-Detective/Analyzers/DictionaryTrie.cs index 884d787..0bd62b5 100644 --- a/src/Mime-Detective/Analyzers/DictionaryBasedTrie.cs +++ b/src/Mime-Detective/Analyzers/DictionaryTrie.cs @@ -4,7 +4,7 @@ namespace MimeDetective.Analyzers { - public sealed class DictionaryBasedTrie : IFileAnalyzer + public sealed class DictionaryTrie : IFileAnalyzer { private const ushort NullStandInValue = 256; @@ -14,7 +14,7 @@ public sealed class DictionaryBasedTrie : IFileAnalyzer /// /// Constructs an empty 
DictionaryBasedTrie /// - public DictionaryBasedTrie() + public DictionaryTrie() { } @@ -23,14 +23,15 @@ public DictionaryBasedTrie() /// Constructs a DictionaryBasedTrie from an Enumerable of FileTypes /// /// - public DictionaryBasedTrie(IEnumerable types) + public DictionaryTrie(IEnumerable types) { if (types is null) - throw new ArgumentNullException(nameof(types)); + ThrowHelpers.FileTypeEnumerableIsNull(); foreach (var type in types) { - Insert(type); + if ((object)type != null) + Insert(type); } } @@ -69,7 +70,7 @@ public FileType Search(in ReadResult readResult) public void Insert(FileType type) { if (type is null) - throw new ArgumentNullException(nameof(type)); + ThrowHelpers.FileTypeArgumentIsNull(); if (!Nodes.TryGetValue(type.HeaderOffset, out var offsetNode)) { diff --git a/src/Mime-Detective/Analyzers/Helpers.cs b/src/Mime-Detective/Analyzers/Helpers.cs index 66dda53..6ea3f49 100644 --- a/src/Mime-Detective/Analyzers/Helpers.cs +++ b/src/Mime-Detective/Analyzers/Helpers.cs @@ -10,5 +10,15 @@ public static void GreaterThanMaxHeaderSize() { throw new ArgumentException("Offset cannot be greater than MaxHeaderSize - 1"); } + + public static void FileTypeArgumentIsNull() + { + throw new ArgumentNullException(paramName: null, message: "FileType argument cannot be null"); //1-arg ctor takes paramName, so the message is passed explicitly + } + + public static void FileTypeEnumerableIsNull() + { + throw new ArgumentNullException(paramName: null, message: "FileType Enumerable cannot be null"); + } } } diff --git a/src/Mime-Detective/Analyzers/HybridTrie.cs b/src/Mime-Detective/Analyzers/HybridTrie.cs index c9d1b0e..7b18ddd 100644 --- a/src/Mime-Detective/Analyzers/HybridTrie.cs +++ b/src/Mime-Detective/Analyzers/HybridTrie.cs @@ -39,13 +39,14 @@ public HybridTrie() public HybridTrie(IEnumerable types) { if (types is null) - throw new ArgumentNullException(nameof(types)); + ThrowHelpers.FileTypeEnumerableIsNull(); OffsetNodes[0] = new OffsetNode(0); foreach (var type in types) { - Insert(type); + if ((object)type != null) + Insert(type); } } @@ -106,7 +107,7 @@ public FileType 
Search(in ReadResult readResult) public void Insert(FileType type) { if (type is null) - throw new ArgumentNullException(nameof(type)); + ThrowHelpers.FileTypeArgumentIsNull(); ref OffsetNode match = ref OffsetNodes[0]; bool matchFound = false; @@ -126,17 +127,16 @@ public void Insert(FileType type) //handle expanding collection if (!matchFound) { - int newNodePos = offsetNodesLength; - - if (newNodePos >= OffsetNodes.Length) + if (offsetNodesLength >= OffsetNodes.Length) { - int newOffsetNodeCount = OffsetNodes.Length * 2 + 1; + int newOffsetNodeCalc = OffsetNodes.Length * 2; + int newOffsetNodeCount = newOffsetNodeCalc > 560 ? 560 : newOffsetNodeCalc; var newOffsetNodes = new OffsetNode[newOffsetNodeCount]; Array.Copy(OffsetNodes, newOffsetNodes, offsetNodesLength); OffsetNodes = newOffsetNodes; } - match = ref OffsetNodes[newNodePos]; + match = ref OffsetNodes[offsetNodesLength]; match = new OffsetNode(type.HeaderOffset); offsetNodesLength++; } @@ -175,6 +175,7 @@ public void Insert(FileType type) node.Record = type; } + //TODO make a base-1 dict private sealed class Node { //if complete node then this not null diff --git a/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs b/src/Mime-Detective/Analyzers/LinearCounting.cs similarity index 57% rename from src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs rename to src/Mime-Detective/Analyzers/LinearCounting.cs index 441d39e..883a7d0 100644 --- a/src/Mime-Detective/Analyzers/LinearCountingAnalyzer.cs +++ b/src/Mime-Detective/Analyzers/LinearCounting.cs @@ -1,74 +1,80 @@ using System; using System.Collections.Generic; using System.Text; +using System.Linq; namespace MimeDetective.Analyzers { - public class LinearCountingAnalyzer : IFileAnalyzer + public class LinearCounting : IFileAnalyzer { - private readonly List types; + private FileType[] types = new FileType[20]; + private int typesLength = 0; /// /// Constructs an empty LinearCountingAnalyzer, use to add file types /// - public 
LinearCountingAnalyzer() + public LinearCounting() { - types = new List(); } /// /// Constructs a LinearCountingAnalyzer using the supplied IEnumerable /// /// - public LinearCountingAnalyzer(IEnumerable fileTypes) + public LinearCounting(IEnumerable fileTypes) { if (fileTypes is null) - throw new ArgumentNullException(nameof(fileTypes)); - - types = new List(); + ThrowHelpers.FileTypeEnumerableIsNull(); foreach (var fileType in fileTypes) { if ((object)fileType != null) Insert(fileType); } + + //types.OrderBy(x => x.HeaderOffset); + //todo sort + //Array.Sort(types, (x,y) => x.HeaderOffset.CompareTo(y.HeaderOffset)); + //types = types; } public void Insert(FileType fileType) { if (fileType is null) - throw new ArgumentNullException(nameof(fileType)); + ThrowHelpers.FileTypeArgumentIsNull(); + + if (typesLength >= types.Length) + { + int newTypesCount = types.Length * 2; + var newTypes = new FileType[newTypesCount]; + Array.Copy(types, newTypes, typesLength); + types = newTypes; + } - types.Add(fileType); + types[typesLength] = fileType; + typesLength++; } public FileType Search(in ReadResult readResult) { - if (readResult.ReadLength == 0) - return null; - uint highestMatchingCount = 0; FileType highestMatchingType = null; // compare the file header to the stored file headers - for (int typeIndex = 0; typeIndex < types.Count; typeIndex++) + for (int typeIndex = 0; typeIndex < typesLength; typeIndex++) { FileType type = types[typeIndex]; - uint matchingCount = 0; - int iOffset = type.HeaderOffset; - int readEnd = iOffset + type.Header.Length; - - if (readEnd > readResult.ReadLength) - continue; - for (int i = 0; iOffset < readEnd; i++, iOffset++) + for (int i = 0, iOffset = type.HeaderOffset; iOffset < readResult.ReadLength && i < type.Header.Length; i++, iOffset++) { if (type.Header[i] is null || type.Header[i].Value == readResult.Array[iOffset]) matchingCount++; + else + break; } - //TODO should this be default behavior + //TODO should this be default behavior? 
if (type.Header.Length == matchingCount && matchingCount >= highestMatchingCount) { highestMatchingType = type; diff --git a/src/Mime-Detective/Analyzers/LinearTrie.cs b/src/Mime-Detective/Analyzers/LinearTrie.cs new file mode 100644 index 0000000..5972290 --- /dev/null +++ b/src/Mime-Detective/Analyzers/LinearTrie.cs @@ -0,0 +1,183 @@ +using System; +using System.Collections.Generic; +using System.Runtime.InteropServices; + +namespace MimeDetective.Analyzers +{ + //this is somehow off by one in the insert... + //on the single insert search test + //record is inserted on the 3rd node, when it should be on the second + public sealed class LinearTrie : IFileAnalyzer + { + private const int NullStandInValue = 256; + private const int MaxNodeSize = 257; + + private OffsetNode[] OffsetNodes = new OffsetNode[10]; + private int offsetNodesLength = 1; + + [StructLayout(LayoutKind.Auto)] + private struct OffsetNode + { + public ushort Offset; + public int TrieLength; + public Node[] Trie; + + public OffsetNode(ushort offset) + { + Offset = offset; + //this is the issue resizing this is dropping the reference in the insert and search algs + Trie = new Node[64]; + TrieLength = 1; + } + } + + //if we do an offset here we could cut this in half + [StructLayout(LayoutKind.Auto)] + private unsafe struct Node + { + //if complete node then this not null + public FileType Record; + + public fixed ushort Children[MaxNodeSize]; + } + + /// + /// Constructs an empty LinearTrie, to add definitions + /// + public LinearTrie() + { + OffsetNodes[0] = new OffsetNode(0); + } + + /// + /// Constructs a LinearTrie from an Enumerable of FileTypes, to add more definitions + /// + /// + public LinearTrie(IEnumerable types) + { + if (types is null) + ThrowHelpers.FileTypeEnumerableIsNull(); + + OffsetNodes[0] = new OffsetNode(0); + + foreach (var type in types) + { + if ((object)type != null) + Insert(type); + } + } + + //TODO need tests for highestmatching count behavior + public unsafe 
FileType Search(in ReadResult readResult) + { + FileType match = null; + int highestMatchingCount = 0; + + //iterate through offset nodes + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) + { + ref OffsetNode offsetNode = ref OffsetNodes[offsetNodeIndex]; + ref Node node = ref offsetNode.Trie[0]; + int i = offsetNode.Offset; + + //todo currently loops longer than it should + while (i < readResult.ReadLength) + { + int arrayPos = readResult.Array[i]; + int triePos = node.Children[arrayPos]; + + if (triePos <= 0) + { + triePos = node.Children[NullStandInValue]; + + if (triePos <= 0) + break; + } + + node = ref offsetNode.Trie[triePos]; + i++; + + //collect the record + if (i > highestMatchingCount && (object)node.Record != null) + { + match = node.Record; + highestMatchingCount = i; + } + } + } + + return match; + } + + public unsafe void Insert(FileType type) + { + if (type is null) + ThrowHelpers.FileTypeArgumentIsNull(); + + ref OffsetNode offsetNode = ref OffsetNodes[0]; + bool matchFound = false; + + for (int offsetNodeIndex = 0; offsetNodeIndex < offsetNodesLength; offsetNodeIndex++) + { + ref OffsetNode currentNode = ref OffsetNodes[offsetNodeIndex]; + + if (currentNode.Offset == type.HeaderOffset) + { + offsetNode = ref currentNode; + matchFound = true; + break; + } + } + + //handle adding new offsetNode and offsetNOde array resize + if (!matchFound) + { + if (offsetNodesLength >= OffsetNodes.Length) + { + //TODO put max size check + int newOffsetNodeCount = OffsetNodes.Length * 2; + var newOffsetNodes = new OffsetNode[newOffsetNodeCount]; + Array.Copy(OffsetNodes, newOffsetNodes, offsetNodesLength); + OffsetNodes = newOffsetNodes; + } + + offsetNode = ref OffsetNodes[offsetNodesLength]; + offsetNode = new OffsetNode(type.HeaderOffset); + offsetNodesLength++; + } + + //setup variables for walking the trie + int i = 0; + ref Node node = ref offsetNode.Trie[0]; + + while (i < type.Header.Length) + { + int arrayPos = 
type.Header[i] ?? NullStandInValue; + int triePos = node.Children[arrayPos]; + + //insert new node, handle possible resize + if (triePos <= 0) + { + triePos = offsetNode.TrieLength; + node.Children[arrayPos] = (ushort)triePos; + + if (offsetNode.TrieLength >= offsetNode.Trie.Length) + { + //TODO put max size check + int newTrieNodeCount = offsetNode.Trie.Length * 2; + var newTrieNodes = new Node[newTrieNodeCount]; + Array.Copy(offsetNode.Trie, newTrieNodes, offsetNode.TrieLength); + offsetNode.Trie = newTrieNodes; + } + + offsetNode.TrieLength++; + } + + node = ref offsetNode.Trie[triePos]; + i++; + } + + node.Record = type; + } + } +} \ No newline at end of file diff --git a/src/Mime-Detective/Analyzers/MSOfficeAnalyzer.cs b/src/Mime-Detective/Analyzers/MSOfficeAnalyzer.cs index 017b63d..269783f 100644 --- a/src/Mime-Detective/Analyzers/MSOfficeAnalyzer.cs +++ b/src/Mime-Detective/Analyzers/MSOfficeAnalyzer.cs @@ -11,11 +11,11 @@ public class MsOfficeAnalyzer : IReadOnlyFileAnalyzer public static FileType[] MsDocTypes { get; } = new FileType[] { MimeTypes.PPT, MimeTypes.WORD, MimeTypes.EXCEL }; - private readonly DictionaryBasedTrie dictTrie; + private readonly DictionaryTrie dictTrie; public MsOfficeAnalyzer() { - dictTrie = new DictionaryBasedTrie(MsDocTypes); + dictTrie = new DictionaryTrie(MsDocTypes); } public FileType Search(in ReadResult readResult) diff --git a/src/Mime-Detective/Analyzers/MimeAnalyzers.cs b/src/Mime-Detective/Analyzers/MimeAnalyzers.cs index a418664..19e669d 100644 --- a/src/Mime-Detective/Analyzers/MimeAnalyzers.cs +++ b/src/Mime-Detective/Analyzers/MimeAnalyzers.cs @@ -9,7 +9,7 @@ namespace MimeDetective /// public static class MimeAnalyzers { - private static IFileAnalyzer primaryAnalyzer = new DictionaryBasedTrie(MimeTypes.Types); + private static IFileAnalyzer primaryAnalyzer = new DictionaryTrie(MimeTypes.Types); /// /// diff --git a/src/Mime-Detective/Extensions/ByteArrayExtensions.cs 
b/src/Mime-Detective/Extensions/ByteArrayExtensions.cs index 95666aa..4dfa609 100644 --- a/src/Mime-Detective/Extensions/ByteArrayExtensions.cs +++ b/src/Mime-Detective/Extensions/ByteArrayExtensions.cs @@ -14,7 +14,8 @@ public static class ByteArrayExtensions /// FileType or null not identified public static FileType GetFileType(this byte[] bytes) { - using (ReadResult readResult = new ReadResult(bytes, Math.Min(bytes.Length, MimeTypes.MaxHeaderSize))) + int min = bytes.Length > MimeTypes.MaxHeaderSize ? MimeTypes.MaxHeaderSize : bytes.Length; + using (ReadResult readResult = new ReadResult(bytes, min)) { return MimeAnalyzers.GetFileType(in readResult); } diff --git a/src/Mime-Detective/Mime-Detective.csproj b/src/Mime-Detective/Mime-Detective.csproj index 93369c9..77a3a62 100644 --- a/src/Mime-Detective/Mime-Detective.csproj +++ b/src/Mime-Detective/Mime-Detective.csproj @@ -18,9 +18,11 @@ 0.0.6.0 0.0.6.0 - See beta3 PR - 0.0.6.0-beta3 + See beta4 PR + 0.0.6.0-beta4 true + + true @@ -28,23 +30,20 @@ - - + - - - + diff --git a/src/Mime-Detective/MimeDetective.cs b/src/Mime-Detective/MimeDetective.cs index 5e214d0..39b6423 100644 --- a/src/Mime-Detective/MimeDetective.cs +++ b/src/Mime-Detective/MimeDetective.cs @@ -1,6 +1,5 @@ using System.Collections.Generic; using System.IO; -using System.Linq; namespace MimeDetective { diff --git a/src/Mime-Detective/MimeTypes.cs b/src/Mime-Detective/MimeTypes.cs index 00af6a7..317f140 100644 --- a/src/Mime-Detective/MimeTypes.cs +++ b/src/Mime-Detective/MimeTypes.cs @@ -229,6 +229,7 @@ EML is also used by Outlook Express and QuickMail. 
#endregion Constants + /* public static void SaveToXmlFile(string path, IEnumerable types) { using (FileStream file = File.OpenWrite(path)) @@ -246,7 +247,7 @@ public static FileType[] LoadFromXmlFile(string path) return (FileType[])serializer.Deserialize(file); } - } + }*/ /// /// Gets the list of FileTypes based on list of extensions in Comma-Separated-Values string diff --git a/test/Mime-Detective.Benchmarks/Mime-Detective.Benchmarks.csproj b/test/Mime-Detective.Benchmarks/Mime-Detective.Benchmarks.csproj index c0e8bd3..ef761c0 100644 --- a/test/Mime-Detective.Benchmarks/Mime-Detective.Benchmarks.csproj +++ b/test/Mime-Detective.Benchmarks/Mime-Detective.Benchmarks.csproj @@ -11,8 +11,9 @@ - 0.10.13 + 0.10.14 + diff --git a/test/Mime-Detective.Benchmarks/Program.cs b/test/Mime-Detective.Benchmarks/Program.cs index da79e7d..02f725a 100644 --- a/test/Mime-Detective.Benchmarks/Program.cs +++ b/test/Mime-Detective.Benchmarks/Program.cs @@ -14,7 +14,7 @@ using System.Runtime.CompilerServices; using MimeDetective.Analyzers; -namespace Mime_Detective.Benchmarks +namespace MimeDetective.Benchmarks { public class MyConfig : ManualConfig { @@ -26,18 +26,20 @@ public MyConfig() .With(Jit.RyuJit) .With(Platform.X64) .WithId("Net47"));*/ - + /* Add(Job.Default.With(Runtime.Core) .With(CsProjCoreToolchain.NetCoreApp11) .With(Platform.X64) .With(Jit.RyuJit) - .WithId("NetCore1.1")); - + .WithId("NetCore1.1"));*/ + + /* Add(Job.Default.With(Runtime.Core) .With(CsProjCoreToolchain.NetCoreApp20) .With(Platform.X64) .With(Jit.RyuJit) .WithId("NetCore2.0")); + */ Add(Job.Default.With(Runtime.Core) .With(CsProjCoreToolchain.NetCoreApp21) @@ -48,23 +50,28 @@ public MyConfig() } [Config(typeof(MyConfig)), MemoryDiagnoser] - public class TypeLookup + public class Benchmarks { static readonly byte[][] files = new byte[][] { + ReadFile(new FileInfo("./data/Assemblies/ManagedDLL.dll")), + ReadFile(new FileInfo("./data/Assemblies/ManagedExe.exe")), + ReadFile(new 
FileInfo("./data/Images/test.png")), ReadFile(new FileInfo("./data/Images/test.jpg")), ReadFile(new FileInfo("./data/Images/test.gif")), ReadFile(new FileInfo("./data/Documents/DocWord2016.doc")), ReadFile(new FileInfo("./data/Zip/Images.zip")), ReadFile(new FileInfo("./data/Assemblies/NativeExe.exe")), - ReadFile(new FileInfo("./data/Audio/wavVLC.wav")) + ReadFile(new FileInfo("./data/Audio/wavVLC.wav")), + ReadFile(new FileInfo("./data/Documents/PdfWord2016.pdf")) }; - const int OpsPerInvoke = 6; - static readonly LinearCountingAnalyzer linear = new LinearCountingAnalyzer(MimeTypes.Types); - static readonly DictionaryBasedTrie trie2 = new DictionaryBasedTrie(MimeTypes.Types); - static readonly HybridTrie trie3 = new HybridTrie(MimeTypes.Types); - static readonly ArrayBasedTrie trie5 = new ArrayBasedTrie(MimeTypes.Types); + const int OpsPerInvoke = 10; + static readonly LinearCounting linear = new LinearCounting(MimeTypes.Types); + static readonly DictionaryTrie dict = new DictionaryTrie(MimeTypes.Types); + static readonly HybridTrie hybrid = new HybridTrie(MimeTypes.Types); + static readonly ArrayTrie array = new ArrayTrie(MimeTypes.Types); + static readonly LinearTrie linearTrie = new LinearTrie(MimeTypes.Types); static byte[] ReadFile(FileInfo info) { @@ -76,32 +83,38 @@ static byte[] ReadFile(FileInfo info) return bytes; } - //[Benchmark] - public LinearCountingAnalyzer LinearCountingAnalyzerInsertAll() + [Benchmark] + public LinearCounting LinearCountingInsertAll() + { + return new LinearCounting(MimeTypes.Types); + } + + [Benchmark] + public LinearTrie LinearTrieInsertAll() { - return new LinearCountingAnalyzer(MimeTypes.Types); + return new LinearTrie(MimeTypes.Types); } - //[Benchmark] - public DictionaryBasedTrie DictTrieInsertAll() + [Benchmark] + public DictionaryTrie DictTrieInsertAll() { - return new DictionaryBasedTrie(MimeTypes.Types); + return new DictionaryTrie(MimeTypes.Types); } - //[Benchmark] - public ArrayBasedTrie ArrayTrieInsertAll() + 
[Benchmark] + public ArrayTrie ArrayTrieInsertAll() { - return new ArrayBasedTrie(MimeTypes.Types); + return new ArrayTrie(MimeTypes.Types); } - //[Benchmark] + [Benchmark] public HybridTrie HybridTrieInsertAll() { return new HybridTrie(MimeTypes.Types); } [Benchmark(OperationsPerInvoke = OpsPerInvoke)] - public FileType LinearCountingAnalyzerSearch() + public FileType LinearCountingSearch() { FileType result = null; foreach (var array in files) @@ -114,6 +127,20 @@ public FileType LinearCountingAnalyzerSearch() return result; } + [Benchmark(OperationsPerInvoke = OpsPerInvoke)] + public FileType LinearTrieSearch() + { + FileType result = null; + foreach (var array in files) + { + using (ReadResult readResult = new ReadResult(array, MimeTypes.MaxHeaderSize)) + { + result = linearTrie.Search(in readResult); + } + } + return result; + } + [Benchmark(OperationsPerInvoke = OpsPerInvoke)] public FileType DictionaryTrieSearch() { @@ -122,7 +149,7 @@ public FileType DictionaryTrieSearch() { using (ReadResult readResult = new ReadResult(array, MimeTypes.MaxHeaderSize)) { - result = trie2.Search(in readResult); + result = dict.Search(in readResult); } } return result; @@ -137,7 +164,7 @@ public FileType HybridTrieSearch() { using (ReadResult readResult = new ReadResult(array, MimeTypes.MaxHeaderSize)) { - result = trie3.Search(in readResult); + result = hybrid.Search(in readResult); } } return result; @@ -152,7 +179,7 @@ public FileType ArrayTrieSearch() { using (ReadResult readResult = new ReadResult(array, MimeTypes.MaxHeaderSize)) { - result = trie5.Search(in readResult); + result = Benchmarks.array.Search(in readResult); } } return result; @@ -163,7 +190,7 @@ public class Program { public static void Main(string[] args) { - var summary = BenchmarkRunner.Run(); + var summary = BenchmarkRunner.Run(); } } } \ No newline at end of file diff --git a/test/Mime-Detective.Tests/Mime-Detective.Tests.csproj b/test/Mime-Detective.Tests/Mime-Detective.Tests.csproj index 
d147dc2..de0abbb 100644 --- a/test/Mime-Detective.Tests/Mime-Detective.Tests.csproj +++ b/test/Mime-Detective.Tests/Mime-Detective.Tests.csproj @@ -1,7 +1,7 @@  - netcoreapp2.0;netcoreapp1.1;net45 + netcoreapp2.0;net45 Mime-Detective.Tests Mime-Detective.Tests true diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs deleted file mode 100644 index 6013cfb..0000000 --- a/test/Mime-Detective.Tests/Tests/Analyzers/ArrayBasedTrieTests.cs +++ /dev/null @@ -1,169 +0,0 @@ -using MimeDetective.Analyzers; -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Threading.Tasks; -using Xunit; - -namespace MimeDetective.Tests.Analyzers -{ - public class ArrayBasedTrieTests - { - [Fact] - public void DefaultConstructor() - { - var analyzer = new ArrayBasedTrie(); - - //assertion here just to have - Assert.NotNull(analyzer); - - analyzer.Insert(MimeTypes.ZIP); - } - - [Fact] - public void EnumerableConstructor() - { - var analyzer = new ArrayBasedTrie(MimeTypes.Types); - - //assertion here just to have - Assert.NotNull(analyzer); - Assert.Throws(() => new ArrayBasedTrie(null)); - - analyzer.Insert(MimeTypes.WORD); - } - - [Fact] - public void Insert() - { - var analyzer = new ArrayBasedTrie(); - Assert.Throws(() => analyzer.Insert(null)); - - foreach (var fileType in MimeTypes.Types) - { - analyzer.Insert(fileType); - } - - analyzer.Insert(MimeTypes.WORD); - } - - [Theory] - [InlineData("./Data/Documents/XlsExcel2016.xls", "xls")] - [InlineData("./Data/Documents/PptPowerpoint2016.ppt", "ppt")] - [InlineData("./Data/Documents/DocWord2016.doc", "doc")] - [InlineData("./Data/Documents/PdfWord2016.pdf", "pdf")] - [InlineData("./Data/Zip/empty.zip", "zip")] - [InlineData("./Data/Zip/images.zip", "zip")] - [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] - [InlineData("./Data/images/test.gif", "gif")] - [InlineData("./Data/images/test.jpg", 
"jpg")] - [InlineData("./Data/images/test.ico", "ico")] - [InlineData("./Data/images/test.png", "png")] - [InlineData("./Data/images/test.bmp", "bmp")] - [InlineData("./Data/Audio/wavVLC.wav", "wav")] - [InlineData("./Data/Audio/flacVLC.flac", "flac")] - [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] - [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] - [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] - [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] - public async Task Search(string path, string ext) - { - var analyzer = new ArrayBasedTrie(MimeTypes.Types); - FileInfo file = new FileInfo(path); - FileType type = null; - - using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) - { - type = analyzer.Search(in result); - } - - Assert.NotNull(type); - Assert.Contains(ext, type.Extension); - } - - [Fact] - public void InsertZeroOffsetFirstWildCard() - { - var analyzer = new ArrayBasedTrie(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1], 1); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(0, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCard() - { - var analyzer = new ArrayBasedTrie(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[560], 560); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCardFull() - { - var analyzer = new ArrayBasedTrie(); - FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1120], 1120); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - 
Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void IncrementalInsertSearchBoundries() - { - var analyzer = new ArrayBasedTrie(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - - var bytes1 = new byte[i+1]; - ReadResult readResult = new ReadResult(bytes1, bytes1.Length); - FileType type = analyzer.Search(in readResult); - - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(i, type.HeaderOffset); - } - } - - [Fact] - public void InsertSearchBoundries() - { - var analyzer = new ArrayBasedTrie(); - List fileTypes = new List(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - fileTypes.Add(fileType); - } - - for (int i = 0; i < 560; i++) - { - var bytes = new byte[i + 1]; - ReadResult readResult = new ReadResult(bytes, bytes.Length); - FileType type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileTypes[i], type); - Assert.Equal(i, type.HeaderOffset); - } - } - } -} diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs deleted file mode 100644 index 56a5fee..0000000 --- a/test/Mime-Detective.Tests/Tests/Analyzers/DictionaryBasedTrieTests.cs +++ /dev/null @@ -1,169 +0,0 @@ -using MimeDetective.Analyzers; -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Threading.Tasks; -using Xunit; - -namespace MimeDetective.Tests.Analyzers -{ - public class DictionaryBasedTrieTests - { - [Fact] - public void DefaultConstructor() - { - var analyzer = new DictionaryBasedTrie(); - - //assertion here just to have - Assert.NotNull(analyzer); - - analyzer.Insert(MimeTypes.ZIP); - } - - [Fact] - 
public void EnumerableConstructor() - { - var analyzer = new DictionaryBasedTrie(MimeTypes.Types); - - //assertion here just to have - Assert.NotNull(analyzer); - Assert.Throws(() => new DictionaryBasedTrie(null)); - - analyzer.Insert(MimeTypes.WORD); - } - - [Fact] - public void Insert() - { - var analyzer = new DictionaryBasedTrie(); - Assert.Throws(() => analyzer.Insert(null)); - - foreach (var fileType in MimeTypes.Types) - { - analyzer.Insert(fileType); - } - - analyzer.Insert(MimeTypes.WORD); - } - - [Theory] - [InlineData("./Data/Documents/XlsExcel2016.xls", "xls")] - [InlineData("./Data/Documents/PptPowerpoint2016.ppt", "ppt")] - [InlineData("./Data/Documents/DocWord2016.doc", "doc")] - [InlineData("./Data/Documents/PdfWord2016.pdf", "pdf")] - [InlineData("./Data/Zip/empty.zip", "zip")] - [InlineData("./Data/Zip/images.zip", "zip")] - [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] - [InlineData("./Data/images/test.gif", "gif")] - [InlineData("./Data/images/test.jpg", "jpg")] - [InlineData("./Data/images/test.ico", "ico")] - [InlineData("./Data/images/test.png", "png")] - [InlineData("./Data/images/test.bmp", "bmp")] - [InlineData("./Data/Audio/wavVLC.wav", "wav")] - [InlineData("./Data/Audio/flacVLC.flac", "flac")] - [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] - [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] - [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] - [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] - public async Task Search(string path, string ext) - { - var analyzer = new DictionaryBasedTrie(MimeTypes.Types); - FileInfo file = new FileInfo(path); - FileType type = null; - - using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) - { - type = analyzer.Search(in result); - } - - Assert.NotNull(type); - Assert.Contains(ext, type.Extension); - } - - [Fact] - public void InsertZeroOffsetFirstWildCard() - { - var analyzer = new DictionaryBasedTrie(); - FileType fileType = new FileType(new byte?[1], "ext", 
"app/ext", 0); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1], 1); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(0, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCard() - { - var analyzer = new DictionaryBasedTrie(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[560], 560); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCardFull() - { - var analyzer = new DictionaryBasedTrie(); - FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1120], 1120); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void IncrementalInsertSearchBoundries() - { - var analyzer = new DictionaryBasedTrie(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - - var bytes1 = new byte[i + 1]; - ReadResult readResult = new ReadResult(bytes1, bytes1.Length); - FileType type = analyzer.Search(in readResult); - - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(i, type.HeaderOffset); - } - } - - [Fact] - public void InsertSearchBoundries() - { - var analyzer = new DictionaryBasedTrie(); - List fileTypes = new List(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - fileTypes.Add(fileType); - } - - for (int i = 0; i < 560; i++) - { - var bytes = new byte[i 
+ 1]; - ReadResult readResult = new ReadResult(bytes, bytes.Length); - FileType type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileTypes[i], type); - Assert.Equal(i, type.HeaderOffset); - } - } - } -} diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs deleted file mode 100644 index d16aa67..0000000 --- a/test/Mime-Detective.Tests/Tests/Analyzers/HybridTrieTests.cs +++ /dev/null @@ -1,169 +0,0 @@ -using MimeDetective.Analyzers; -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Threading.Tasks; -using Xunit; - -namespace MimeDetective.Tests.Analyzers -{ - public class HybridTrieTests - { - [Fact] - public void DefaultConstructor() - { - var analyzer = new HybridTrie(); - - //assertion here just to have - Assert.NotNull(analyzer); - - analyzer.Insert(MimeTypes.ZIP); - } - - [Fact] - public void EnumerableConstructor() - { - var analyzer = new HybridTrie(MimeTypes.Types); - - //assertion here just to have - Assert.NotNull(analyzer); - Assert.Throws(() => new HybridTrie(null)); - - analyzer.Insert(MimeTypes.WORD); - } - - [Fact] - public void Insert() - { - var analyzer = new HybridTrie(); - Assert.Throws(() => analyzer.Insert(null)); - - foreach (var fileType in MimeTypes.Types) - { - analyzer.Insert(fileType); - } - - analyzer.Insert(MimeTypes.WORD); - } - - [Theory] - [InlineData("./Data/Documents/XlsExcel2016.xls", "xls")] - [InlineData("./Data/Documents/PptPowerpoint2016.ppt", "ppt")] - [InlineData("./Data/Documents/DocWord2016.doc", "doc")] - [InlineData("./Data/Documents/PdfWord2016.pdf", "pdf")] - [InlineData("./Data/Zip/empty.zip", "zip")] - [InlineData("./Data/Zip/images.zip", "zip")] - [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] - [InlineData("./Data/images/test.gif", "gif")] - [InlineData("./Data/images/test.jpg", "jpg")] - [InlineData("./Data/images/test.ico", "ico")] - 
[InlineData("./Data/images/test.png", "png")] - [InlineData("./Data/images/test.bmp", "bmp")] - [InlineData("./Data/Audio/wavVLC.wav", "wav")] - [InlineData("./Data/Audio/flacVLC.flac", "flac")] - [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] - [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] - [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] - [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] - public async Task Search(string path, string ext) - { - var analyzer = new HybridTrie(MimeTypes.Types); - FileInfo file = new FileInfo(path); - FileType type = null; - - using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) - { - type = analyzer.Search(in result); - } - - Assert.NotNull(type); - Assert.Contains(ext, type.Extension); - } - - [Fact] - public void InsertZeroOffsetFirstWildCard() - { - var analyzer = new HybridTrie(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1], 1); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(0, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCard() - { - var analyzer = new HybridTrie(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[560], 560); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCardFull() - { - var analyzer = new HybridTrie(); - FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1120], 1120); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public 
void IncrementalInsertSearchBoundries() - { - var analyzer = new HybridTrie(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - - var bytes1 = new byte[i + 1]; - ReadResult readResult = new ReadResult(bytes1, bytes1.Length); - FileType type = analyzer.Search(in readResult); - - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(i, type.HeaderOffset); - } - } - - [Fact] - public void InsertSearchBoundries() - { - var analyzer = new HybridTrie(); - List fileTypes = new List(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); - analyzer.Insert(fileType); - fileTypes.Add(fileType); - } - - for (int i = 0; i < 560; i++) - { - var bytes = new byte[i + 1]; - ReadResult readResult = new ReadResult(bytes, bytes.Length); - FileType type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileTypes[i], type); - Assert.Equal(i, type.HeaderOffset); - } - } - } -} diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs deleted file mode 100644 index 8a2d868..0000000 --- a/test/Mime-Detective.Tests/Tests/Analyzers/LinearCountingAnalyzerTests.cs +++ /dev/null @@ -1,169 +0,0 @@ -using MimeDetective.Analyzers; -using System; -using System.Collections.Generic; -using System.IO; -using System.Text; -using System.Threading.Tasks; -using Xunit; - -namespace MimeDetective.Tests.Analyzers -{ - public class LinearCountingAnalyzerTests - { - [Fact] - public void DefaultConstructor() - { - var analyzer = new LinearCountingAnalyzer(); - - //assertion here just to have - Assert.NotNull(analyzer); - - analyzer.Insert(MimeTypes.ZIP); - } - - [Fact] - public void EnumerableConstructor() - { - var analyzer = new 
LinearCountingAnalyzer(MimeTypes.Types); - - //assertion here just to have - Assert.NotNull(analyzer); - Assert.Throws(() => new LinearCountingAnalyzer(null)); - - analyzer.Insert(MimeTypes.WORD); - } - - [Fact] - public void Insert() - { - var analyzer = new LinearCountingAnalyzer(); - Assert.Throws(() => analyzer.Insert(null)); - - foreach (var fileType in MimeTypes.Types) - { - analyzer.Insert(fileType); - } - - analyzer.Insert(MimeTypes.WORD); - } - - [Theory] - [InlineData("./Data/Documents/XlsExcel2016.xls", "xls")] - [InlineData("./Data/Documents/PptPowerpoint2016.ppt", "ppt")] - [InlineData("./Data/Documents/DocWord2016.doc", "doc")] - [InlineData("./Data/Documents/PdfWord2016.pdf", "pdf")] - [InlineData("./Data/Zip/empty.zip", "zip")] - [InlineData("./Data/Zip/images.zip", "zip")] - [InlineData("./Data/Zip/imagesBy7zip.zip", "zip")] - [InlineData("./Data/images/test.gif", "gif")] - [InlineData("./Data/images/test.jpg", "jpg")] - [InlineData("./Data/images/test.ico", "ico")] - [InlineData("./Data/images/test.png", "png")] - [InlineData("./Data/images/test.bmp", "bmp")] - [InlineData("./Data/Audio/wavVLC.wav", "wav")] - [InlineData("./Data/Audio/flacVLC.flac", "flac")] - [InlineData("./Data/Audio/mp3ID3Test1.mp3", "mp3")] - [InlineData("./Data/Audio/mp3ID3Test2.mp3", "mp3")] - [InlineData("./Data/Assemblies/ManagedExe.exe", "exe")] - [InlineData("./Data/Assemblies/ManagedDLL.dll", "dll")] - public async Task Search(string path, string ext) - { - var analyzer = new LinearCountingAnalyzer(MimeTypes.Types); - FileInfo file = new FileInfo(path); - FileType type = null; - - using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) - { - type = analyzer.Search(in result); - } - - Assert.NotNull(type); - Assert.Contains(ext, type.Extension); - } - - [Fact] - public void InsertZeroOffsetFirstWildCard() - { - var analyzer = new LinearCountingAnalyzer(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); - analyzer.Insert(fileType); - 
ReadResult readResult = new ReadResult(new byte[1], 1); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(0, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCard() - { - var analyzer = new LinearCountingAnalyzer(); - FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[560], 560); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void InsertLastOffsetWildCardFull() - { - var analyzer = new LinearCountingAnalyzer(); - FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); - analyzer.Insert(fileType); - ReadResult readResult = new ReadResult(new byte[1120], 1120); - var type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(559, type.HeaderOffset); - } - - [Fact] - public void IncrementalInsertSearchBoundries() - { - var analyzer = new LinearCountingAnalyzer(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + i, (ushort)i); - analyzer.Insert(fileType); - - var bytes1 = new byte[i + 1]; - ReadResult readResult = new ReadResult(bytes1, bytes1.Length); - FileType type = analyzer.Search(in readResult); - - Assert.NotNull(type); - Assert.Same(fileType, type); - Assert.Equal(i, type.HeaderOffset); - } - } - - [Fact] - public void InsertSearchBoundries() - { - var analyzer = new LinearCountingAnalyzer(); - List fileTypes = new List(); - - for (int i = 0; i < 560; i++) - { - var bytes = new byte?[1]; - FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + i, (ushort)i); - analyzer.Insert(fileType); - fileTypes.Add(fileType); - } - - for (int i = 0; i < 560; i++) - { - var bytes = new byte[i + 1]; - ReadResult readResult = 
new ReadResult(bytes, bytes.Length); - FileType type = analyzer.Search(in readResult); - Assert.NotNull(type); - Assert.Same(fileTypes[i], type); - Assert.Equal(i, type.HeaderOffset); - } - } - } -} diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/MimeAnalyzersTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/MimeAnalyzersTests.cs index 8278bb8..1cc202d 100644 --- a/test/Mime-Detective.Tests/Tests/Analyzers/MimeAnalyzersTests.cs +++ b/test/Mime-Detective.Tests/Tests/Analyzers/MimeAnalyzersTests.cs @@ -12,7 +12,7 @@ public class MimeAnalyzersTests public void DefaultPrimaryAnalyzerNotNullOrEmpty() { Assert.NotNull(MimeAnalyzers.PrimaryAnalyzer); - Assert.IsType(MimeAnalyzers.PrimaryAnalyzer); + Assert.IsType(MimeAnalyzers.PrimaryAnalyzer); } [Fact] diff --git a/test/Mime-Detective.Tests/Tests/Analyzers/TrieAnalyzerTests.cs b/test/Mime-Detective.Tests/Tests/Analyzers/TrieAnalyzerTests.cs new file mode 100644 index 0000000..f6b6b39 --- /dev/null +++ b/test/Mime-Detective.Tests/Tests/Analyzers/TrieAnalyzerTests.cs @@ -0,0 +1,418 @@ +using MimeDetective.Analyzers; +using System; +using System.Collections.Concurrent; +using System.Collections.Generic; +using System.IO; +using System.Linq; +using System.Linq.Expressions; +using System.Text; +using System.Threading.Tasks; +using Xunit; + +namespace MimeDetective.Tests.Analyzers +{ + public static class TestHelpers + { + public static IFileAnalyzer DefaultCtor(this Type type) => Activator.CreateInstance(type) as IFileAnalyzer; + + private readonly static ConcurrentDictionary, IFileAnalyzer>> ctorCache = new ConcurrentDictionary, IFileAnalyzer>>(); + + public static IFileAnalyzer EnumerableCtor(this Type type, IEnumerable fileTypes) + { + if(!ctorCache.TryGetValue(type, out var func)) + { + var ctor = type.GetConstructors().Where(x => x.GetParameters().Any(y => y.ParameterType == typeof(IEnumerable))).Single(); + var info = Expression.Parameter(typeof(IEnumerable), "fileTypes"); + Expression callTheCtor = 
Expression.New(ctor, info); + func = Expression.Lambda, IFileAnalyzer>>(callTheCtor, new ParameterExpression[] { info }).Compile(); + ctorCache.TryAdd(type, func); + } + + return func(fileTypes); + } + } + + public class TrieAnalyzerTests + { + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void DefaultConstructor(Type type) + { + IFileAnalyzer analyzer = type.DefaultCtor(); + + //assertion here just to have + Assert.NotNull(analyzer); + + analyzer.Insert(MimeTypes.ZIP); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void EnumerableConstructor(Type type) + { + IFileAnalyzer analyzer = type.EnumerableCtor(MimeTypes.Types); + + //assertion here just to have + Assert.NotNull(analyzer); + Assert.Throws(() => type.EnumerableCtor(null)); + + IFileAnalyzer emptyAnalyzer = type.EnumerableCtor(Enumerable.Empty()); + Assert.NotNull(emptyAnalyzer); + + analyzer.Insert(MimeTypes.WORD); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void EnumerableCtorDoesNotThrowIfSequenceContainsNull(Type type) + { + FileType[] types = new FileType[] { MimeTypes.ELF, null, MimeTypes.DLL_EXE }; + + IFileAnalyzer analyzer = type.EnumerableCtor(types); + + Assert.NotNull(analyzer); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void Insert(Type type) + { + var analyzer = type.DefaultCtor(); + Assert.Throws(() => analyzer.Insert(null)); + + foreach (var fileType in 
MimeTypes.Types) + { + analyzer.Insert(fileType); + } + + analyzer.Insert(MimeTypes.WORD); + } + + [Theory] + [InlineData("./Data/images/test.ico", "ico")] + public async Task SearchLinear(string path, string ext) + { + var analyzer = new LinearTrie(); + analyzer.Insert(MimeTypes.ICO); + + FileInfo file = new FileInfo(path); + FileType type = null; + + using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) + { + type = analyzer.Search(in result); + } + + Assert.NotNull(type); + Assert.Contains(ext, type.Extension); + } + + [Theory] + [InlineData(typeof(ArrayTrie), "./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData(typeof(ArrayTrie), "./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData(typeof(ArrayTrie), "./Data/Documents/DocWord2016.doc", "doc")] + [InlineData(typeof(ArrayTrie), "./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData(typeof(ArrayTrie), "./Data/Zip/empty.zip", "zip")] + [InlineData(typeof(ArrayTrie), "./Data/Zip/images.zip", "zip")] + [InlineData(typeof(ArrayTrie), "./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData(typeof(ArrayTrie), "./Data/images/test.gif", "gif")] + [InlineData(typeof(ArrayTrie), "./Data/images/test.jpg", "jpg")] + [InlineData(typeof(ArrayTrie), "./Data/images/test.ico", "ico")] + [InlineData(typeof(ArrayTrie), "./Data/images/test.png", "png")] + [InlineData(typeof(ArrayTrie), "./Data/images/test.bmp", "bmp")] + [InlineData(typeof(ArrayTrie), "./Data/Audio/wavVLC.wav", "wav")] + [InlineData(typeof(ArrayTrie), "./Data/Audio/flacVLC.flac", "flac")] + [InlineData(typeof(ArrayTrie), "./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData(typeof(ArrayTrie), "./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData(typeof(ArrayTrie), "./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData(typeof(ArrayTrie), "./Data/Assemblies/ManagedDLL.dll", "dll")] + [InlineData(typeof(HybridTrie), "./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData(typeof(HybridTrie), 
"./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData(typeof(HybridTrie), "./Data/Documents/DocWord2016.doc", "doc")] + [InlineData(typeof(HybridTrie), "./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData(typeof(HybridTrie), "./Data/Zip/empty.zip", "zip")] + [InlineData(typeof(HybridTrie), "./Data/Zip/images.zip", "zip")] + [InlineData(typeof(HybridTrie), "./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData(typeof(HybridTrie), "./Data/images/test.gif", "gif")] + [InlineData(typeof(HybridTrie), "./Data/images/test.jpg", "jpg")] + [InlineData(typeof(HybridTrie), "./Data/images/test.ico", "ico")] + [InlineData(typeof(HybridTrie), "./Data/images/test.png", "png")] + [InlineData(typeof(HybridTrie), "./Data/images/test.bmp", "bmp")] + [InlineData(typeof(HybridTrie), "./Data/Audio/wavVLC.wav", "wav")] + [InlineData(typeof(HybridTrie), "./Data/Audio/flacVLC.flac", "flac")] + [InlineData(typeof(HybridTrie), "./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData(typeof(HybridTrie), "./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData(typeof(HybridTrie), "./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData(typeof(HybridTrie), "./Data/Assemblies/ManagedDLL.dll", "dll")] + [InlineData(typeof(DictionaryTrie), "./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData(typeof(DictionaryTrie), "./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData(typeof(DictionaryTrie), "./Data/Documents/DocWord2016.doc", "doc")] + [InlineData(typeof(DictionaryTrie), "./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData(typeof(DictionaryTrie), "./Data/Zip/empty.zip", "zip")] + [InlineData(typeof(DictionaryTrie), "./Data/Zip/images.zip", "zip")] + [InlineData(typeof(DictionaryTrie), "./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData(typeof(DictionaryTrie), "./Data/images/test.gif", "gif")] + [InlineData(typeof(DictionaryTrie), "./Data/images/test.jpg", "jpg")] + [InlineData(typeof(DictionaryTrie), "./Data/images/test.ico", "ico")] + 
[InlineData(typeof(DictionaryTrie), "./Data/images/test.png", "png")] + [InlineData(typeof(DictionaryTrie), "./Data/images/test.bmp", "bmp")] + [InlineData(typeof(DictionaryTrie), "./Data/Audio/wavVLC.wav", "wav")] + [InlineData(typeof(DictionaryTrie), "./Data/Audio/flacVLC.flac", "flac")] + [InlineData(typeof(DictionaryTrie), "./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData(typeof(DictionaryTrie), "./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData(typeof(DictionaryTrie), "./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData(typeof(DictionaryTrie), "./Data/Assemblies/ManagedDLL.dll", "dll")] + [InlineData(typeof(LinearCounting), "./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData(typeof(LinearCounting), "./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData(typeof(LinearCounting), "./Data/Documents/DocWord2016.doc", "doc")] + [InlineData(typeof(LinearCounting), "./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData(typeof(LinearCounting), "./Data/Zip/empty.zip", "zip")] + [InlineData(typeof(LinearCounting), "./Data/Zip/images.zip", "zip")] + [InlineData(typeof(LinearCounting), "./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData(typeof(LinearCounting), "./Data/images/test.gif", "gif")] + [InlineData(typeof(LinearCounting), "./Data/images/test.jpg", "jpg")] + [InlineData(typeof(LinearCounting), "./Data/images/test.ico", "ico")] + [InlineData(typeof(LinearCounting), "./Data/images/test.png", "png")] + [InlineData(typeof(LinearCounting), "./Data/images/test.bmp", "bmp")] + [InlineData(typeof(LinearCounting), "./Data/Audio/wavVLC.wav", "wav")] + [InlineData(typeof(LinearCounting), "./Data/Audio/flacVLC.flac", "flac")] + [InlineData(typeof(LinearCounting), "./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData(typeof(LinearCounting), "./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData(typeof(LinearCounting), "./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData(typeof(LinearCounting), "./Data/Assemblies/ManagedDLL.dll", "dll")] + 
[InlineData(typeof(LinearTrie), "./Data/Documents/XlsExcel2016.xls", "xls")] + [InlineData(typeof(LinearTrie), "./Data/Documents/PptPowerpoint2016.ppt", "ppt")] + [InlineData(typeof(LinearTrie), "./Data/Documents/DocWord2016.doc", "doc")] + [InlineData(typeof(LinearTrie), "./Data/Documents/PdfWord2016.pdf", "pdf")] + [InlineData(typeof(LinearTrie), "./Data/Zip/empty.zip", "zip")] + [InlineData(typeof(LinearTrie), "./Data/Zip/images.zip", "zip")] + [InlineData(typeof(LinearTrie), "./Data/Zip/imagesBy7zip.zip", "zip")] + [InlineData(typeof(LinearTrie), "./Data/images/test.gif", "gif")] + [InlineData(typeof(LinearTrie), "./Data/images/test.jpg", "jpg")] + [InlineData(typeof(LinearTrie), "./Data/images/test.ico", "ico")] + [InlineData(typeof(LinearTrie), "./Data/images/test.png", "png")] + [InlineData(typeof(LinearTrie), "./Data/images/test.bmp", "bmp")] + [InlineData(typeof(LinearTrie), "./Data/Audio/wavVLC.wav", "wav")] + [InlineData(typeof(LinearTrie), "./Data/Audio/flacVLC.flac", "flac")] + [InlineData(typeof(LinearTrie), "./Data/Audio/mp3ID3Test1.mp3", "mp3")] + [InlineData(typeof(LinearTrie), "./Data/Audio/mp3ID3Test2.mp3", "mp3")] + [InlineData(typeof(LinearTrie), "./Data/Assemblies/ManagedExe.exe", "exe")] + [InlineData(typeof(LinearTrie), "./Data/Assemblies/ManagedDLL.dll", "dll")] + public async Task Search(Type analyzerType, string path, string ext) + { + var analyzer = analyzerType.EnumerableCtor(MimeTypes.Types); + IEnumerable expectedTypes = MimeTypes.Types.Where(x => x.Extension.Contains(ext)); + FileInfo file = new FileInfo(path); + FileType type = null; + + using (ReadResult result = await ReadResult.ReadFileHeaderAsync(file)) + { + type = analyzer.Search(in result); + } + + Assert.NotNull(type); + Assert.Contains(ext, type.Extension); + Assert.Contains(type, expectedTypes); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + 
[InlineData(typeof(LinearCounting))] + public void InsertZeroOffsetFirstWildCard(Type type) + { + var analyzer = type.DefaultCtor(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 0); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1], 1); + var result = analyzer.Search(in readResult); + Assert.NotNull(result); + Assert.Same(fileType, result); + Assert.Equal(0, result.HeaderOffset); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void InsertLastOffsetWildCard(Type type) + { + var analyzer = type.DefaultCtor(); + FileType fileType = new FileType(new byte?[1], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[560], 560); + var result = analyzer.Search(in readResult); + Assert.NotNull(result); + Assert.Same(fileType, result); + Assert.Equal(559, result.HeaderOffset); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void InsertLastOffsetWildCardFull(Type type) + { + var analyzer = type.DefaultCtor(); + FileType fileType = new FileType(new byte?[560], "ext", "app/ext", 559); + analyzer.Insert(fileType); + ReadResult readResult = new ReadResult(new byte[1120], 1120); + var result = analyzer.Search(in readResult); + Assert.NotNull(result); + Assert.Same(fileType, result); + Assert.Equal(559, result.HeaderOffset); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void IncrementalInsertSearchBoundries(Type type) + { + var analyzer = type.DefaultCtor(); + + for (int i = 0; i < 560; i++) + { + var bytes = 
new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + + var bytes1 = new byte[i+1]; + ReadResult readResult = new ReadResult(bytes1, bytes1.Length); + FileType result = analyzer.Search(in readResult); + + Assert.NotNull(result); + Assert.Same(fileType, result); + Assert.Equal(i, result.HeaderOffset); + } + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void InsertSearchBoundries(Type type) + { + var analyzer = type.DefaultCtor(); + List fileTypes = new List(); + + for (int i = 0; i < 560; i++) + { + var bytes = new byte?[1]; + FileType fileType = new FileType(bytes, "ext" + i, "app/ext" + 1, (ushort)i); + analyzer.Insert(fileType); + fileTypes.Add(fileType); + } + + for (int i = 0; i < 560; i++) + { + var bytes = new byte[i + 1]; + ReadResult readResult = new ReadResult(bytes, bytes.Length); + FileType result = analyzer.Search(in readResult); + Assert.NotNull(result); + Assert.Same(fileTypes[i], result); + Assert.Equal(i, result.HeaderOffset); + } + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void InsertReverseSequentialScenario(Type type) + { + var trie = type.DefaultCtor(); + + FileType type1 = new FileType(new byte?[] { 1, 0, 1 }, "ext1", "app/ext1"); + byte[] type1Bytes = new byte[] { 1, 0, 1 }; + FileType type2 = new FileType(new byte?[] { 1, 0, 1, 0, 1 }, "ext2", "app/ext2"); + byte[] type2Bytes = new byte[] { 1, 0, 1, 0, 1 }; + FileType type3 = new FileType(new byte?[] { 1, 0, 1, 0, 1, 0, 1 }, "ext3", "app/ext3"); + byte[] type3Bytes = new byte[] { 1, 0, 1, 0, 1, 0, 1 }; + + trie.Insert(type3); + trie.Insert(type2); + trie.Insert(type1); + + //lookup type 1 + FileType 
type1Result = trie.Search(new ReadResult(type1Bytes, type1Bytes.Length)); + Assert.NotNull(type1Result); + Assert.Same(type1, type1Result); + + //lookup type 2 + FileType type2Result = trie.Search(new ReadResult(type2Bytes, type2Bytes.Length)); + Assert.NotNull(type2Result); + Assert.Same(type2, type2Result); + + //lookup type 3 + FileType type3Result = trie.Search(new ReadResult(type3Bytes, type3Bytes.Length)); + Assert.NotNull(type3Result); + Assert.Same(type3, type3Result); + } + + [Theory] + [InlineData(typeof(LinearTrie))] + [InlineData(typeof(ArrayTrie))] + [InlineData(typeof(HybridTrie))] + [InlineData(typeof(DictionaryTrie))] + [InlineData(typeof(LinearCounting))] + public void InsertSequentialScenario(Type type) + { + var trie = type.DefaultCtor(); + + FileType type1 = new FileType(new byte?[] { 1, 0, 1 }, "ext1", "app/ext1"); + byte[] type1Bytes = new byte[] { 1, 0, 1 }; + FileType type2 = new FileType(new byte?[] { 1, 0, 1, 0, 1 }, "ext2", "app/ext2"); + byte[] type2Bytes = new byte[] { 1, 0, 1, 0, 1 }; + FileType type3 = new FileType(new byte?[] { 1, 0, 1, 0, 1, 0, 1 }, "ext3", "app/ext3"); + byte[] type3Bytes = new byte[] { 1, 0, 1, 0, 1, 0, 1 }; + + trie.Insert(type1); + trie.Insert(type2); + trie.Insert(type3); + + //lookup type 1 + FileType type1Result = trie.Search(new ReadResult(type1Bytes, type1Bytes.Length)); + Assert.NotNull(type1Result); + Assert.Same(type1, type1Result); + + //lookup type 2 + FileType type2Result = trie.Search(new ReadResult(type2Bytes, type2Bytes.Length)); + Assert.NotNull(type2Result); + Assert.Same(type2, type2Result); + + //lookup type 3 + FileType type3Result = trie.Search(new ReadResult(type3Bytes, type3Bytes.Length)); + Assert.NotNull(type3Result); + Assert.Same(type3, type3Result); + } + } +}