Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate hash seed with a delimiter #94

Open
wants to merge 2 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions GeUtilities/Intervals/Functions/HashFunctions.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,12 @@ public static class HashFunctions
private const uint _FNVPrime_32 = 16777619;
private const uint _FNVOffsetBasis_32 = 2166136261;

/// <summary>
/// The delimiter placed between the properties that are
/// joined together to form a hash seed.
/// </summary>
public const string HashSeedDelimiter = ";;";

public static uint FNVHashFunction(string bytes)
{
uint hash = _FNVOffsetBasis_32;
Expand All @@ -20,5 +26,10 @@ public static uint FNVHashFunction(string bytes)

return hash;
}

/// <summary>
/// Builds a hash seed by concatenating the given properties in order,
/// inserting <see cref="HashSeedDelimiter"/> between consecutive items.
/// </summary>
/// <param name="properties">The values to combine into the seed.</param>
/// <returns>
/// The delimiter-separated concatenation of <paramref name="properties"/>.
/// </returns>
public static string GetHashSeed(params string[] properties)
{
    string seed = string.Join(HashSeedDelimiter, properties);
    return seed;
}
}
}
10 changes: 9 additions & 1 deletion GeUtilities/Intervals/Genome/Chromosome.cs
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,20 @@ public Chromosome()
Strands = new Dictionary<char, Strand<I>>();
}

public void Add(I interval, char strand)
/// <summary>
/// Attempts to add the given interval to the given strand, creating the
/// strand's entry first if it does not exist yet.
/// </summary>
/// <param name="interval">The interval to add.</param>
/// <param name="strand">The key of the strand that should receive the interval.</param>
/// <returns>
/// <c>true</c> if the strand accepted the interval; otherwise <c>false</c>.
/// </returns>
public bool TryAdd(I interval, char strand)
{
    Strand<I> target;
    if (!Strands.TryGetValue(strand, out target))
    {
        target = new Strand<I>();
        Strands.Add(strand, target);
    }

    // Statistics are updated only for intervals the strand accepted.
    if (!target.TryAdd(interval))
        return false;

    Statistics.Update(interval);
    return true;
}
}
}
3 changes: 2 additions & 1 deletion GeUtilities/Intervals/Model/GeneralFeature.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
// See the LICENSE file in the project root for more information.

using Genometric.GeUtilities.IGenomics;
using Genometric.GeUtilities.Intervals.Functions;

namespace Genometric.GeUtilities.Intervals.Model
{
public class GeneralFeature : Interval, IGeneralFeature
{
public GeneralFeature(int left, int right, string source, string feature, double score,
string frame, string attribute, string hashSeed = "") :
base(left, right, source + feature + score.ToString() + frame + attribute + hashSeed)
base(left, right, HashFunctions.GetHashSeed(source, feature, score.ToString(), frame, attribute, hashSeed))
{
Source = source;
Feature = feature;
Expand Down
2 changes: 1 addition & 1 deletion GeUtilities/Intervals/Model/Interval.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ public Interval(int left, int right, string hashSeed = "")

unchecked
{
_hashKey = (int)HashFunctions.FNVHashFunction(left.ToString() + right.ToString() + hashSeed);
_hashKey = (int)HashFunctions.FNVHashFunction(HashFunctions.GetHashSeed(left.ToString(), right.ToString(), hashSeed));
}
}

Expand Down
3 changes: 2 additions & 1 deletion GeUtilities/Intervals/Model/Peak.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
// See the LICENSE file in the project root for more information.

using Genometric.GeUtilities.IGenomics;
using Genometric.GeUtilities.Intervals.Functions;

namespace Genometric.GeUtilities.Intervals.Model
{
public class Peak : Interval, IPeak
{
public Peak(int left, int right, double value, string name = null, int summit = -1, string hashSeed = "") :
base(left, right, value.ToString() + summit.ToString() + name + hashSeed)
base(left, right, HashFunctions.GetHashSeed(value.ToString(), summit.ToString(), name, hashSeed))
{
Value = value;
Summit = summit != -1 ? summit : (right - left) / 2;
Expand Down
3 changes: 2 additions & 1 deletion GeUtilities/Intervals/Model/RefSeqGene.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@
// See the LICENSE file in the project root for more information.

using Genometric.GeUtilities.IGenomics;
using Genometric.GeUtilities.Intervals.Functions;

namespace Genometric.GeUtilities.Intervals.Model
{
public class RefSeqGene : Interval, IRefSeqGene
{
public RefSeqGene(int left, int right, string refSeqID, string geneSymbol, string hashSeed = "") :
base(left, right, refSeqID + geneSymbol + hashSeed)
base(left, right, HashFunctions.GetHashSeed(refSeqID, geneSymbol, hashSeed))
{
RefSeqID = refSeqID;
GeneSymbol = geneSymbol;
Expand Down
5 changes: 3 additions & 2 deletions GeUtilities/Intervals/Model/Variant.cs
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,16 @@
// See the LICENSE file in the project root for more information.

using Genometric.GeUtilities.IGenomics;
using Genometric.GeUtilities.Intervals.Functions;

namespace Genometric.GeUtilities.Intervals.Model
{
public class Variant : Interval, IVariant
{
public Variant(int left, int right, string id, Base[] refBase, Base[] altBase, double quality,
string filter, string info, string hashSeed = "") :
base(left, right, id + (refBase == null ? "" : refBase.ToString())
+ (altBase == null ? "" : altBase.ToString()) + quality.ToString() + filter + info + hashSeed)
base(left, right, HashFunctions.GetHashSeed(id, (refBase == null ? "" : refBase.ToString()),
(altBase == null ? "" : altBase.ToString()), quality.ToString(), filter, info, hashSeed))
{
ID = id;
RefBase = refBase;
Expand Down
13 changes: 10 additions & 3 deletions GeUtilities/Intervals/Parsers/Model/ParsedIntervals.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,19 @@ protected ParsedIntervals()
Statistics = new S();
}

public void Add(I interval, string chr, char strand)
public bool TryAdd(I interval, string chr, char strand)
{
if (!Chromosomes.ContainsKey(chr))
Chromosomes.Add(chr, new Chromosome<I, S>());
Chromosomes[chr].Add(interval, strand);
Statistics.Update(interval);
if (Chromosomes[chr].TryAdd(interval, strand))
{
Statistics.Update(interval);
return true;
}
else
{
return false;
}
}
}
}
11 changes: 8 additions & 3 deletions GeUtilities/Intervals/Parsers/Parser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ private void Parse()
continue;
}

I readingInterval = BuildInterval(left, right, splittedLine, lineCounter, _data.FileHashKey + lineCounter.ToString());
I readingInterval = BuildInterval(left, right, splittedLine, lineCounter, HashFunctions.GetHashSeed(_data.FileHashKey.ToString(), lineCounter.ToString()));
if (DropReadingPeak)
continue;

Expand Down Expand Up @@ -291,8 +291,13 @@ private void Parse()
(char.TryParse(splittedLine[_strandColumn], out strand) && strand != '+' && strand != '-' && strand != UnspecifiedStrandChar))
strand = UnspecifiedStrandChar;

_data.Add(readingInterval, chrName, strand);
_data.IntervalsCount++;
if (_data.TryAdd(readingInterval, chrName, strand))
_data.IntervalsCount++;
else
{
DropLine("\tLine " + lineCounter.ToString() + "\t:\tPossibly Hash key collision.");
continue;
}
}
}
}
Expand Down