Skip to content

Commit

Permalink
Pass subjectGroup as an Array, to avoid reading it 3 times. Should impro…
Browse files Browse the repository at this point in the history
…ve performance (?)
  • Loading branch information
gabrieldelaparra committed Dec 9, 2019
1 parent a8de3a6 commit 05588a5
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 25 deletions.
14 changes: 8 additions & 6 deletions SparqlForHumans.CLI/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ private static void Main(string[] args)
Options.InternUris = false;
//FilterReorderSortAll();
//FilterReorderSort500();
CreateIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true);
//CreateEntitiesIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true);
CreatePropertiesIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true);
//ReorderAll();
//FilterAll();
//Filter5k();
Expand Down Expand Up @@ -58,15 +59,16 @@ private static void Main(string[] args)
//IndexBuilder.CreateTypesIndex();
}

public static void CreateIndex(string filename, bool overwrite = false)
public static void CreateEntitiesIndex(string filename, bool overwrite = false)
{
var entitiesOutputPath = LuceneDirectoryDefaults.EntityIndexPath;
var propertyOutputPath = LuceneDirectoryDefaults.PropertyIndexPath;

entitiesOutputPath.DeleteIfExists(overwrite);
propertyOutputPath.DeleteIfExists(overwrite);

new EntitiesIndexer(filename, LuceneDirectoryDefaults.EntityIndexPath).Index();
}
/// <summary>
/// Runs a <c>SimplePropertiesIndexer</c> over <paramref name="filename"/>, targeting
/// <c>LuceneDirectoryDefaults.PropertyIndexPath</c>.
/// </summary>
/// <param name="filename">Path of the input file handed to the indexer.</param>
/// <param name="overwrite">
/// Forwarded to <c>DeleteIfExists</c> on the property index path before indexing.
/// NOTE(review): presumably an existing index is removed only when this is true — confirm
/// against the <c>DeleteIfExists</c> extension.
/// </param>
public static void CreatePropertiesIndex(string filename, bool overwrite = false)
{
    // Clean-up step runs first, against the default property index location.
    LuceneDirectoryDefaults.PropertyIndexPath.DeleteIfExists(overwrite);

    var propertiesIndexer = new SimplePropertiesIndexer(filename, LuceneDirectoryDefaults.PropertyIndexPath);
    propertiesIndexer.Index();
}

Expand Down
8 changes: 4 additions & 4 deletions SparqlForHumans.Logger/BaseNotifier.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,15 @@
{
public abstract class BaseNotifier
{
private readonly NLog.Logger Logger = SparqlForHumans.Logger.Logger.Init();
public int NotifyTicks { get; } = 100000;
public abstract string NotifyMessage { get; }
private readonly NLog.Logger _logger = SparqlForHumans.Logger.Logger.Init();
public int NotifyTicks { get; set; } = 100000;
public abstract string NotifyMessage { get; }

public virtual void LogProgress(long Ticks, bool overrideCheck = false)
{
if (Ticks % NotifyTicks == 0 || overrideCheck)
{
Logger.Info($"{NotifyMessage}, Count: {Ticks:N0}");
_logger.Info($"{NotifyMessage}, Count: {Ticks:N0}");
}
}
}
Expand Down
33 changes: 18 additions & 15 deletions SparqlForHumans.Lucene/Index/SimplePropertiesIndexer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,9 @@ public bool FilterGroups(SubjectGroup tripleGroup)
private Dictionary<int, int> FrequencyDictionary { get; set; } = new Dictionary<int, int>();
private Dictionary<int, List<int>> DomainDictionary { get; set; } = new Dictionary<int, List<int>>();
private Dictionary<int, List<int>> RangeDictionary { get; set; } = new Dictionary<int, List<int>>();
private Dictionary<int, List<int>> rangeAuxiliaryDictionary { get; set; } = new Dictionary<int, List<int>>();
private string FrequencyFieldName => Labels.Rank.ToString();
private string DomainFieldName => Labels.DomainType.ToString();
private string RangeFieldName => Labels.Range.ToString();
private static string FrequencyFieldName => Labels.Rank.ToString();
private static string DomainFieldName => Labels.DomainType.ToString();
private static string RangeFieldName => Labels.Range.ToString();
internal void FrequencyParseTripleGroup(Dictionary<int, int> dictionary, IEnumerable<Triple> triples)
{
foreach (var triple in triples)
Expand Down Expand Up @@ -117,18 +116,18 @@ public IEnumerable<DoubleField> FrequencyGetField(SubjectGroup subjectGroup)
: new List<DoubleField>();
}

public IEnumerable<StringField> RangeGetField(SubjectGroup tripleGroup)
public IEnumerable<StringField> RangeGetField(SubjectGroup subjectGroup)
{
return RangeDictionary.ContainsKey(tripleGroup.Id.ToNumbers())
? RangeDictionary[tripleGroup.Id.ToNumbers()]
return RangeDictionary.ContainsKey(subjectGroup.Id.ToNumbers())
? RangeDictionary[subjectGroup.Id.ToNumbers()]
.Select(x => new StringField(RangeFieldName, x.ToString(), Field.Store.YES))
: new List<StringField>();
}

public IEnumerable<StringField> DomainGetField(SubjectGroup tripleGroup)
public IEnumerable<StringField> DomainGetField(SubjectGroup subjectGroup)
{
return DomainDictionary.ContainsKey(tripleGroup.Id.ToNumbers())
? DomainDictionary[tripleGroup.Id.ToNumbers()]
return DomainDictionary.ContainsKey(subjectGroup.Id.ToNumbers())
? DomainDictionary[subjectGroup.Id.ToNumbers()]
.Select(x => new StringField(DomainFieldName, x.ToString(), Field.Store.YES))
: new List<StringField>();
}
Expand All @@ -142,22 +141,26 @@ public void Index()
var subjectGroups = FileHelper.GetInputLines(InputFilename)
.GroupBySubject();

NotifyTicks = 10000;

foreach (var subjectGroup in subjectGroups.Where(x => x.IsEntityQ())) {
FrequencyParseTripleGroup(FrequencyDictionary, subjectGroup);
DomainParseTripleGroup(DomainDictionary, subjectGroup);
RangeParseTripleGroup(RangeDictionary, subjectGroup);
var subjectGroupArray = subjectGroup.ToArray();
FrequencyParseTripleGroup(FrequencyDictionary, subjectGroupArray);
DomainParseTripleGroup(DomainDictionary, subjectGroupArray);
RangeParseTripleGroup(RangeDictionary, subjectGroupArray);
LogProgress(readCount++);
}

NotifyTicks = 100000;

readCount = 0;

var indexConfig = LuceneIndexDefaults.CreateStandardIndexWriterConfig();

using (var indexDirectory = FSDirectory.Open(OutputDirectory.GetOrCreateDirectory()))
using (var writer = new IndexWriter(indexDirectory, indexConfig))
{
foreach (var subjectGroup in subjectGroups.Where(FilterGroups).AsParallel())
{
foreach (var subjectGroup in subjectGroups.Where(FilterGroups)) {
var document = new Document();

FrequencyGetField(subjectGroup).ToList().ForEach(x => document.Add(x));
Expand Down

0 comments on commit 05588a5

Please sign in to comment.