diff --git a/SparqlForHumans.CLI/Program.cs b/SparqlForHumans.CLI/Program.cs index ca37fef6..16cfc679 100644 --- a/SparqlForHumans.CLI/Program.cs +++ b/SparqlForHumans.CLI/Program.cs @@ -24,7 +24,8 @@ private static void Main(string[] args) Options.InternUris = false; //FilterReorderSortAll(); //FilterReorderSort500(); - CreateIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true); + //CreateEntitiesIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true); + CreatePropertiesIndex(@"C:\Users\admin\Desktop\DCC\SparqlforHumans\SparqlForHumans.CLI\bin\x64\Debug\netcoreapp2.1\filtered-All.Sorted.nt", true); //ReorderAll(); //FilterAll(); //Filter5k(); @@ -58,15 +59,16 @@ private static void Main(string[] args) //IndexBuilder.CreateTypesIndex(); } - public static void CreateIndex(string filename, bool overwrite = false) + public static void CreateEntitiesIndex(string filename, bool overwrite = false) { var entitiesOutputPath = LuceneDirectoryDefaults.EntityIndexPath; - var propertyOutputPath = LuceneDirectoryDefaults.PropertyIndexPath; - entitiesOutputPath.DeleteIfExists(overwrite); - propertyOutputPath.DeleteIfExists(overwrite); - new EntitiesIndexer(filename, LuceneDirectoryDefaults.EntityIndexPath).Index(); + } + public static void CreatePropertiesIndex(string filename, bool overwrite = false) + { + var propertyOutputPath = LuceneDirectoryDefaults.PropertyIndexPath; + propertyOutputPath.DeleteIfExists(overwrite); new SimplePropertiesIndexer(filename, LuceneDirectoryDefaults.PropertyIndexPath).Index(); } diff --git a/SparqlForHumans.Logger/BaseNotifier.cs b/SparqlForHumans.Logger/BaseNotifier.cs index 354a0f11..14ce3a1c 100644 --- a/SparqlForHumans.Logger/BaseNotifier.cs +++ b/SparqlForHumans.Logger/BaseNotifier.cs @@ -2,15 +2,15 @@ { public abstract class BaseNotifier { - private readonly NLog.Logger Logger = SparqlForHumans.Logger.Logger.Init(); - public int NotifyTicks { get; } = 100000; - public abstract string NotifyMessage { get; } + private readonly NLog.Logger _logger = SparqlForHumans.Logger.Logger.Init(); + public int NotifyTicks { get; set; } = 100000; + public abstract string NotifyMessage { get; } public virtual void LogProgress(long Ticks, bool overrideCheck = false) { if (Ticks % NotifyTicks == 0 || overrideCheck) { - Logger.Info($"{NotifyMessage}, Count: {Ticks:N0}"); + _logger.Info($"{NotifyMessage}, Count: {Ticks:N0}"); } } } diff --git a/SparqlForHumans.Lucene/Index/SimplePropertiesIndexer.cs b/SparqlForHumans.Lucene/Index/SimplePropertiesIndexer.cs index dca5766b..181147ae 100644 --- a/SparqlForHumans.Lucene/Index/SimplePropertiesIndexer.cs +++ b/SparqlForHumans.Lucene/Index/SimplePropertiesIndexer.cs @@ -42,10 +42,9 @@ public bool FilterGroups(SubjectGroup tripleGroup) private Dictionary FrequencyDictionary { get; set; } = new Dictionary(); private Dictionary> DomainDictionary { get; set; } = new Dictionary>(); private Dictionary> RangeDictionary { get; set; } = new Dictionary>(); - private Dictionary> rangeAuxiliaryDictionary { get; set; } = new Dictionary>(); - private string FrequencyFieldName => Labels.Rank.ToString(); - private string DomainFieldName => Labels.DomainType.ToString(); - private string RangeFieldName => Labels.Range.ToString(); + private static string FrequencyFieldName => Labels.Rank.ToString(); + private static string DomainFieldName => Labels.DomainType.ToString(); + private static string RangeFieldName => Labels.Range.ToString(); internal void FrequencyParseTripleGroup(Dictionary dictionary, IEnumerable triples) { foreach (var triple in triples) @@ -117,18 +116,18 @@ public IEnumerable FrequencyGetField(SubjectGroup subjectGroup) : new List(); } - public IEnumerable RangeGetField(SubjectGroup tripleGroup) + public IEnumerable RangeGetField(SubjectGroup subjectGroup) { - return RangeDictionary.ContainsKey(tripleGroup.Id.ToNumbers()) - ? RangeDictionary[tripleGroup.Id.ToNumbers()] + return RangeDictionary.ContainsKey(subjectGroup.Id.ToNumbers()) + ? RangeDictionary[subjectGroup.Id.ToNumbers()] .Select(x => new StringField(RangeFieldName, x.ToString(), Field.Store.YES)) : new List(); } - public IEnumerable DomainGetField(SubjectGroup tripleGroup) + public IEnumerable DomainGetField(SubjectGroup subjectGroup) { - return DomainDictionary.ContainsKey(tripleGroup.Id.ToNumbers()) - ? DomainDictionary[tripleGroup.Id.ToNumbers()] + return DomainDictionary.ContainsKey(subjectGroup.Id.ToNumbers()) + ? DomainDictionary[subjectGroup.Id.ToNumbers()] .Select(x => new StringField(DomainFieldName, x.ToString(), Field.Store.YES)) : new List(); } @@ -142,13 +141,18 @@ public void Index() var subjectGroups = FileHelper.GetInputLines(InputFilename) .GroupBySubject(); + NotifyTicks = 10000; + foreach (var subjectGroup in subjectGroups.Where(x => x.IsEntityQ())) { - FrequencyParseTripleGroup(FrequencyDictionary, subjectGroup); - DomainParseTripleGroup(DomainDictionary, subjectGroup); - RangeParseTripleGroup(RangeDictionary, subjectGroup); + var subjectGroupArray = subjectGroup.ToArray(); + FrequencyParseTripleGroup(FrequencyDictionary, subjectGroupArray); + DomainParseTripleGroup(DomainDictionary, subjectGroupArray); + RangeParseTripleGroup(RangeDictionary, subjectGroupArray); LogProgress(readCount++); } + NotifyTicks = 100000; + readCount = 0; var indexConfig = LuceneIndexDefaults.CreateStandardIndexWriterConfig(); @@ -156,8 +160,7 @@ public void Index() using (var indexDirectory = FSDirectory.Open(OutputDirectory.GetOrCreateDirectory())) using (var writer = new IndexWriter(indexDirectory, indexConfig)) { - foreach (var subjectGroup in subjectGroups.Where(FilterGroups).AsParallel()) - { + foreach (var subjectGroup in subjectGroups.Where(FilterGroups)) { var document = new Document(); FrequencyGetField(subjectGroup).ToList().ForEach(x => document.Add(x));