diff --git a/CHANGELOG.md b/CHANGELOG.md index f583c30..cf6dd77 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Improved PukuiElbert source parsing and cleanup * New HawDict.Test test library with StringUtils tests * Process dictionaries in parallel to speed up result +* Removed `<gr>` tags from XDXF output files * Switch from string.Format to interpolated strings to improve performance ## v0.12 ## diff --git a/src/HawDict/Input/PukuiElbertInputDict.cs b/src/HawDict/Input/PukuiElbertInputDict.cs index 49259a7..5ed0cf5 100644 --- a/src/HawDict/Input/PukuiElbertInputDict.cs +++ b/src/HawDict/Input/PukuiElbertInputDict.cs @@ -59,6 +59,8 @@ protected override string CleanSourceHtml(string s) .Replace("ʻāpeʻ,ʻapeʻa", "ʻāpeʻapeʻa") .Replace("kaniā,ʻau", "kaniāʻau") .Replace("hoʻonā,aikola", "hoʻonāʻaikola") + // Missing definition number fixes + .Replace("

n. Name of a large valley on", "

1. n. Name of a large valley on") ; } diff --git a/src/HawDict/Output/OutputArticle.cs b/src/HawDict/Output/OutputArticle.cs index 2680a01..c9bfbca 100644 --- a/src/HawDict/Output/OutputArticle.cs +++ b/src/HawDict/Output/OutputArticle.cs @@ -79,7 +79,6 @@ public string StarDictValue string value = GetXdxfValue(true); value = value - .Replace("", "

").Replace("", "

") .Replace("", "").Replace("", ""); value = value @@ -88,6 +87,12 @@ public string StarDictValue value = Regex.Replace(value, "

([0-9]+)\\. ", "

$1. "); + if (value.Contains("2. ")) + { + // Fix bolding number one for pre-text + value = Regex.Replace(value, "

(.*[^>])1\\. ", "

$11. "); + } + return value; } } @@ -117,34 +122,24 @@ private string GetXdxfValue(bool keepDefinitionNumbers = false) // Add abbreviation tags foreach (OutputAbbreviation abbreviation in OutputDict.Abbreviations) { - value = AddXdxfAbbreviationTags(value, abbreviation.Key, abbreviation.AbbreviationType == AbbreviationType.Grammatical); + value = AddXdxfAbbreviationTags(value, abbreviation.Key); if (char.IsLower(abbreviation.Key[0]) && abbreviation.Key.Length > 1) { - value = AddXdxfAbbreviationTags(value, char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1), abbreviation.AbbreviationType == AbbreviationType.Grammatical); + value = AddXdxfAbbreviationTags(value, char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1)); } } - string grammar = ""; - - // Pull out grammar - if (value.StartsWith("")) - { - int grammarEndIndex = value.IndexOf("") + 5; - grammar = $"{value.Substring(0, grammarEndIndex)}"; - value = value.Substring(grammarEndIndex + 1); - } - IEnumerable definitions = GetDefinitions(value, keepDefinitionNumbers); if (definitions.Count() > 1) { value = string.Join("", definitions); - value = $"{grammar}{value}"; + value = $"{value}"; } else { - value = $"{grammar}{value}"; + value = $"{value}"; } return value; @@ -152,24 +147,41 @@ private string GetXdxfValue(bool keepDefinitionNumbers = false) private static IEnumerable GetDefinitions(string value, bool keepDefinitionNumbers, int num = 1) { - int nextFoundIndex = -1; - string numStr = $"{num}. "; string nextNumStr = $" {num + 1}. 
"; - if (value.StartsWith(numStr) && (nextFoundIndex = value.IndexOf(nextNumStr)) > 0) + int foundIndex = value.IndexOf(numStr); + int nextFoundIndex = value.IndexOf(nextNumStr, foundIndex + 1); + + bool preOneText = num == 1 && foundIndex > 0; + + if (num == 1 && foundIndex > 0 && nextFoundIndex > 0) { + // Numbered definition with some pre-text if (keepDefinitionNumbers) { yield return value.Substring(0, nextFoundIndex); } else { - yield return value.Substring(numStr.Length, nextFoundIndex - numStr.Length); + yield return value[0..foundIndex] + value[(foundIndex + numStr.Length)..nextFoundIndex]; } } - else if (value.StartsWith(numStr)) + else if (foundIndex == 0 && nextFoundIndex > 0) { + // Numbered definition without pre-text + if (keepDefinitionNumbers) + { + yield return value[0..nextFoundIndex]; + } + else + { + yield return value[numStr.Length..nextFoundIndex]; + } + } + else if (foundIndex == 0) + { + // Last numbered definition if (keepDefinitionNumbers) { yield return value; @@ -181,6 +193,7 @@ private static IEnumerable GetDefinitions(string value, bool keepDefinit } else { + // No numbers, just one definition yield return value; } @@ -193,7 +206,7 @@ private static IEnumerable GetDefinitions(string value, bool keepDefinit } } - private static string AddXdxfAbbreviationTags(string value, string abbreviation, bool grammar) + private static string AddXdxfAbbreviationTags(string value, string abbreviation) { value = value.Replace($" {abbreviation} ", $" {abbreviation} "); value = value.Replace($"({abbreviation} ", $"({abbreviation} "); @@ -214,14 +227,7 @@ private static string AddXdxfAbbreviationTags(string value, string abbreviation, if (value.StartsWith(abbreviation + " ")) { - if (grammar) - { - value = $"{abbreviation}{value.Substring(abbreviation.Length)}"; - } - else - { - value = $"{abbreviation}{value.Substring(abbreviation.Length)}"; - } + value = $"{abbreviation}{value.Substring(abbreviation.Length)}"; } if (value.EndsWith(" " + abbreviation))