Skip to content

Commit

Permalink
Removed <gr> tags from XDXF output files
Browse files Browse the repository at this point in the history
  • Loading branch information
jonthysell committed Sep 10, 2021
1 parent 5f4fe71 commit 112d78b
Show file tree
Hide file tree
Showing 3 changed files with 38 additions and 29 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
* Improved PukuiElbert source parsing and cleanup
* New HawDict.Test test library with StringUtils tests
* Process dictionaries in parallel to produce results faster
* Removed <gr> tags from XDXF output files
* Switch from string.Format to interpolated strings to improve performance

## v0.12 ##
Expand Down
2 changes: 2 additions & 0 deletions src/HawDict/Input/PukuiElbertInputDict.cs
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ protected override string CleanSourceHtml(string s)
.Replace("<span lang=\"HAW\">&#699;&#257;pe&#699;,&#699;ape&#699;a</span>", "<span lang=\"HAW\">&#699;&#257;pe&#699;ape&#699;a</span>")
.Replace("<span lang=\"HAW\">kani&#257;,&#699;au</span>", "<span lang=\"HAW\">kani&#257;&#699;au</span>")
.Replace("<span lang=\"HAW\">ho&#699;on&#257;,aikola</span>", "<span lang=\"HAW\">ho&#699;on&#257;&#699;aikola</span>")
// Missing definition number fixes
.Replace("<p><span>n.</span> Name of a large valley on", "<p>1. <span>n.</span> Name of a large valley on")
;
}

Expand Down
64 changes: 35 additions & 29 deletions src/HawDict/Output/OutputArticle.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ public string StarDictValue
string value = GetXdxfValue(true);

value = value
.Replace("<gr>", "<p>").Replace("</gr>", "</p>")
.Replace("<abbr>", "<i>").Replace("</abbr>", "</i>");

value = value
Expand All @@ -88,6 +87,12 @@ public string StarDictValue

value = Regex.Replace(value, "<p>([0-9]+)\\. ", "<p><b>$1</b>. ");

if (value.Contains("<b>2</b>. "))
{
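// The regex above only bolds a number that directly follows <p>; when the entry has multiple definitions, also bold a "1. " that comes after pre-text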
value = Regex.Replace(value, "<p>(.*[^>])1\\. ", "<p>$1<b>1</b>. ");
}

return value;
}
}
Expand Down Expand Up @@ -117,59 +122,66 @@ private string GetXdxfValue(bool keepDefinitionNumbers = false)
// Add abbreviation tags
foreach (OutputAbbreviation abbreviation in OutputDict.Abbreviations)
{
value = AddXdxfAbbreviationTags(value, abbreviation.Key, abbreviation.AbbreviationType == AbbreviationType.Grammatical);
value = AddXdxfAbbreviationTags(value, abbreviation.Key);

if (char.IsLower(abbreviation.Key[0]) && abbreviation.Key.Length > 1)
{
value = AddXdxfAbbreviationTags(value, char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1), abbreviation.AbbreviationType == AbbreviationType.Grammatical);
value = AddXdxfAbbreviationTags(value, char.ToUpper(abbreviation.Key[0]) + abbreviation.Key.Substring(1));
}
}

string grammar = "";

// Pull out grammar
if (value.StartsWith("<gr>"))
{
int grammarEndIndex = value.IndexOf("</gr>") + 5;
grammar = $"{value.Substring(0, grammarEndIndex)}";
value = value.Substring(grammarEndIndex + 1);
}

IEnumerable<string> definitions = GetDefinitions(value, keepDefinitionNumbers);

if (definitions.Count() > 1)
{
value = string.Join("</deftext></def><def><deftext>", definitions);
value = $"<def>{grammar}<def><deftext>{value}</deftext></def></def>";
value = $"<def><def><deftext>{value}</deftext></def></def>";
}
else
{
value = $"<def>{grammar}<deftext>{value}</deftext></def>";
value = $"<def><deftext>{value}</deftext></def>";
}

return value;
}

private static IEnumerable<string> GetDefinitions(string value, bool keepDefinitionNumbers, int num = 1)
{
int nextFoundIndex = -1;

string numStr = $"{num}. ";
string nextNumStr = $" {num + 1}. ";

if (value.StartsWith(numStr) && (nextFoundIndex = value.IndexOf(nextNumStr)) > 0)
int foundIndex = value.IndexOf(numStr);
int nextFoundIndex = value.IndexOf(nextNumStr, foundIndex + 1);

bool preOneText = num == 1 && foundIndex > 0;

if (num == 1 && foundIndex > 0 && nextFoundIndex > 0)
{
// Numbered definition with some pre-text
if (keepDefinitionNumbers)
{
yield return value.Substring(0, nextFoundIndex);
}
else
{
yield return value.Substring(numStr.Length, nextFoundIndex - numStr.Length);
yield return value[0..foundIndex] + value[(foundIndex + numStr.Length)..nextFoundIndex];
}
}
else if (value.StartsWith(numStr))
else if (foundIndex == 0 && nextFoundIndex > 0)
{
// Numbered definition without pre-text
if (keepDefinitionNumbers)
{
yield return value[0..nextFoundIndex];
}
else
{
yield return value[numStr.Length..nextFoundIndex];
}
}
else if (foundIndex == 0)
{
// Last numbered definition
if (keepDefinitionNumbers)
{
yield return value;
Expand All @@ -181,6 +193,7 @@ private static IEnumerable<string> GetDefinitions(string value, bool keepDefinit
}
else
{
// No numbers, just one definition
yield return value;
}

Expand All @@ -193,7 +206,7 @@ private static IEnumerable<string> GetDefinitions(string value, bool keepDefinit
}
}

private static string AddXdxfAbbreviationTags(string value, string abbreviation, bool grammar)
private static string AddXdxfAbbreviationTags(string value, string abbreviation)
{
value = value.Replace($" {abbreviation} ", $" <abbr>{abbreviation}</abbr> ");
value = value.Replace($"({abbreviation} ", $"(<abbr>{abbreviation}</abbr> ");
Expand All @@ -214,14 +227,7 @@ private static string AddXdxfAbbreviationTags(string value, string abbreviation,

if (value.StartsWith(abbreviation + " "))
{
if (grammar)
{
value = $"<gr><abbr>{abbreviation}</abbr></gr>{value.Substring(abbreviation.Length)}";
}
else
{
value = $"<abbr>{abbreviation}</abbr>{value.Substring(abbreviation.Length)}";
}
value = $"<abbr>{abbreviation}</abbr>{value.Substring(abbreviation.Length)}";
}

if (value.EndsWith(" " + abbreviation))
Expand Down

0 comments on commit 112d78b

Please sign in to comment.