Skip to content

Commit

Permalink
Replace the "n/a" returned values with empty string
Browse files Browse the repository at this point in the history
  • Loading branch information
nemethviktor committed Mar 17, 2024
1 parent 8766c18 commit 28e6151
Show file tree
Hide file tree
Showing 7 changed files with 249 additions and 223 deletions.
2 changes: 1 addition & 1 deletion FrmMainApp.cs
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,7 @@ private static async Task GetHtmlAsync(string url, FrmMainApp formInstance, Canc
GBP_Market_capitalisation = marketCap * gbpEqivalent,
Sector = sector,
ETF_Type = etfType,
Top10_Components = TagsToModelValueTransformations.T2M_Top10_Components(pageText: pageText),
Top10_Exposures = TagsToModelValueTransformations.T2M_Top10_Exposures(pageText: pageText),
Exchange = TagsToModelValueTransformations.T2M_Exchange(companyPageText: companyPageText),
Country = TagsToModelValueTransformations.T2M_Country(companyPageText: companyPageText),
Indices = TagsToModelValueTransformations.T2M_Indices(companyPageText: companyPageText)
Expand Down
13 changes: 8 additions & 5 deletions HLWebScraper.Net.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
<ForceDesignerDPIUnaware>true</ForceDesignerDPIUnaware>
<ApplicationIcon>Resources\AppIcon.ico</ApplicationIcon>
<SupportedOSPlatformVersion>10.0.17763.0</SupportedOSPlatformVersion>
<RuntimeIdentifier>win-x64</RuntimeIdentifier>
<GenerateAssemblyInfo>false</GenerateAssemblyInfo>
<ApplicationManifest>app.manifest</ApplicationManifest>

Expand All @@ -34,7 +35,6 @@
<ItemGroup>
<PackageReference Include="CsvHelper" Version="31.0.2" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.59" />
<PackageReference Include="Microsoft.Data.Sqlite" Version="9.0.0-preview.2.24128.4" />
<PackageReference Include="Newtonsoft.Json" Version="13.0.3" />
<PackageReference Include="System.Data.SQLite" Version="1.0.118" />
</ItemGroup>
Expand Down Expand Up @@ -67,13 +67,16 @@
</ItemGroup>

<ItemGroup>
<None Update="Output\example_HLWebScraper_Output_20240315_192630.csv">
<None Update="changelog.md">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="Properties\Settings.settings">
<Generator>SettingsSingleFileGenerator</Generator>
<LastGenOutput>Settings.Designer.cs</LastGenOutput>
<None Update="Output\example_HLWebScraper_Output_20240317_112225.csv">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>
<None Update="readme.md">
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
</None>

</ItemGroup>

</Project>
2 changes: 1 addition & 1 deletion Model/SEDOL.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ internal class SEDOL
public string SEDOL_ID { get; set; }
public string Sector { get; set; }
public string ETF_Type { get; set; }
public string Top10_Components { get; set; }
public string Top10_Exposures { get; set; }
public string Exchange { get; set; }
public string Country { get; set; }
public string Indices { get; set; }
Expand Down
48 changes: 35 additions & 13 deletions Model/TagsToModelValueTransformations.cs
Original file line number Diff line number Diff line change
Expand Up @@ -180,28 +180,50 @@ public static string T2M_ETF_Type(string name)
return "Not classified";
}


/// <summary>
/// Reads the Exchange value (where available) from the company page markup.
/// </summary>
/// <param name="companyPageText">
/// Company page text that is searched for the span between
/// <c>Exchange:&lt;dd&gt;</c> and the next <c>&lt;/dd&gt;</c>.
/// </param>
/// <returns>
/// The extracted text after it has been passed through <c>HelperStringUtils.ClearUTFChars</c>,
/// or <see cref="string.Empty"/> when that text is exactly <c>"-"</c> or <c>"n/a"</c>.
/// </returns>
public static string T2M_Exchange(string companyPageText)
{
    string likelyExchange = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
        pageText: companyPageText,
        textStart: "Exchange:<dd>",
        textEnd: "</dd>"));

    // "-" and "n/a" carry no information for the model, so they are reported as an empty string.
    return likelyExchange == "-" || likelyExchange == "n/a" ? string.Empty : likelyExchange;
}

/// <summary>
/// Reads the Country value (where available) from the company page markup.
/// </summary>
/// <param name="companyPageText">
/// Company page text that is searched for the span between
/// <c>Country:&lt;dd&gt;</c> and the next <c>&lt;/dd&gt;</c>.
/// </param>
/// <returns>
/// The extracted text after it has been passed through <c>HelperStringUtils.ClearUTFChars</c>,
/// or <see cref="string.Empty"/> when that text is exactly <c>"-"</c> or <c>"n/a"</c>.
/// </returns>
public static string T2M_Country(string companyPageText)
{
    string likelyCountry = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
        pageText: companyPageText,
        textStart: "Country:<dd>",
        textEnd: "</dd>"));

    // "-" and "n/a" carry no information for the model, so they are reported as an empty string.
    return likelyCountry == "-" || likelyCountry == "n/a" ? string.Empty : likelyCountry;
}

/// <summary>
/// Reads the Indices value (in practice a single index, where available) from the company page markup.
/// </summary>
/// <param name="companyPageText">
/// Company page text that is searched for the span between
/// <c>Indices:&lt;dd&gt;</c> and the next <c>&lt;/dd&gt;</c>.
/// </param>
/// <returns>
/// The extracted text after it has been passed through <c>HelperStringUtils.ClearUTFChars</c>,
/// or <see cref="string.Empty"/> when that text is exactly <c>"-"</c> or <c>"n/a"</c>.
/// </returns>
public static string T2M_Indices(string companyPageText)
{
    string likelyIndex = HelperStringUtils.ClearUTFChars(input: HelperStringUtils.FindTextBetween(
        pageText: companyPageText,
        textStart: "Indices:<dd>",
        textEnd: "</dd>"));

    // "-" and "n/a" carry no information for the model, so they are reported as an empty string.
    return likelyIndex == "-" || likelyIndex == "n/a" ? string.Empty : likelyIndex;
}

/// <summary>
Expand Down Expand Up @@ -497,24 +519,24 @@ public static double T2M_Volume(string pageText)
/// </summary>
/// <param name="pageText"></param>
/// <returns></returns>
public static string T2M_Top10_Components(string pageText)
public static string T2M_Top10_Exposures(string pageText)
{
string top10components = string.Empty;
string likelyComponents = HelperStringUtils.FindTextBetween(
string top10Exposures = string.Empty;
string likelyExposures = HelperStringUtils.FindTextBetween(
pageText: pageText,
textStart: "<div id=\"top_10_exposures_data\">",
textEnd: "</div>");
if (likelyComponents.Contains(value: "No top ten information is available at this stage"))
return top10components;
if (likelyExposures.Contains(value: "No top ten information is available at this stage"))
return top10Exposures;

try
{
likelyComponents = HelperStringUtils.FindTextBetween(
pageText: likelyComponents,
likelyExposures = HelperStringUtils.FindTextBetween(
pageText: likelyExposures,
textStart: "<tbody>",
textEnd: "</tbody>");

string[] lines = likelyComponents.Split(separator: new[] { '\r', '\n' },
string[] lines = likelyExposures.Split(separator: new[] { '\r', '\n' },
options: StringSplitOptions.RemoveEmptyEntries);


Expand All @@ -525,15 +547,15 @@ public static string T2M_Top10_Components(string pageText)
// Extract text from the row (remove HTML tags)
string rowText = RemoveHtmlTags(line: line);

top10components += rowText;
top10Exposures += rowText;
}
}
catch
{
// nothing
}

return top10components;
return top10Exposures;

string RemoveHtmlTags(string line)
{
Expand Down
Loading

0 comments on commit 28e6151

Please sign in to comment.