Skip to content

Commit

Permalink
add simple validator mode to ii
Browse files Browse the repository at this point in the history
  • Loading branch information
rkm committed Dec 7, 2023
1 parent b97c47d commit 2bb3fdf
Show file tree
Hide file tree
Showing 3 changed files with 203 additions and 102 deletions.
47 changes: 47 additions & 0 deletions IsIdentifiable/Options/IsIdentifiableReportValidatorOptions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
using CommandLine;
using IsIdentifiable.Reporting.Reports;
using System;

namespace IsIdentifiable.Options;

/// <summary>
/// Command line options for the "validate" verb, which validates a <see cref="FailureStoreReport"/>
/// </summary>
[Verb("validate", HelpText = "Validate a FailureStoreReport")]
public class IsIdentifiableReportValidatorOptions
{
    /// <summary>
    /// Path to the CSV file of failures to validate. The file must be laid out as a <see cref="FailureStoreReport"/>.
    /// This option is mandatory.
    /// </summary>
    [Option('f', "file", Required = true, HelpText = "Pre load an existing failures file")]
    public string FailuresCsv { get; set; }

    /// <summary>
    /// When set, UseSystemConsole is switched on for Terminal.gui (i.e. the NetDriver, which is based on System.Console, is used)
    /// </summary>
    [Option("usc", HelpText = "Sets UseSystemConsole to true for Terminal.gui (i.e. uses the NetDriver which is based on System.Console)")]
    public bool UseSystemConsole { get; internal set; }

    /// <summary>
    /// Path to a color palette yaml file that the user interface should use
    /// </summary>
    [Option("theme", HelpText = "Sets the user interface to use a specific color palette yaml file")]
    public string Theme { get; set; }


    /// <summary>
    /// Copies settings from <paramref name="globalOpts"/> into this instance, but only for settings
    /// that have not been given a value here. Currently only <see cref="Theme"/> is inherited.
    /// </summary>
    /// <param name="globalOpts">The options to inherit from; must not be null</param>
    /// <exception cref="ArgumentNullException">If <paramref name="globalOpts"/> is null</exception>
    public virtual void InheritValuesFrom(IsIdentifiableReviewerOptions globalOpts)
    {
        ArgumentNullException.ThrowIfNull(globalOpts);

        // An explicitly supplied theme always wins; a blank global theme is not worth inheriting
        var inheritedTheme = globalOpts.Theme;
        if (Theme != null || string.IsNullOrWhiteSpace(inheritedTheme))
            return;

        Theme = inheritedTheme;
    }
}
228 changes: 127 additions & 101 deletions IsIdentifiable/Reporting/Reports/FailureStoreReport.cs
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,11 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile)
/// <param name="loadedRows">Action to call periodically as records are read from the file (for
/// when the file is very big and you want to show progress etc)</param>
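/// <param name="token">Cancellation token for aborting the file deserialization (and closing the file again)</param>
/// <param name="partRules">Optional rules; any FailurePart covered by one of these rules is filtered out of the results. When null, no rules are applied</param>
/// <param name="runParallel">When true (the default) records are processed with Parallel.ForEach. When false records are processed one at a time, and any error for a record is written to the standard error stream and counted instead of aborting the load</param>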
/// <returns></returns>
/// <exception cref="Exception"></exception>
public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> loadedRows, CancellationToken token, IEnumerable<PartPatternFilterRule>? partRules = null)
public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> loadedRows, CancellationToken token, IEnumerable<PartPatternFilterRule>? partRules = null, bool runParallel = true)
{
partRules ??= new List<PartPatternFilterRule>();

Expand All @@ -149,7 +151,11 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> lo

int totalProcessed = 0;
var localTokenSource = new CancellationTokenSource();

Check warning

Code scanning / CodeQL

Missing Dispose call on local IDisposable Warning

Disposable 'CancellationTokenSource' is created but not disposed.
using var timerTask = Task.Run(
var failures = new ConcurrentBag<Failure>();

if (runParallel)
{
using var timerTask = Task.Run(
async () =>
{
while (!token.IsCancellationRequested && !localTokenSource.Token.IsCancellationRequested)
Expand All @@ -159,118 +165,138 @@ public static IEnumerable<Failure> Deserialize(IFileInfo oldFile, Action<int> lo
}
},
token
);

var failures = new ConcurrentBag<Failure>();
);

try
try
{
Parallel.ForEach(
reader.GetRecords<FailureStoreReportRecord>(),
new ParallelOptions
{
CancellationToken = token,
},
(FailureStoreReportRecord row) => Process(row, partRules, failures, ref totalProcessed)
);
}
finally
{
localTokenSource.Cancel();
timerTask.Wait();
}
}
else
{
Parallel.ForEach(
reader.GetRecords<FailureStoreReportRecord>(),
new ParallelOptions
var problems = 0;
foreach (var row in reader.GetRecords<FailureStoreReportRecord>())
{
try
{
CancellationToken = token,
},
(FailureStoreReportRecord row) =>
Process(row, partRules, failures, ref totalProcessed);
}
catch (Exception e)
{
if (row.ProblemValue == null)
throw new Exception("ProblemValue was null");

var words = row.PartWords.Split(Separator);
var classes = row.PartClassifications.Split(Separator);
var offsets = row.PartOffsets.Split(Separator);

var parts = words.Select(
(word, index) => new FailurePart(
word,
Enum.TryParse<FailureClassification>(classes[index], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[index]}'"),
int.TryParse(offsets[index], out var offset) ? offset : throw new Exception($"Invalid offset '{row.PartOffsets}'")
)
).ToList();

if (row.ProblemField != "PixelData")
{
// Fixes any offsets that have been mangled by file endings etc.
foreach (var part in parts)
{
if (row.ProblemValue.Substring(part.Offset, part.Word.Length) == part.Word)
continue;

// Test if the ProblemValue has been HTML escaped
var encodedPartWord = WebUtility.HtmlEncode(part.Word);
try
{
if (row.ProblemValue.Substring(part.Offset, encodedPartWord.Length) == encodedPartWord)
{
part.Word = encodedPartWord;
continue;
}
}
catch (ArgumentOutOfRangeException)
{ }

// Test if the ProblemValue has hidden unicode symbols
var withoutInvisible = Regex.Replace(row.ProblemValue, @"\p{C}+", string.Empty);
if (withoutInvisible.Substring(part.Offset, part.Word.Length) == part.Word)
{
part.Word = row.ProblemValue.Substring(part.Offset, part.Word.Length + 1);

if (row.ProblemValue.Substring(part.Offset, part.Word.Length) != part.Word)
throw new Exception($"Could not fix hidden unicode characters in Failure:\n===\n{row}\n===");

continue;
}

// Finally, try shifting the offset around to find the word
try
{
FixupOffsets(row, part);
}
catch (ArgumentOutOfRangeException e)
{
throw new Exception($"Could not fixup Offset value in Failure:\n{row}", e);
}
}
}
Console.Error.WriteLine($"{row}:\n{e.Message}\n");
problems++;
}

Check notice

Code scanning / CodeQL

Generic catch clause Note

Generic catch clause.
}

if (problems > 0)
Console.Error.WriteLine($"Problem with {problems}/{totalProcessed} records");
}

loadedRows(totalProcessed);

return failures;
}

/* TEMP - Filter out any FailureParts covered by an PartPatternFilterRule */
var toRemove = new List<FailurePart>();
foreach (var partRule in partRules)
private static void Process(FailureStoreReportRecord row, IEnumerable<PartPatternFilterRule>? partRules, ConcurrentBag<Failure> failures, ref int totalProcessed)
{
if (row.ProblemValue == null)
throw new Exception("ProblemValue was null");

var words = row.PartWords.Split(Separator);
var classes = row.PartClassifications.Split(Separator);
var offsets = row.PartOffsets.Split(Separator);

var parts = words.Select(
(word, index) => new FailurePart(
word,
Enum.TryParse<FailureClassification>(classes[index], true, out var classification) ? classification : throw new Exception($"Invalid failure classification '{classes[index]}'"),
int.TryParse(offsets[index], out var offset) ? offset : throw new Exception($"Invalid offset '{row.PartOffsets}'")
)
).ToList();

if (row.ProblemField != "PixelData")
{
// Fixes any offsets that have been mangled by file endings etc.
foreach (var part in parts)
{
if (row.ProblemValue.Substring(part.Offset, part.Word.Length) == part.Word)
continue;

// Test if the ProblemValue has been HTML escaped
var encodedPartWord = WebUtility.HtmlEncode(part.Word);
try
{
if (row.ProblemValue.Substring(part.Offset, encodedPartWord.Length) == encodedPartWord)
{
if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, row.ProblemField, StringComparison.InvariantCultureIgnoreCase))
continue;

foreach (var part in parts.Where(x => partRule.Covers(x, row.ProblemValue)))
{
toRemove.Add(part);
partRule.IncrementUsed();
}
part.Word = encodedPartWord;
continue;
}
parts = parts.Except(toRemove).ToList();
/* TEMP */

if (parts.Any())
failures.Add(new Failure(parts)
{
Resource = row.Resource,
ResourcePrimaryKey = row.ResourcePrimaryKey,
ProblemField = row.ProblemField,
ProblemValue = row.ProblemValue,
});

Interlocked.Increment(ref totalProcessed);
}
);
catch (ArgumentOutOfRangeException)
{ }

Check notice

Code scanning / CodeQL

Poor error handling: empty catch block Note

Poor error handling: empty catch block.

// Test if the ProblemValue has hidden unicode symbols
var withoutInvisible = Regex.Replace(row.ProblemValue, @"\p{C}+", string.Empty);
if (withoutInvisible.Substring(part.Offset, part.Word.Length) == part.Word)
{
part.Word = row.ProblemValue.Substring(part.Offset, part.Word.Length + 1);

if (row.ProblemValue.Substring(part.Offset, part.Word.Length) != part.Word)
throw new Exception($"Could not fix hidden unicode characters in Failure:\n===\n{row}\n===");

continue;
}

// Finally, try shifting the offset around to find the word
try
{
FixupOffsets(row, part);
}
catch (ArgumentOutOfRangeException e)
{
throw new Exception($"Could not fixup Offset value in Failure:\n{row}", e);
}
}

Check notice

Code scanning / CodeQL

Missed opportunity to use Where Note

This foreach loop
implicitly filters its target sequence
- consider filtering the sequence explicitly using '.Where(...)'.
}
finally

/* TEMP - Filter out any FailureParts covered by an PartPatternFilterRule */
var toRemove = new List<FailurePart>();
foreach (var partRule in partRules)
{
localTokenSource.Cancel();
timerTask.Wait();
if (!string.IsNullOrWhiteSpace(partRule.IfColumn) && !string.Equals(partRule.IfColumn, row.ProblemField, StringComparison.InvariantCultureIgnoreCase))
continue;

foreach (var part in parts.Where(x => partRule.Covers(x, row.ProblemValue)))
{
toRemove.Add(part);
partRule.IncrementUsed();
}
}

Check notice

Code scanning / CodeQL

Missed opportunity to use Where Note

This foreach loop
implicitly filters its target sequence
- consider filtering the sequence explicitly using '.Where(...)'.
parts = parts.Except(toRemove).ToList();
/* TEMP */

loadedRows(totalProcessed);
if (parts.Any())
failures.Add(new Failure(parts)
{
Resource = row.Resource,
ResourcePrimaryKey = row.ResourcePrimaryKey,
ProblemField = row.ProblemField,
ProblemValue = row.ProblemValue,
});

return failures;
Interlocked.Increment(ref totalProcessed);
}

private static void FixupOffsets(FailureStoreReportRecord row, FailurePart part)
Expand Down
30 changes: 29 additions & 1 deletion ii/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
using FAnsi.Implementations.PostgreSql;
using FellowOakDicom;
using IsIdentifiable.Options;
using IsIdentifiable.Reporting.Reports;
using IsIdentifiable.Runners;
using Microsoft.Extensions.FileSystemGlobbing;
using System;
using System.IO.Abstractions;
using System.Linq;
using System.Text.RegularExpressions;
using System.Threading;
using YamlDotNet.Serialization;

namespace ii;
Expand Down Expand Up @@ -101,13 +103,15 @@ public static int Main(string[] args)
IsIdentifiableDicomFileOptions,
IsIdentifiableMongoOptions,
IsIdentifiableFileGlobOptions,
IsIdentifiableReviewerOptions>(args)
IsIdentifiableReviewerOptions,
IsIdentifiableReportValidatorOptions>(args)
.MapResult(
(IsIdentifiableRelationalDatabaseOptions o) => Run(o, fileSystem),
(IsIdentifiableDicomFileOptions o) => Run(o, fileSystem),
(IsIdentifiableMongoOptions o) => Run(o, fileSystem),
(IsIdentifiableFileGlobOptions o) => Run(o, fileSystem),
(IsIdentifiableReviewerOptions o) => Run(o, fileSystem),
(IsIdentifiableReportValidatorOptions o) => Run(o, fileSystem),

// return exit code 0 for user requests for help
errors => args.Any(a => a.Equals("--help", StringComparison.InvariantCultureIgnoreCase)) ? 0 : 1);
Expand Down Expand Up @@ -146,6 +150,30 @@ private static int Run(IsIdentifiableReviewerOptions opts, IFileSystem fileSyste
return reviewer.Run();
}

/// <summary>
/// Runs the "validate" verb: checks that the failures CSV exists, that its first line is the expected
/// header, and then reads every record through <see cref="FailureStoreReport.Deserialize(IFileInfo, Action{int}, CancellationToken, IEnumerable{PartPatternFilterRule}?, bool)"/>
/// in non-parallel mode.
/// </summary>
/// <param name="opts">The parsed command line options for the validator</param>
/// <param name="fileSystem">File system abstraction used for all file access</param>
/// <returns>1 if the file is missing or its header is not the expected one, otherwise 0</returns>
private static int Run(IsIdentifiableReportValidatorOptions opts, IFileSystem fileSystem)
{
    if (GlobalOptions?.IsIdentifiableReviewerOptions != null)
        opts.InheritValuesFrom(GlobalOptions.IsIdentifiableReviewerOptions);

    if (!fileSystem.File.Exists(opts.FailuresCsv))
    {
        Console.Error.WriteLine($"Error: Could not find {opts.FailuresCsv}");
        return 1;
    }

    const string expectedHeader = "Resource,ResourcePrimaryKey,ProblemField,ProblemValue,PartWords,PartClassifications,PartOffsets";
    var line = fileSystem.File.ReadLines(opts.FailuresCsv).FirstOrDefault();

    // Whitespace is stripped before comparing so stray spaces or line-ending residue do not cause a
    // false rejection. The comparison must be against the expected header (not the line itself),
    // otherwise any whitespace-free first line would be accepted.
    if (line == null || !string.Equals(Regex.Replace(line, @"\s+", ""), expectedHeader, StringComparison.Ordinal))
    {
        Console.Error.WriteLine($"Error: Expected CSV Failure header {expectedHeader}");
        return 1;
    }

    // Deserialize is static, so no FailureStoreReport instance is needed. Validation never cancels,
    // so CancellationToken.None is used rather than allocating a CancellationTokenSource that would
    // then have to be disposed. Only the act of reading matters here; the loaded failures themselves
    // are discarded (ToArray is kept to force full enumeration of the result).
    _ = FailureStoreReport.Deserialize(
        fileSystem.FileInfo.New(opts.FailuresCsv),
        (_) => { },
        CancellationToken.None,
        partRules: null,
        runParallel: false
    ).ToArray();

    return 0;
}

private static int Run(IsIdentifiableDicomFileOptions opts, IFileSystem fileSystem)
{
Expand Down

0 comments on commit 2bb3fdf

Please sign in to comment.