Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

C++/STL: recover from aborted tests #105

Merged
merged 3 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
140 changes: 107 additions & 33 deletions aggregate/boost_test_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,47 +3,121 @@
require_relative 'test_parser'

class BoostTestParser < TestParser
def initialize(fn)
@doc = REXML::Document.new(File.read(fn))
def initialize(fn, glob = false)
@docs =
if glob
Dir.glob(fn).map { |x|
REXML::Document.new(File.read(x))
}
else
[REXML::Document.new(File.read(fn))]
end
end

def each_test
@doc.root.elements.each('TestSuite') { |ts|
ts.elements.each('TestCase') { |tc|
name = tc.attribute('name').value
@docs.each do |doc|
r = doc.root
next unless r

r.elements.each('TestSuite') { |ts|
ts.elements.each('TestCase') { |tc|
name = tc.attribute('name').value

raise "Unable to parse name: \"#{name}\"" unless name =~ /^test_(.*?)$/
name = underscore_to_ucamelcase($1)

failures = []
tc.elements.each('Error') { |err|
failures << TestResult::Failure.new(
err.attribute('file'),
err.attribute('line'),
err.text,
nil
)
}
tc.elements.each('Exception') { |err|
failures << TestResult::Failure.new(
err.attribute('file'),
err.attribute('line'),
err.text,
nil
)
}

raise "Unable to parse name: \"#{name}\"" unless name =~ /^test_(.*?)$/
name = underscore_to_ucamelcase($1)

failures = []
tc.elements.each('Error') { |err|
failures << TestResult::Failure.new(
err.attribute('file'),
err.attribute('line'),
err.text,
nil
)
if failures.empty?
status = :passed
failure = nil
else
status = :failed
failure = failures[0]
end

# `<TestCase>` elements of tests that are disabled (using the
# `--run_test=!<test_name>` command-line option) because they aborted
# (typically due to a memory access violation, which terminates the
# entire test suite) in a previous run attempt look like this in the
# XML log:
#
# ```xml
# <TestCase name="test_debug_array_user" skipped="yes" reason="disabled"/>
# ```
#
# As you can see, they don't contain any `<TestingTime>` tag, so we
# need to make sure we handle this case gracefully.
testing_time_node = tc.elements['TestingTime']
testing_time =
if testing_time_node.nil?
0.0
else
testing_time_node.text.to_f
end
GreyCat marked this conversation as resolved.
Show resolved Hide resolved

tr = TestResult.new(name, status, testing_time, failure)
yield tr
}
tc.elements.each('Exception') { |err|
failures << TestResult::Failure.new(
err.attribute('file'),
err.attribute('line'),
err.text,
nil
)
}
end
end

def aborted_tests
tests = []
# An "aborted" test means that "a critical error is detected", which means
# that "the test execution is aborted and all remaining tests are discarded"
# (see https://www.boost.org/doc/libs/1_84_0/libs/test/doc/html/boost/unit_test/test_observer.html#idm46380-bb).
# So in practice there will probably never be more than 1 aborted test (per
# log file), but we don't rely on that here and return an array anyway.
each_aborted_test { |t| tests << t }
tests
end

def each_aborted_test
@docs.each do |doc|
r = doc.root
next unless r

r.elements.each('TestSuite/TestCase') { |tc|
next if tc.elements['Exception'].nil?

aborted = false

tc.elements.each('Message') { |msg|
next unless msg.text == 'Test is aborted'

file_attr = msg.attribute('file')
next if file_attr.nil?

file = file_attr.value
next unless file.end_with?('/boost/test/impl/unit_test_log.ipp')

aborted = true
break
}

if failures.empty?
status = :passed
failure = nil
else
status = :failed
failure = failures[0]
end
next unless aborted

tr = TestResult.new(name, status, tc.elements['TestingTime'].text.to_f, failure)
yield tr
name = tc.attribute('name').value
yield name
}
}
end
end
end
2 changes: 1 addition & 1 deletion aggregate/convert_to_json
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ result = case lang
when 'cpp_stl'
add_kst_adoption(
add_fails(
BoostTestParser.new("#{infile}/results.xml").to_h,
BoostTestParser.new("#{infile}/results-*.xml", true).to_h,
BuildFailedParser.new("#{infile}/build_failed_tests.txt").to_h,
ValgrindXMLParser.new("#{infile}/valgrind.xml").to_h
),
Expand Down
72 changes: 49 additions & 23 deletions builder/cpp_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

require_relative 'partial_builder'
require_relative 'shellconfig'
require_relative '../aggregate/boost_test_parser'

class CppBuilder < PartialBuilder
attr_accessor :mode
Expand Down Expand Up @@ -304,33 +305,58 @@ def file_to_test(filename)
end

def run_tests
xml_log = "#{@abs_cpp_test_out_dir}/results.xml"
attempt = 1
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This solution adds yet another trying loop with its own attempts counter, etc.

As alternative idea: will it bring any benefits we detect offending (aborted) test and then feed that information back to normal partial builder mechanism to exclude offending files out of the binary to get us the stable set of tests in one binary in the end?

On one hand:

  • this avoids looking into boost::test invocation flags which might be not compatible with boost versions you've mentioned
  • this requires test parsing to determine offending test, but you did that already

On the other hand, I see that such a feedback mechanism from run_tests does not exist now and inventing it on whole partial_builder.rb might be trickier...

Copy link
Member Author

@generalmimon generalmimon Mar 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This solution adds yet another trying loop with its own attempts counter, etc.

Yeah. I don't think you can eliminate it completely in principle, because you don't know in advance which tests will abort, and as soon one test aborts, no more tests are run (this is by design, Boost has no "continue running the suite despite an aborted test" option, that's not a thing). If you want them to run, you have no option but to run the test executable again without that offending test.

As alternative idea: will it bring any benefits we detect offending (aborted) test and then feed that information back to normal partial builder mechanism to exclude offending files out of the binary to get us the stable set of tests in one binary in the end?

I briefly considered this approach as well, but don't see any benefits. In my view, it would only be more complicated and less flexible. As I explain above, you have to run the test suite N + 1 times in any case (where N is the number of aborted tests), only now with the unnecessary extra step of repeating the linking the object files into the ks_tests executable.

  • this avoids looking into boost::test invocation flags which might be not compatible with boost versions you've mentioned

This is no longer a problem because since kaitai-io/kaitai_struct_docker_images@1ca846d the {clang3.4,gcc4.8}-linux-x86_64 targets build the latest Boost 1.84 from source (instead of using ancient the Boost 1.54), which works like a charm. None of Boost versions we use at the moment have a problem with --run_test=!<test_name> anymore (as evidenced by https://ci.kaitai.io/ - coincidentally, all cpp_stl_{98,11} targets currently have at least one aborted test NavParentRecursive, yet they all recovered successfully), so there's no need to avoid it.


An actual disadvantage of the approach you're suggesting is that with the "stable" test binary at the end, you won't be able to run the aborted tests via Valgrind after CppBuilder has finished. This means that the only information about an aborted will be the message from when the OS signal was caught by Boost, i.e.:

memory access violation at address: 0x00000008: no mapping at fault address

... which isn't really helpful to diagnose the error - there's no file name, no line number, no stack trace. Valgrind can give you all that, if you only let it run the aborted test (cpp_stl_11/gcc11-linux-x86_64 > test_out/cpp_stl_11/valgrind-1.xml:36-57):

<error>
  <unique>0x0</unique>
  <tid>1</tid>
  <kind>InvalidRead</kind>
  <what>Invalid read of size 8</what>
  <stack>
    <frame>
      <ip>0x54235A</ip>
      <obj>/tests/compiled/cpp_stl_11/bin/ks_tests</obj>
      <fn>debug_array_user_t::cat_t::_read()</fn>
      <dir>/tests/compiled/cpp_stl_11</dir>
      <file>debug_array_user.cpp</file>
      <line>37</line>
    </frame>
    <frame>
      <ip>0x542205</ip>
      <obj>/tests/compiled/cpp_stl_11/bin/ks_tests</obj>
      <fn>debug_array_user_t::_read()</fn>
      <dir>/tests/compiled/cpp_stl_11</dir>
      <file>debug_array_user.cpp</file>
      <line>20</line>
    </frame>

(These Valgrind logs of aborted tests are currently produced, but not processed by aggregate/convert_to_json - I left this as a future improvement.)

excluded_tests = []

loop {
attempt_str = @max_attempts ? "#{attempt}/#{@max_attempts}" : attempt

xml_log = "#{@abs_cpp_test_out_dir}/results-#{attempt}.xml"
log "run attempt #{attempt_str} (log: #{xml_log})"

# Choose test executable
case @mode
when :msbuild_windows
# On Windows/MSVC, executable will be nested in Debug/
ks_tests_bin = "#{@obj_dir}/Debug/ks_tests"
when :make_posix
# On POSIX systems, it will be directly in obj dir
ks_tests_bin = "#{@obj_dir}/ks_tests"
else
raise "Unknown mode=#{@mode}"
end

# Choose test executable
case @mode
when :msbuild_windows
# On Windows/MSVC, executable will be nested in Debug/
ks_tests_bin = "#{@obj_dir}/Debug/ks_tests"
when :make_posix
# On POSIX systems, it will be directly in obj dir
ks_tests_bin = "#{@obj_dir}/ks_tests"
else
raise "Unknown mode=#{@mode}"
end
tests_cli = [
ks_tests_bin,
'--log_format=XML',
"--log_sink=#{xml_log}",
'--log_level=all',
'--report_level=detailed',
]
excluded_tests.each { |test| tests_cli << "--run_test=!#{test}" }
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one is also a bit dangerous on Windows: I recall that command line was limited to 32KiB. If we'll concatenate all test names we have now ~7KiB worth of such strings, but it potentially plants an unpleasant surprise for the future.

The only way to work that around as far as I remember is for executable to provide alternative means to source command line args (e.g. read them from file), but boost::test does not seem to support that.

Copy link
Member Author

@generalmimon generalmimon Mar 23, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In theory, this is a valid concern (I thought of that too, it just didn't feel that important to me) and not only on Windows, Unix-like OSes have a limit too, though probably higher than 32 KiB. In practice, I believe we are relatively far from reaching that limit (as you mentioned, even if every single test aborted, which already sounds extraordinary, we would still be well below that limit).

As you noted, by default make Boost.Test doesn't seem to support reading arguments from file. But I see two ways we could solve this problem:

  1. Even if Boost.Test itself doesn't support reading CLI args from file, we could probably implement this functionality ourselves by customizing the module's entry point. The new entry point would simply read the arguments from a file and pass them to the actual main function provided by Boost.Test.

  2. We could take inspiration from how xargs works, i.e. splitting the long list of arguments into multiple batches so each batch is below the maximum limit. We cannot reliably do this with the exclusion list, but we can with an inclusion list - so we would use the absolute specification, positively listing each test to run.

    This approach has a number of downsides compared to approach 1 - the cpp_builder would have to keep track of the entire set of test names, find out the command line length limit (or use xargs to take care of that, although that introduces a dependency) and the number of tests that we can run at once (i.e. in one batch) would be limited by how many we can list in the command line, so the test executable would have to be run multiple times just to execute a single snapshot of the test set.

Approach 1 seems much better and it should be pretty easy to implement, probably just a few lines of code.

But I don't see it as a high priority right now because it's not currently causing a problem. My view of our testing system is that it must be only as good as it proves necessary, and no better, because it's an internal tool, not meant be used by end users. We don't need to support C++ compilers / Boost versions that we don't run, or situations that have never happened or cannot happen with the current state of the project.

You know that I'm a perfectionist and I'd rather see everything working in the general case, but when it comes to the testing system, even I realize that solving all potential problems "in advance" doesn't make any difference (it's better to focus our efforts to pretty much any other KS component), only solving real problems does.


# Actually run the tests
Dir.chdir(@test_dir) do
run_and_tee({}, tests_cli, "#{@abs_cpp_test_out_dir}/test_run-#{attempt}.stdout")
end

tests_cli = [
ks_tests_bin,
'--log_format=XML',
"--log_sink=#{xml_log}",
'--log_level=all',
'--report_level=detailed',
]
parser = BoostTestParser.new(xml_log)
aborted_tests = parser.aborted_tests
File.write("#{@abs_cpp_test_out_dir}/aborted_tests-#{attempt}.txt", aborted_tests.map { |test| "#{test}\n" }.join)
if aborted_tests.empty?
log "success on run attempt #{attempt_str} (#{excluded_tests.size} tests excluded)"
return true
end
log "run attempt #{attempt_str} aborted (#{aborted_tests.size} tests aborted)"
aborted_tests.each { |test| excluded_tests << test }

# Actually run the tests
Dir.chdir(@test_dir)
run_and_tee({}, tests_cli, "#{@abs_cpp_test_out_dir}/test_run.stdout")
attempt += 1

File.exist?(xml_log)
if @max_attempts and attempt >= @max_attempts
log "maximum number of run attempts reached, bailing out"
return false
end
}
end

private
Expand Down
4 changes: 2 additions & 2 deletions builder/partial_builder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def partial_build
t2 = Time.now
log "build attempt #{attempt_str}: elapsed: #{(t2 - t1).to_i}s"
if result == 0
log attempt == 1 ? "perfect build succeeded" : "success on attempt \##{attempt_str}, #{disp_files.size}/#{orig_size} files survived"
log attempt == 1 ? "perfect build succeeded" : "success on build attempt #{attempt_str}, #{disp_files.size}/#{orig_size} files survived"
return true
else
log "build failed"
Expand All @@ -104,7 +104,7 @@ def partial_build
attempt += 1

if @max_attempts and attempt >= @max_attempts
log "maximum number of attempts reached, bailing out"
log "maximum number of build attempts reached, bailing out"
return false
end
end
Expand Down
Loading