Commit: Evaluate test report
srbarrios committed Nov 27, 2024
1 parent 970e062 commit de071a7
Showing 9 changed files with 284 additions and 200 deletions.
4 changes: 2 additions & 2 deletions testsuite/Rakefile
@@ -187,12 +187,12 @@ namespace :utils do

  desc 'Collect and tag flaky tests'
  task :collect_and_tag_flaky_tests do
-    `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect-and-tag --directory-path features`
+    `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect_and_tag --directory_path features`
  end

  desc 'Generate dataset from GH issues'
  task :generate_dataset_gh_issues do
-    `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate-dataset --file-path gh_issues_dataset.json`
+    `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate_dataset --output_path gh_issues_dataset.json`
  end

  desc 'Generate dataset from JSON Cucumber Test Report'
21 changes: 11 additions & 10 deletions testsuite/ext-tools/machine_learning/cucumber_report_history.rb
@@ -2,7 +2,6 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

-require 'csv'
require 'json'
require 'net/http'
require 'optparse'
@@ -17,7 +16,7 @@
    options[:server] = server
  end

-  opts.on('-o', '--output_path FILEPATH', 'Output file path (CSV format)') do |filepath|
+  opts.on('-o', '--output_path FILEPATH', 'Output file path (JSON format)') do |filepath|
    options[:output_path] = filepath
  end

@@ -44,26 +43,28 @@
response = Net::HTTP.get_response(uri)
if response.is_a?(Net::HTTPSuccess)
  data = JSON.parse(response.body)
+  label_mapping = {
+    'PASSED' => 0,
+    'SKIPPED' => 1,
+    'FIXED' => 2,
+    'REGRESSION' => 3,
+    'FAILED' => 4
+  }
  dataset =
    data['data']['result'].map do |result|
      metric = result['metric']
      {
-        label: metric['status'].downcase,
+        label: label_mapping[metric['status']],
        description: {
-          jobname: metric['jobname'],
          scenario: metric['case'],
          feature: metric['suite'],
+          # jobname: metric['jobname'],
          failedsince: metric['failedsince'].to_i,
          age: result['value'][1].to_i
        }
      }
    end
-  CSV.open(options[:output_path], 'w') do |csv|
-    csv << dataset.first.keys
-    dataset.each do |entry|
-      csv << [entry[:label], entry[:description].to_json]
-    end
-  end
+  File.write(options[:output_path], dataset.to_json)
else
  puts "Failed to fetch data from Prometheus: #{response.code} #{response.message}"
end
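Taken together, this hunk switches cucumber_report_history.rb from CSV to JSON output and stores the test status as an integer via label_mapping. A minimal sketch of how a downstream consumer might read the resulting file; the file name is an assumption, not part of this commit:

import json

# Inverse of the label_mapping introduced above; the input path is assumed for illustration.
STATUS_BY_LABEL = {0: 'PASSED', 1: 'SKIPPED', 2: 'FIXED', 3: 'REGRESSION', 4: 'FAILED'}

with open('cucumber_report_history.json') as f:
    history = json.load(f)

for entry in history:
    desc = entry['description']
    status = STATUS_BY_LABEL.get(entry['label'], 'UNKNOWN')
    print(f"{status}: {desc['feature']} / {desc['scenario']} (age={desc['age']}, failedsince={desc['failedsince']})")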
15 changes: 5 additions & 10 deletions testsuite/ext-tools/machine_learning/cucumber_report_parser.rb
@@ -1,7 +1,7 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

-require 'csv'
+require 'base64'
require 'json'
require 'nokogiri'
require 'optparse'
@@ -43,9 +43,9 @@ def extract_dataset_from_json(json_report_path)
        time: (scenario['steps'].sum { |step| step['result']['duration'] || 0 } / 1_000_000_000.0).round
      }

-      scenario_data[:error_message] = scenario['steps'].last['result']['error_message'] if scenario['steps'].last['result'].key?('error_message')
+      scenario_data[:error_message] = Base64.encode64(scenario['steps'].last['result']['error_message']) if scenario['steps'].last['result'].key?('error_message')
      scenario_data[:tags] = scenario['tags'].map { |tag| tag['name'][1..] } if scenario.key?('tags')
-      scenario_data[:logs] = logs unless logs.empty?
+      scenario_data[:logs] = Base64.encode64(logs.to_s) unless logs.empty?
      scenario_data[:screenshots] = screenshots unless screenshots.empty?

      if scenario['before'] && scenario['before'].size > 3 && scenario['before'][3].key?('output')
@@ -79,7 +79,7 @@ def extract_dataset_from_json(json_report_path)
    options[:report_path] = f
  end

-  opts.on('-o', '--output_path PATH', 'Path to the processed report file (CSV format)') do |f|
+  opts.on('-o', '--output_path PATH', 'Path to the processed report file (JSON format)') do |f|
    options[:output_path] = f
  end

@@ -98,9 +98,4 @@ def extract_dataset_from_json(json_report_path)
end

dataset = extract_dataset_from_json(options[:report_path])
-CSV.open(options[:output_path], 'w') do |csv|
-  csv << dataset.first.keys
-  dataset.each do |entry|
-    csv << [entry[:label], entry[:description].to_json]
-  end
-end
+File.write(options[:output_path], dataset.to_json)
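Because error_message and logs are now Base64-encoded before the report is written, consumers of the parser's JSON output need a decoding step. A hedged sketch of that step; the input path is assumed, and Ruby's Base64.encode64 wraps lines every 60 characters, which Python's b64decode discards by default:

import base64
import json

# Assumed output file of cucumber_report_parser.rb, used here only for illustration.
with open('cucumber_report.json') as f:
    scenarios = json.load(f)

for scenario in scenarios:
    if 'error_message' in scenario:
        # Newlines inserted by Base64.encode64 are ignored by b64decode.
        error = base64.b64decode(scenario['error_message']).decode('utf-8', errors='replace')
        print(error.splitlines()[0] if error else '')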
Empty file.
34 changes: 34 additions & 0 deletions testsuite/ext-tools/machine_learning/evaluate_cucumber_report.py
@@ -0,0 +1,34 @@
import pandas as pd
from joblib import load
import json

def evaluate_current_report(current_report_path, model_path, vectorizer_path, output_path):
    # Load data
    with open(current_report_path, 'r') as file:
        current_report = json.load(file)
    df = pd.DataFrame(current_report)

    # Load model and vectorizer
    model = load(model_path)
    vectorizer = load(vectorizer_path)

    # Preprocess and predict
    X = vectorizer.transform(df['text'])
    df['predicted_root_cause'] = model.predict(X)

    # Save predictions
    df.to_csv(output_path, index=False)
    print(f"Predictions saved to {output_path}")

if __name__ == "__main__":
    import sys
    if len(sys.argv) != 5:
        print("Usage: python evaluate_current_report.py <current_report_path> <model_path> <vectorizer_path> <output_path>")
        sys.exit(1)

    current_report_path = sys.argv[1]
    model_path = sys.argv[2]
    vectorizer_path = sys.argv[3]
    output_path = sys.argv[4]

    evaluate_current_report(current_report_path, model_path, vectorizer_path, output_path)
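The script above expects a JSON report that pandas can load into a DataFrame with a 'text' column, plus a model and vectorizer previously serialized with joblib. A hypothetical smoke test; the file names and report contents are invented, and the two .joblib artifacts are assumed to exist from an earlier training run:

import json

from evaluate_cucumber_report import evaluate_current_report

# Invented records: the only hard requirement visible above is a 'text' field per row.
report = [
    {'scenario': 'Bootstrap a Salt minion', 'text': 'Timeout while waiting for the bootstrap to finish'},
    {'scenario': 'Run a remote command', 'text': 'Connection refused when reaching the proxy'}
]
with open('current_report.json', 'w') as f:
    json.dump(report, f)

# model.joblib and vectorizer.joblib are assumed, not produced by this commit.
evaluate_current_report('current_report.json', 'model.joblib', 'vectorizer.joblib', 'predictions.csv')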
45 changes: 19 additions & 26 deletions testsuite/ext-tools/machine_learning/gh_issues_parser.rb
@@ -4,6 +4,7 @@
# Collect all the issues from a GitHub project board column
# and tag the corresponding Cucumber feature files with a given tag

+require 'base64'
require 'csv'
require 'find'
require 'json'
@@ -146,18 +147,15 @@ def self.generate_dataset(organization, project_number, headers)
      # end
      if status_field && status_field['item'] && status_field['item']['content']
        title = clean_text(status_field['item']['content']['title'])
-        description = clean_text(status_field['item']['content']['bodyText'])
-        comments = status_field['item']['content']['comments']['nodes'].map { |node| clean_text(node['body']) }
+        description = Base64.encode64(clean_text(status_field['item']['content']['bodyText']))
+        comments = Base64.encode64(status_field['item']['content']['comments']['nodes'].map { |node| clean_text(node['body']) }.to_s)
        matches = title.match(/Feature:(.*)\s*\|\s*Scenario:(.*)/)
-        gh_issue_content = {}
-        if matches.nil?
-          gh_issue_content[:title] = title
-        else
-          gh_issue_content[:feature] = matches[1].strip
-          gh_issue_content[:scenario] = matches[2].strip
-        end
-        gh_issue_content[:description] = description
-        gh_issue_content[:comments] = comments
+        gh_issue_content = {
+          feature: matches.nil? ? title : matches[1].strip,
+          scenario: matches.nil? ? title : matches[2].strip,
+          description: description,
+          comments: comments
+        }
        dataset.push({ label: label_mapping[label], description: gh_issue_content })
        puts "\e[36mCard found\e[0m => #{title}"
      else
@@ -227,20 +225,20 @@ def main
  OptionParser.new do |opts|
    opts.banner = 'Usage: ruby gh_issues_parser.rb [options]'

-    opts.on('-g', '--generate-dataset', 'Generate a dataset from GitHub project board issues') do
+    opts.on('-g', '--generate_dataset', 'Generate a dataset from GitHub project board issues') do
      options[:generate_dataset] = true
    end

-    opts.on('-c', '--collect-and-tag', 'Collect flaky tests and tag Cucumber features') do
+    opts.on('-c', '--collect_and_tag', 'Collect flaky tests and tag Cucumber features') do
      options[:collect_and_tag] = true
    end

-    opts.on('-d', '--directory-path PATH', 'Directory path to search for Cucumber feature files') do |path|
+    opts.on('-d', '--directory_path PATH', 'Directory path to search for Cucumber feature files') do |path|
      options[:directory_path] = path
    end

-    opts.on('-f', '--file-path PATH', 'File path to store the dataset (CSV format)') do |path|
-      options[:file_path] = path
+    opts.on('-o', '--output_path PATH', 'File path to store the dataset (JSON format)') do |path|
+      options[:output_path] = path
    end

    opts.on('-h', '--help', 'Show this help message') do
@@ -252,17 +250,17 @@ def main
  parser.parse!

  unless options[:generate_dataset] || options[:collect_and_tag]
-    puts 'Please specify either --generate-dataset or --collect-and-tag'
+    puts 'Please specify either --generate_dataset or --collect_and_tag'
    exit 1
  end

-  if options[:generate_dataset] && !options[:file_path]
-    puts 'Please specify the file path using --file-path'
+  if options[:generate_dataset] && !options[:output_path]
+    puts 'Please specify the file path using --output_path'
    exit 1
  end

  if options[:collect_and_tag] && !options[:directory_path]
-    puts 'Please specify the file path using --directory-path'
+    puts 'Please specify the file path using --directory_path'
    exit 1
  end

@@ -287,12 +285,7 @@ def main

  if options[:generate_dataset]
    dataset = GithubProjectBoard.generate_dataset(organization, project_number, headers)
-    CSV.open(options[:file_path], 'w') do |csv|
-      csv << dataset.first.keys
-      dataset.each do |entry|
-        csv << [entry[:label], entry[:description].to_json]
-      end
-    end
+    File.write(options[:output_path], dataset.to_json)
  elsif options[:collect_and_tag]
    columns = {
      'New' => 'new_issue',
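After this restructuring, every record in the generated dataset carries feature, scenario, description and comments (the latter two Base64-encoded) under its description key, next to the mapped label. A small sketch of inspecting such a file; the path matches the Rakefile task above, everything else is illustrative:

import base64
import json
from collections import Counter

with open('gh_issues_dataset.json') as f:  # path used by the generate_dataset_gh_issues task
    issues = json.load(f)

# Distribution of labels across the board, whatever values label_mapping assigns.
print(Counter(entry['label'] for entry in issues))

for entry in issues[:3]:
    desc = entry['description']
    body = base64.b64decode(desc['description']).decode('utf-8', errors='replace')
    print(f"{desc['feature']} | {desc['scenario']} -> {len(body)} characters of description")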
152 changes: 0 additions & 152 deletions testsuite/ext-tools/machine_learning/gh_issues_train_model.py

This file was deleted.
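With the training script removed, the model and vectorizer that evaluate_cucumber_report.py loads have to be produced elsewhere. A hypothetical sketch of building a compatible pair; the estimator choice, column handling and file names are assumptions, not a reconstruction of the deleted file:

import json

import pandas as pd
from joblib import dump
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Assumed training dataset and column layout; the labels act as the target classes.
with open('gh_issues_dataset.json') as f:
    df = pd.DataFrame(json.load(f))
df['text'] = df['description'].astype(str)

vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['text'])
model = LogisticRegression(max_iter=1000).fit(X, df['label'])

# Serialize both artifacts so evaluate_cucumber_report.py can load them with joblib.
dump(model, 'model.joblib')
dump(vectorizer, 'vectorizer.joblib')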
