Commit: Evaluate test report
srbarrios committed Nov 27, 2024
1 parent 970e062 commit de071a7
Showing 9 changed files with 284 additions and 200 deletions.
4 changes: 2 additions & 2 deletions testsuite/Rakefile
@@ -187,12 +187,12 @@ namespace :utils do

  desc 'Collect and tag flaky tests'
  task :collect_and_tag_flaky_tests do
-    `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect-and-tag --directory-path features`
+    `ruby ext-tools/machine_learning/gh_issues_parser.rb --collect_and_tag --directory_path features`
  end

  desc 'Generate dataset from GH issues'
  task :generate_dataset_gh_issues do
-    `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate-dataset --file-path gh_issues_dataset.json`
+    `ruby ext-tools/machine_learning/gh_issues_parser.rb --generate_dataset --output_path gh_issues_dataset.json`
  end

  desc 'Generate dataset from JSON Cucumber Test Report'
21 changes: 11 additions & 10 deletions testsuite/ext-tools/machine_learning/cucumber_report_history.rb
@@ -2,7 +2,6 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

-require 'csv'
require 'json'
require 'net/http'
require 'optparse'
@@ -17,7 +16,7 @@
    options[:server] = server
  end

-  opts.on('-o', '--output_path FILEPATH', 'Output file path (CSV format)') do |filepath|
+  opts.on('-o', '--output_path FILEPATH', 'Output file path (JSON format)') do |filepath|
    options[:output_path] = filepath
  end

@@ -44,26 +43,28 @@
response = Net::HTTP.get_response(uri)
if response.is_a?(Net::HTTPSuccess)
  data = JSON.parse(response.body)
+  label_mapping = {
+    'PASSED' => 0,
+    'SKIPPED' => 1,
+    'FIXED' => 2,
+    'REGRESSION' => 3,
+    'FAILED' => 4
+  }
  dataset =
    data['data']['result'].map do |result|
      metric = result['metric']
      {
-        label: metric['status'].downcase,
+        label: label_mapping[metric['status']],
        description: {
-          jobname: metric['jobname'],
          scenario: metric['case'],
          feature: metric['suite'],
+          # jobname: metric['jobname'],
          failedsince: metric['failedsince'].to_i,
          age: result['value'][1].to_i
        }
      }
    end
-  CSV.open(options[:output_path], 'w') do |csv|
-    csv << dataset.first.keys
-    dataset.each do |entry|
-      csv << [entry[:label], entry[:description].to_json]
-    end
-  end
+  File.write(options[:output_path], dataset.to_json)
else
  puts "Failed to fetch data from Prometheus: #{response.code} #{response.message}"
end
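Taken together, this hunk switches cucumber_report_history.rb from CSV to JSON output and stores the test status as an integer via label_mapping. A minimal sketch of how a downstream consumer might read the resulting file; the file name is an assumption, not part of this commit:

import json

# Inverse of the label_mapping introduced above; the input path is assumed for illustration.
STATUS_BY_LABEL = {0: 'PASSED', 1: 'SKIPPED', 2: 'FIXED', 3: 'REGRESSION', 4: 'FAILED'}

with open('cucumber_report_history.json') as f:
    history = json.load(f)

for entry in history:
    desc = entry['description']
    status = STATUS_BY_LABEL.get(entry['label'], 'UNKNOWN')
    print(f"{status}: {desc['feature']} / {desc['scenario']} (age={desc['age']}, failedsince={desc['failedsince']})")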
15 changes: 5 additions & 10 deletions testsuite/ext-tools/machine_learning/cucumber_report_parser.rb
@@ -1,7 +1,7 @@
# Copyright (c) 2024 SUSE LLC.
# Licensed under the terms of the MIT license.

-require 'csv'
+require 'base64'
require 'json'
require 'nokogiri'
require 'optparse'
@@ -43,9 +43,9 @@ def extract_dataset_from_json(json_report_path)
        time: (scenario['steps'].sum { |step| step['result']['duration'] || 0 } / 1_000_000_000.0).round
      }

-      scenario_data[:error_message] = scenario['steps'].last['result']['error_message'] if scenario['steps'].last['result'].key?('error_message')
+      scenario_data[:error_message] = Base64.encode64(scenario['steps'].last['result']['error_message']) if scenario['steps'].last['result'].key?('error_message')
      scenario_data[:tags] = scenario['tags'].map { |tag| tag['name'][1..] } if scenario.key?('tags')
-      scenario_data[:logs] = logs unless logs.empty?
+      scenario_data[:logs] = Base64.encode64(logs.to_s) unless logs.empty?
      scenario_data[:screenshots] = screenshots unless screenshots.empty?

      if scenario['before'] && scenario['before'].size > 3 && scenario['before'][3].key?('output')
@@ -79,7 +79,7 @@ def extract_dataset_from_json(json_report_path)
    options[:report_path] = f
  end

-  opts.on('-o', '--output_path PATH', 'Path to the processed report file (CSV format)') do |f|
+  opts.on('-o', '--output_path PATH', 'Path to the processed report file (JSON format)') do |f|
    options[:output_path] = f
  end

@@ -98,9 +98,4 @@ def extract_dataset_from_json(json_report_path)
end

dataset = extract_dataset_from_json(options[:report_path])
-CSV.open(options[:output_path], 'w') do |csv|
-  csv << dataset.first.keys
-  dataset.each do |entry|
-    csv << [entry[:label], entry[:description].to_json]
-  end
-end
+File.write(options[:output_path], dataset.to_json)
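Because error_message and logs are now Base64-encoded before the report is written, consumers of the parser's JSON output need a decoding step. A hedged sketch of that step; the input path is assumed, and Ruby's Base64.encode64 wraps lines every 60 characters, which Python's b64decode discards by default:

import base64
import json

# Assumed output file of cucumber_report_parser.rb, used here only for illustration.
with open('cucumber_report.json') as f:
    scenarios = json.load(f)

for scenario in scenarios:
    if 'error_message' in scenario:
        # Newlines inserted by Base64.encode64 are ignored by b64decode.
        error = base64.b64decode(scenario['error_message']).decode('utf-8', errors='replace')
        print(error.splitlines()[0] if error else '')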
Empty file.
34 changes: 34 additions & 0 deletions testsuite/ext-tools/machine_learning/evaluate_cucumber_report.py
@@ -0,0 +1,34 @@
import pandas as pd
from joblib import load
import json

def evaluate_current_report(current_report_path, model_path, vectorizer_path, output_path):
    # Load data
    with open(current_report_path, 'r') as file:
        current_report = json.load(file)
    df = pd.DataFrame(current_report)

    # Load model and vectorizer
    model = load(model_path)
    vectorizer = load(vectorizer_path)

    # Preprocess and predict
    X = vectorizer.transform(df['text'])
    df['predicted_root_cause'] = model.predict(X)

    # Save predictions
    df.to_csv(output_path, index=False)
    print(f"Predictions saved to {output_path}")

if __name__ == "__main__":
    import sys
    if len(sys.argv) != 5:
        print("Usage: python evaluate_current_report.py <current_report_path> <model_path> <vectorizer_path> <output_path>")
        sys.exit(1)

    current_report_path = sys.argv[1]
    model_path = sys.argv[2]
    vectorizer_path = sys.argv[3]
    output_path = sys.argv[4]

    evaluate_current_report(current_report_path, model_path, vectorizer_path, output_path)
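The script above expects a JSON report that pandas can load into a DataFrame with a 'text' column, plus a model and vectorizer previously serialized with joblib. A hypothetical smoke test; the file names and report contents are invented, and the two .joblib artifacts are assumed to exist from an earlier training run:

import json

from evaluate_cucumber_report import evaluate_current_report

# Invented records: the only hard requirement visible above is a 'text' field per row.
report = [
    {'scenario': 'Bootstrap a Salt minion', 'text': 'Timeout while waiting for the bootstrap to finish'},
    {'scenario': 'Run a remote command', 'text': 'Connection refused when reaching the proxy'}
]
with open('current_report.json', 'w') as f:
    json.dump(report, f)

# model.joblib and vectorizer.joblib are assumed, not produced by this commit.
evaluate_current_report('current_report.json', 'model.joblib', 'vectorizer.joblib', 'predictions.csv')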
45 changes: 19 additions & 26 deletions testsuite/ext-tools/machine_learning/gh_issues_parser.rb
@@ -4,6 +4,7 @@
# Collect all the issues from a GitHub project board column
# and tag the corresponding Cucumber feature files with a given tag

+require 'base64'
require 'csv'
require 'find'
require 'json'
@@ -146,18 +147,15 @@ def self.generate_dataset(organization, project_number, headers)
      # end
      if status_field && status_field['item'] && status_field['item']['content']
        title = clean_text(status_field['item']['content']['title'])
-        description = clean_text(status_field['item']['content']['bodyText'])
-        comments = status_field['item']['content']['comments']['nodes'].map { |node| clean_text(node['body']) }
+        description = Base64.encode64(clean_text(status_field['item']['content']['bodyText']))
+        comments = Base64.encode64(status_field['item']['content']['comments']['nodes'].map { |node| clean_text(node['body']) }.to_s)
        matches = title.match(/Feature:(.*)\s*\|\s*Scenario:(.*)/)
-        gh_issue_content = {}
-        if matches.nil?
-          gh_issue_content[:title] = title
-        else
-          gh_issue_content[:feature] = matches[1].strip
-          gh_issue_content[:scenario] = matches[2].strip
-        end
-        gh_issue_content[:description] = description
-        gh_issue_content[:comments] = comments
+        gh_issue_content = {
+          feature: matches.nil? ? title : matches[1].strip,
+          scenario: matches.nil? ? title : matches[2].strip,
+          description: description,
+          comments: comments
+        }
        dataset.push({ label: label_mapping[label], description: gh_issue_content })
        puts "\e[36mCard found\e[0m => #{title}"
      else
@@ -227,20 +225,20 @@ def main
  OptionParser.new do |opts|
    opts.banner = 'Usage: ruby gh_issues_parser.rb [options]'

-    opts.on('-g', '--generate-dataset', 'Generate a dataset from GitHub project board issues') do
+    opts.on('-g', '--generate_dataset', 'Generate a dataset from GitHub project board issues') do
      options[:generate_dataset] = true
    end

-    opts.on('-c', '--collect-and-tag', 'Collect flaky tests and tag Cucumber features') do
+    opts.on('-c', '--collect_and_tag', 'Collect flaky tests and tag Cucumber features') do
      options[:collect_and_tag] = true
    end

-    opts.on('-d', '--directory-path PATH', 'Directory path to search for Cucumber feature files') do |path|
+    opts.on('-d', '--directory_path PATH', 'Directory path to search for Cucumber feature files') do |path|
      options[:directory_path] = path
    end

-    opts.on('-f', '--file-path PATH', 'File path to store the dataset (CSV format)') do |path|
-      options[:file_path] = path
+    opts.on('-o', '--output_path PATH', 'File path to store the dataset (JSON format)') do |path|
+      options[:output_path] = path
    end

    opts.on('-h', '--help', 'Show this help message') do
@@ -252,17 +250,17 @@ def main
  parser.parse!

  unless options[:generate_dataset] || options[:collect_and_tag]
-    puts 'Please specify either --generate-dataset or --collect-and-tag'
+    puts 'Please specify either --generate_dataset or --collect_and_tag'
    exit 1
  end

-  if options[:generate_dataset] && !options[:file_path]
-    puts 'Please specify the file path using --file-path'
+  if options[:generate_dataset] && !options[:output_path]
+    puts 'Please specify the file path using --output_path'
    exit 1
  end

  if options[:collect_and_tag] && !options[:directory_path]
-    puts 'Please specify the file path using --directory-path'
+    puts 'Please specify the file path using --directory_path'
    exit 1
  end

@@ -287,12 +285,7 @@ def main

  if options[:generate_dataset]
    dataset = GithubProjectBoard.generate_dataset(organization, project_number, headers)
-    CSV.open(options[:file_path], 'w') do |csv|
-      csv << dataset.first.keys
-      dataset.each do |entry|
-        csv << [entry[:label], entry[:description].to_json]
-      end
-    end
+    File.write(options[:output_path], dataset.to_json)
  elsif options[:collect_and_tag]
    columns = {
      'New' => 'new_issue',
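After this restructuring, every record in the generated dataset carries feature, scenario, description and comments (the latter two Base64-encoded) under its description key, next to the mapped label. A small sketch of inspecting such a file; the path matches the Rakefile task above, everything else is illustrative:

import base64
import json
from collections import Counter

with open('gh_issues_dataset.json') as f:  # path used by the generate_dataset_gh_issues task
    issues = json.load(f)

# Distribution of labels across the board, whatever values label_mapping assigns.
print(Counter(entry['label'] for entry in issues))

for entry in issues[:3]:
    desc = entry['description']
    body = base64.b64decode(desc['description']).decode('utf-8', errors='replace')
    print(f"{desc['feature']} | {desc['scenario']} -> {len(body)} characters of description")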
152 changes: 0 additions & 152 deletions testsuite/ext-tools/machine_learning/gh_issues_train_model.py

This file was deleted.
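With the training script removed, the model and vectorizer that evaluate_cucumber_report.py loads have to be produced elsewhere. A hypothetical sketch of building a compatible pair; the estimator choice, column handling and file names are assumptions, not a reconstruction of the deleted file:

import json

import pandas as pd
from joblib import dump
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression

# Assumed training dataset and column layout; the labels act as the target classes.
with open('gh_issues_dataset.json') as f:
    df = pd.DataFrame(json.load(f))
df['text'] = df['description'].astype(str)

vectorizer = TfidfVectorizer(max_features=5000)
X = vectorizer.fit_transform(df['text'])
model = LogisticRegression(max_iter=1000).fit(X, df['label'])

# Serialize both artifacts so evaluate_cucumber_report.py can load them with joblib.
dump(model, 'model.joblib')
dump(vectorizer, 'vectorizer.joblib')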
