diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index abd6dfa..5639107 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -8,13 +8,18 @@ on: merge_group: types: [checks_requested] -defaults: - run: - working-directory: ./metrics jobs: build: runs-on: ubuntu-latest + strategy: + matrix: + dir: [metrics, ciplatforms] + + defaults: + run: + working-directory: ${{ matrix.dir }} + steps: - uses: actions/checkout@v3 - name: Set up Go diff --git a/ciplatforms/cmd/info.go b/ciplatforms/cmd/info.go new file mode 100644 index 0000000..0e5c302 --- /dev/null +++ b/ciplatforms/cmd/info.go @@ -0,0 +1,104 @@ +package cmd + +import ( + "context" + "fmt" + "log" + "os" + "path/filepath" + "time" + + "github.com/mozilla-services/rapid-release-model/ciplatforms/internal/github" + "github.com/mozilla-services/rapid-release-model/ciplatforms/internal/io" + "github.com/spf13/cobra" +) + +const githubTokenEnvKey = "CIPLATFORMS_GITHUB_API_TOKEN" + +// infoOptions holds options for the CLI command +type infoOptions struct { + inputFile string + outputFile string + githubAPIToken string + timeout time.Duration + batchSize int + + // set in command PreRunE + servicesReader io.ServicesReader + resultWriter io.ResultWriter +} + +// newInfoCmd creates a new info CLI command +func newInfoCmd() *cobra.Command { + opts := new(infoOptions) + + cmd := &cobra.Command{ + Use: "info", + Short: "Collect CI platform information from GitHub.", + Long: "Collect CI platform information from GitHub.", + PreRunE: func(cmd *cobra.Command, args []string) error { + if opts.githubAPIToken == "" { + val, ok := os.LookupEnv(githubTokenEnvKey) + if !ok { + return fmt.Errorf("GitHub API token required. Pass --gh-token or set %s", githubTokenEnvKey) + } + opts.githubAPIToken = val + } + + switch ext := filepath.Ext(opts.inputFile); ext { + case ".csv": + opts.servicesReader = io.CSVServicesReader{} + default: + return fmt.Errorf("unsupported file extension: %s", ext) + } + + switch ext := filepath.Ext(opts.outputFile); ext { + case ".json": + opts.resultWriter = io.JSONResultWriter{} + case ".csv": + opts.resultWriter = io.CSVResultWriter{} + default: + return fmt.Errorf("unsupported file extension: %s", ext) + } + + return nil + }, + RunE: func(cmd *cobra.Command, args []string) error { + return runInfo(cmd.Root().Context(), opts) + }, + } + + cmd.Flags().StringVarP(&opts.inputFile, "input", "i", "repos.csv", "input file") + cmd.Flags().StringVarP(&opts.outputFile, "output", "o", "repos_ciplatforms.csv", "output file") + cmd.Flags().StringVarP(&opts.githubAPIToken, "gh-token", "t", "", "GitHub API token") + + cmd.Flags().DurationVar(&opts.timeout, "timeout", 10*time.Second, "timeout for GitHub API requests") + cmd.Flags().IntVar(&opts.batchSize, "batch-size", 50, "number of repositories to process in each batch") + return cmd +} + +func runInfo(ctx context.Context, opts *infoOptions) error { + // Load services from the given input file. + services, repos, err := opts.servicesReader.ReadServices(opts.inputFile) + if err != nil { + return fmt.Errorf("error loading services from file: %w", err) + } + + // Ensure any operations that use timeoutCtx are automatically canceled + // after 10 seconds. This includes long running HTTP requests. + timeoutCtx, cancel := context.WithTimeout(ctx, opts.timeout) + defer cancel() + + // Check CI Platform config files for each GitHub repository in batches. + if err := github.CheckCIConfigInBatches(timeoutCtx, opts.githubAPIToken, repos, opts.batchSize); err != nil { + return fmt.Errorf("error checking CI configs: %w", err) + } + + // Write the results to the specified file. + if err := opts.resultWriter.WriteResults(opts.outputFile, services); err != nil { + return fmt.Errorf("failed to save results: %w", err) + } + log.Printf("[INFO] Results saved to %s\n", opts.outputFile) + + return nil +} diff --git a/ciplatforms/cmd/root.go b/ciplatforms/cmd/root.go new file mode 100644 index 0000000..9eca61f --- /dev/null +++ b/ciplatforms/cmd/root.go @@ -0,0 +1,30 @@ +package cmd + +import ( + "context" + "fmt" + "os" + + "github.com/spf13/cobra" +) + +// newRootCmd creates a new root cobra command. +func newRootCmd() *cobra.Command { + rootCmd := &cobra.Command{ + Use: "ciplatforms", + Short: "CLI app for collecting CI platform information from GitHub.", + Long: "CLI app for collecting CI platform information from GitHub.", + } + rootCmd.AddCommand(newInfoCmd()) + return rootCmd +} + +// Execute creates and executes the CLI root command. +func Execute() { + ctx := context.Background() + rootCmd := newRootCmd() + if err := rootCmd.ExecuteContext(ctx); err != nil { + fmt.Fprintf(os.Stderr, "error: %v\n", err) + os.Exit(1) + } +} diff --git a/ciplatforms/go.mod b/ciplatforms/go.mod new file mode 100644 index 0000000..21468eb --- /dev/null +++ b/ciplatforms/go.mod @@ -0,0 +1,10 @@ +module github.com/mozilla-services/rapid-release-model/ciplatforms + +go 1.21.3 + +require github.com/spf13/cobra v1.8.1 + +require ( + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/spf13/pflag v1.0.5 // indirect +) diff --git a/ciplatforms/go.sum b/ciplatforms/go.sum new file mode 100644 index 0000000..912390a --- /dev/null +++ b/ciplatforms/go.sum @@ -0,0 +1,10 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.4/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= +github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/spf13/cobra v1.8.1 h1:e5/vxKd/rZsfSJMUX1agtjeTDf+qv1/JdBF8gg5k9ZM= +github.com/spf13/cobra v1.8.1/go.mod h1:wHxEcudfqmLYa8iTfL+OuZPbBZkmvliBWKIezN3kD9Y= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/ciplatforms/internal/github/api.go b/ciplatforms/internal/github/api.go new file mode 100644 index 0000000..7cb6697 --- /dev/null +++ b/ciplatforms/internal/github/api.go @@ -0,0 +1,191 @@ +package github + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "log" + "net/http" + "strings" + "text/template" +) + +// Service links to a GitHub repository. +type Service struct { + Name string `json:"name"` + Repository *Repository `json:"repository"` +} + +// Repository holds CI information for a GitHub repository. +type Repository struct { + Owner string `json:"owner"` + Name string `json:"name"` + CircleCI bool `json:"circle_ci"` + GitHubActions bool `json:"gh_actions"` + Taskcluster bool `json:"taskcluster"` + Accessible bool `json:"accessible"` +} + +// Define the GraphQL query template for batch queries for multiple repos. +const queryTemplate = ` +query { +{{- range $i, $repo := . }} + repo{{ $i }}: repository(owner: "{{ $repo.Owner }}", name: "{{ $repo.Name }}") { + name + owner { login } + circleci: object(expression: "HEAD:.circleci/config.yml") { + ... on Blob { id } + } + githubActions: object(expression: "HEAD:.github/workflows") { + ... on Tree { + entries { name } + } + } + taskcluster: object(expression: "HEAD:.taskcluster.yml") { + ... on Blob { id } + } + } +{{- end }} +} +` + +// See https://docs.github.com/en/graphql/guides/forming-calls-with-graphql#the-graphql-endpoint +const githubGraphQLEndpoint = "https://api.github.com/graphql" + +// CheckCIConfigInBatches dynamically generates the query for each batch and parses the response. +func CheckCIConfigInBatches(ctx context.Context, token string, repos map[string]*Repository, batchSize int) error { + var repoSlice []*Repository + for _, r := range repos { + repoSlice = append(repoSlice, r) + } + log.Printf("[INFO] Checking CI Config for %d repos (batch size %d)", len(repoSlice), batchSize) + + for i := 0; i < len(repoSlice); i += batchSize { + end := i + batchSize + if end > len(repos) { + end = len(repos) + } + batch := repoSlice[i:end] + + // Generate the query from the template + query, err := buildQueryFromTemplate(batch) + if err != nil { + return fmt.Errorf("failed to build query: %w", err) + } + + // Execute the batch query + responseData, err := executeQuery(ctx, token, query) + if err != nil { + return fmt.Errorf("GitHub API query failed: %w", err) + } + + if err := updateRepos(batch, responseData); err != nil { + return fmt.Errorf("parsing results failed: %w", err) + } + } + return nil +} + +// buildQueryFromTemplate builds the GraphQL query for the batch using a template. +func buildQueryFromTemplate(batch []*Repository) (string, error) { + tmpl, err := template.New("graphqlQuery").Parse(queryTemplate) + if err != nil { + return "", err + } + + var buf bytes.Buffer + if err := tmpl.Execute(&buf, batch); err != nil { + return "", err + } + return buf.String(), nil +} + +// executeQuery sends an HTTP request with the generated GraphQL query to the GitHub GraphQL API. +func executeQuery(ctx context.Context, token string, query string) (map[string]interface{}, error) { + reqBody := map[string]string{"query": query} + jsonBody, err := json.Marshal(reqBody) + if err != nil { + return nil, fmt.Errorf("failed to marshal request body: %w", err) + } + + req, err := http.NewRequestWithContext(ctx, "POST", githubGraphQLEndpoint, bytes.NewBuffer(jsonBody)) + if err != nil { + return nil, fmt.Errorf("failed to create HTTP request: %w", err) + } + + // See https://docs.github.com/en/graphql/guides/forming-calls-with-graphql#authenticating-with-graphql + req.Header.Set("Authorization", "Bearer "+token) + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + if err != nil { + return nil, fmt.Errorf("failed to execute request: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("query failed with status code %d", resp.StatusCode) + } + + var response map[string]interface{} + if err := json.NewDecoder(resp.Body).Decode(&response); err != nil { + return nil, fmt.Errorf("failed to decode response: %w", err) + } + return response, nil +} + +// updateRepos parses the response and maps results to the batch repositories +func updateRepos(batch []*Repository, data map[string]interface{}) error { + // Ensure that the top-level "data" field exists + dataField, ok := data["data"].(map[string]interface{}) + if !ok { + return fmt.Errorf("failed to parse 'data' field in response") + } + + for i, repo := range batch { + alias := fmt.Sprintf("repo%d", i) + + // Retrieve the repository data for the alias + repoData, exists := dataField[alias] + if !exists || repoData == nil { + // Log a warning if data for the alias is missing or set to nil (likely a 404 error) + log.Printf("[WARNING] Data for repository %s/%s (alias %s) is missing or inaccessible", repo.Owner, repo.Name, alias) + repo.Accessible = false + continue + } + + repo.Accessible = true + + repoDataMap, ok := repoData.(map[string]interface{}) + if !ok { + return fmt.Errorf("invalid data format for alias %s (%s/%s): expected a map but got %T", alias, repo.Owner, repo.Name, repoData) + } + + // Check if CircleCI configuration file is present + repo.CircleCI = repoDataMap["circleci"] != nil + + // Check if Taskcluster configuration file is present + repo.Taskcluster = repoDataMap["taskcluster"] != nil + + // Check if any GitHub Actions workflow configuration files are present + if githubActionsData, ok := repoDataMap["githubActions"].(map[string]interface{}); ok { + if entries, ok := githubActionsData["entries"].([]interface{}); ok { + repo.GitHubActions = hasYmlFile(entries) + } + } + } + return nil +} + +// hasYmlFile checks if any entry in the entries slice is a YAML file. +func hasYmlFile(entries []interface{}) bool { + for _, entry := range entries { + if entryMap, ok := entry.(map[string]interface{}); ok { + if name, ok := entryMap["name"].(string); ok && strings.HasSuffix(name, ".yml") { + return true + } + } + } + return false +} diff --git a/ciplatforms/internal/io/input.go b/ciplatforms/internal/io/input.go new file mode 100644 index 0000000..c1174a9 --- /dev/null +++ b/ciplatforms/internal/io/input.go @@ -0,0 +1,70 @@ +package io + +import ( + "encoding/csv" + "fmt" + "log" + "os" + "regexp" + + "github.com/mozilla-services/rapid-release-model/ciplatforms/internal/github" +) + +var repoPattern = regexp.MustCompile(`^(?P[a-zA-Z0-9][a-zA-Z0-9._-]*)/(?P[a-zA-Z0-9._-]+)$`) + +type ServicesReader interface { + ReadServices(filename string) ([]github.Service, map[string]*github.Repository, error) +} + +type CSVServicesReader struct{} + +// ReadServices loads service information from the given CSV file. +func (c CSVServicesReader) ReadServices(inputFile string) ([]github.Service, map[string]*github.Repository, error) { + file, err := os.Open(inputFile) + if err != nil { + return nil, nil, fmt.Errorf("error opening file at %s: %w", inputFile, err) + } + defer file.Close() + + reader := csv.NewReader(file) + records, err := reader.ReadAll() + if err != nil { + return nil, nil, fmt.Errorf("error decoding CSV file at %s: %w", inputFile, err) + } + + distinctRepos := make(map[string]*github.Repository) + var services []github.Service + + for i, record := range records { + if i == 0 { + continue + } + + if len(record) < 2 { + return nil, nil, fmt.Errorf("invalid CSV file format: expected at least 2 columsn") + } + + service, githubRepo := record[0], record[1] + + match := repoPattern.FindStringSubmatch(githubRepo) + if match == nil { + return nil, nil, fmt.Errorf("invalid GitHub repository format for %s", githubRepo) + } + + owner := match[repoPattern.SubexpIndex("owner")] + name := match[repoPattern.SubexpIndex("name")] + + key := fmt.Sprintf("%s/%s", owner, name) + repo, exists := distinctRepos[key] + if !exists { + repo = &github.Repository{Owner: owner, Name: name} + distinctRepos[key] = repo + } + + services = append(services, github.Service{Name: service, Repository: repo}) + } + + log.Printf("[INFO] Read %d services (linked to %d distinct repos) from %s", len(services), len(distinctRepos), inputFile) + + return services, distinctRepos, nil +} diff --git a/ciplatforms/internal/io/output.go b/ciplatforms/internal/io/output.go new file mode 100644 index 0000000..cb3cd94 --- /dev/null +++ b/ciplatforms/internal/io/output.go @@ -0,0 +1,62 @@ +package io + +import ( + "encoding/csv" + "encoding/json" + "fmt" + "os" + + "github.com/mozilla-services/rapid-release-model/ciplatforms/internal/github" +) + +type ResultWriter interface { + WriteResults(filename string, services []github.Service) error +} + +type JSONResultWriter struct{} + +// WriteResults saves the results to a JSON file. +func (j JSONResultWriter) WriteResults(filename string, services []github.Service) error { + data, err := json.MarshalIndent(services, "", " ") + if err != nil { + return err + } + return os.WriteFile(filename, data, 0644) +} + +type CSVResultWriter struct{} + +// WriteResults saves the results to a CSV file. +func (c CSVResultWriter) WriteResults(filename string, services []github.Service) error { + file, err := os.Create(filename) + if err != nil { + return fmt.Errorf("error creating file %s: %w", filename, err) + } + defer file.Close() + + writer := csv.NewWriter(file) + defer writer.Flush() + + // Write header row + header := []string{"service", "repo", "circleci", "github_actions", "taskcluster", "accessible"} + if err := writer.Write(header); err != nil { + return fmt.Errorf("error writing header to CSV file: %w", err) + } + + // Write each service's data as a CSV row + for _, service := range services { + row := []string{ + service.Name, + fmt.Sprintf("%s/%s", service.Repository.Owner, service.Repository.Name), + fmt.Sprintf("%t", service.Repository.CircleCI), + fmt.Sprintf("%t", service.Repository.GitHubActions), + fmt.Sprintf("%t", service.Repository.Taskcluster), + fmt.Sprintf("%t", service.Repository.Accessible), + } + if err := writer.Write(row); err != nil { + return fmt.Errorf("error writing row to CSV file: %w", err) + } + } + + return nil +} diff --git a/ciplatforms/main.go b/ciplatforms/main.go new file mode 100644 index 0000000..63db17d --- /dev/null +++ b/ciplatforms/main.go @@ -0,0 +1,7 @@ +package main + +import "github.com/mozilla-services/rapid-release-model/ciplatforms/cmd" + +func main() { + cmd.Execute() +}