diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md deleted file mode 100644 index 6dd45e901..000000000 --- a/.github/ISSUE_TEMPLATE/bug_report.md +++ /dev/null @@ -1,51 +0,0 @@ -name: Bug Report -description: File a bug report -labels: ["Type: Bug"] -body: - - type: markdown - attributes: - value: > - Thanks for taking the time to fill out this bug report! - - type: textarea - id: bug-description - attributes: - label: Bug Description - description: > - Please explain the bug in a few short sentences. - validations: - required: true - - type: textarea - id: reproduction - attributes: - label: Reproduction steps - description: > - Are you able to consistently reproduce the issue? Please add a list of steps that lead to the bug. - validations: - required: true - - type: textarea - id: environment - attributes: - label: System information - description: > - We need to know a bit more about the context in which you run the snap. - Please provide an overview of your setup (e.g. number of nodes) and the output of: - `snap version` - `uname -a` - `snap list k8s` - `snap services k8s` - `snap logs k8s -n 10000` - `k8s status` - validations: - required: true - - type: textarea - id: fix - attributes: - label: Can you suggest a fix? - description: > - This section is optional. How do you propose that the issue be fixed? - - type: textarea - id: contribution - attributes: - label: yes/no, or @mention maintainers. Community contributions are welcome. - description: > - Are you interested in contributing a fix? \ No newline at end of file diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml new file mode 100644 index 000000000..aced63356 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -0,0 +1,64 @@ +name: Bug Report +description: Something is not working + +body: + - type: markdown + attributes: + value: | + Thank you for submitting an issue. 
Please fill in the template below with + information about the bug you encountered. + + - type: textarea + id: summary + attributes: + label: Summary + description: Please explain the bug in a few short sentences. + placeholder: Detail the bug here... + validations: + required: true + + - type: textarea + id: what-should-happen + attributes: + label: What Should Happen Instead? + description: Please explain what the expected behavior is. + placeholder: Explain the expected outcome... + validations: + required: true + + - type: textarea + id: reproduction-steps + attributes: + label: Reproduction Steps + description: Are you able to consistently reproduce the issue? Please add a list of steps that lead to the bug. + placeholder: "1.\n2.\n" + validations: + required: true + + - type: textarea + id: system-information + attributes: + label: System information + description: Please attach the tarball generated by running sudo /snap/k8s/current/k8s/scripts/inspect.sh + placeholder: Attach your system info tarball... + validations: + required: true + + - type: textarea + id: suggest-fix + attributes: + label: Can you suggest a fix? + description: How do you propose that the issue be fixed? + placeholder: Suggest a fix if you have one... + + - type: textarea + id: contribute-fix + attributes: + label: Are you interested in contributing a fix? + description: yes/no, or @mention maintainers. Community contributions are welcome. + placeholder: Are you willing to contribute a fix? 
+
+  - type: markdown
+    attributes:
+      value: Thank you for making the k8s-snap better
+
diff --git a/k8s/scripts/inspect.sh b/k8s/scripts/inspect.sh
new file mode 100755
index 000000000..c6939e652
--- /dev/null
+++ b/k8s/scripts/inspect.sh
@@ -0,0 +1,202 @@
+#!/usr/bin/env bash
+
+# Directory, under the current working directory, where the report is assembled.
+INSPECT_DUMP=$(pwd)/inspection-report
+
+# Logging helpers: print a colored level tag followed by the message in $1.
+function log_success {
+  printf -- '\033[32m SUCCESS: \033[0m %s\n' "$1"
+}
+
+function log_info {
+  printf -- '\033[34m INFO: \033[0m %s\n' "$1"
+}
+
+function log_warning {
+  printf -- '\033[33m WARNING: \033[0m %s\n' "$1"
+}
+
+function log_warning_red {
+  printf -- '\033[31m WARNING: \033[0m %s\n' "$1"
+}
+
+# Succeeds when the output of `k8s local-node-status` mentions "control-plane".
+function is_control_plane_node {
+  k8s local-node-status | grep -q "control-plane"
+}
+
+# Succeeds when systemctl reports snap.$1 as "active (running)".
+function is_service_active {
+  local service
+  service=$1
+
+  systemctl status "snap.$service" | grep -q "active (running)"
+}
+
+# Copies the service argument files into the report directory.
+function collect_args {
+  log_info "Copy service args to the final report tarball"
+  cp -r --no-preserve=mode,ownership /var/snap/k8s/common/args "$INSPECT_DUMP"
+}
+
+# Writes a `kubectl cluster-info dump` into the report; its console output is discarded.
+function collect_cluster_info {
+  log_info "Copy k8s cluster-info dump to the final report tarball"
+  k8s kubectl cluster-info dump --output-directory "$INSPECT_DUMP/cluster-info" &>/dev/null
+}
+
+# Copies the snap's bom.json into the report as sbom.json.
+function collect_sbom {
+  log_info "Copy SBOM to the final report tarball"
+  cp --no-preserve=mode,ownership /snap/k8s/current/bom.json "$INSPECT_DUMP"/sbom.json
+}
+
+# Captures uname, snap and k8s command output plus the dqlite/k8sd cluster files.
+function collect_k8s_diagnostics {
+  # The per-service directories are otherwise only created for the services
+  # checked on this node; create the two written to below so redirects cannot fail.
+  mkdir -p "$INSPECT_DUMP/k8s.k8sd" "$INSPECT_DUMP/k8s.k8s-dqlite"
+
+  log_info "Copy uname to the final report tarball"
+  uname -a &>"$INSPECT_DUMP/uname.log"
+
+  log_info "Copy snap diagnostics to the final report tarball"
+  snap version &>"$INSPECT_DUMP/snap-version.log"
+  snap list k8s &>"$INSPECT_DUMP/snap-list-k8s.log"
+  snap services k8s &>"$INSPECT_DUMP/snap-services-k8s.log"
+  snap logs k8s -n 10000 &>"$INSPECT_DUMP/snap-logs-k8s.log"
+
+  log_info "Copy k8s diagnostics to the final report tarball"
+  k8s kubectl version &>"$INSPECT_DUMP/k8s-version.log"
+  k8s status &>"$INSPECT_DUMP/k8s-status.log"
+  k8s get &>"$INSPECT_DUMP/k8s-get.log"
+  k8s kubectl get cm k8sd-config -n kube-system -o yaml &>"$INSPECT_DUMP/k8s.k8sd/k8sd-configmap.log"
+  k8s kubectl get cm -n kube-system &>"$INSPECT_DUMP/k8s-configmaps.log"
+
+  cp --no-preserve=mode,ownership /var/snap/k8s/common/var/lib/k8s-dqlite/cluster.yaml "$INSPECT_DUMP/k8s.k8s-dqlite/k8s-dqlite-cluster.yaml"
+  cp --no-preserve=mode,ownership /var/snap/k8s/common/var/lib/k8s-dqlite/info.yaml "$INSPECT_DUMP/k8s.k8s-dqlite/k8s-dqlite-info.yaml"
+  cp --no-preserve=mode,ownership /var/snap/k8s/common/var/lib/k8sd/state/database/cluster.yaml "$INSPECT_DUMP/k8s.k8sd/k8sd-cluster.yaml"
+  cp --no-preserve=mode,ownership /var/snap/k8s/common/var/lib/k8sd/state/database/info.yaml "$INSPECT_DUMP/k8s.k8sd/k8sd-info.yaml"
+
+  ls -la /var/snap/k8s/common/var/lib/k8s-dqlite &>"$INSPECT_DUMP/k8s.k8s-dqlite/k8s-dqlite-files.log"
+  ls -la /var/snap/k8s/common/var/lib/k8sd &>"$INSPECT_DUMP/k8s.k8sd/k8sd-files.log"
+}
+
+# Stores systemctl status, the restart count and the journal of snap.$1 under $INSPECT_DUMP/$1.
+function collect_service_diagnostics {
+  local service
+  service=$1
+
+  mkdir -p "$INSPECT_DUMP/$service"
+
+  local status_file
+  status_file="$INSPECT_DUMP/$service/systemctl.log"
+
+  systemctl status "snap.$service" &>"$status_file"
+
+  local n_restarts
+  n_restarts=$(systemctl show "snap.$service" -p NRestarts | cut -d'=' -f2)
+
+  printf -- "%s -> %s\n" "$service" "$n_restarts" >> "$INSPECT_DUMP/nrestarts.log"
+
+  # Guard against an empty or non-numeric value so the integer test cannot error out.
+  if [[ "$n_restarts" =~ ^[0-9]+$ ]] && [ "$n_restarts" -gt 0 ]; then
+    log_warning "Service $service has restarted $n_restarts times due to errors"
+  fi
+
+  journalctl -n 100000 -u "snap.$service" &>"$INSPECT_DUMP/$service/journal.log"
+}
+
+# Best-effort capture of addresses, routes, firewall rules and listening sockets (IPv4 and IPv6).
+function collect_network_diagnostics {
+  log_info "Copy network diagnostics to the final report tarball"
+  ip a &>"$INSPECT_DUMP/ip-a.log" || true
+  ip r &>"$INSPECT_DUMP/ip-r.log" || true
+  iptables-save &>"$INSPECT_DUMP/iptables.log" || true
+  iptables-legacy-save &>"$INSPECT_DUMP/iptables-legacy.log" || true
+  ss -plnt &>"$INSPECT_DUMP/ss-plnt.log" || true
+
+  ip6tables-save &>"$INSPECT_DUMP/iptables6.log" || true
+  ip6tables-legacy-save &>"$INSPECT_DUMP/iptables6-legacy.log" || true
+  ss -6 -plnt &>"$INSPECT_DUMP/ss6-plnt.log" || true
+}
+
+# Collects diagnostics for every service name given as an argument and reports whether it runs.
+function check_expected_services {
+  local services
+  services=("$@")
+
+  for service in "${services[@]}"; do
+    collect_service_diagnostics "$service"
+    if ! is_service_active "$service"; then
+      log_info "Service $service is not running"
+      log_warning "Service $service should be running on this node"
+    else
+      log_info "Service $service is running"
+    fi
+  done
+}
+
+# Packs the inspection-report directory into a timestamped .tar.gz in the current directory.
+function build_report_tarball {
+  local now_is
+  now_is=$(date +"%Y%m%d_%H%M%S")
+
+  tar -C "$(pwd)" -cf "$(pwd)/inspection-report-${now_is}.tar" inspection-report &>/dev/null
+  gzip "$(pwd)/inspection-report-${now_is}.tar"
+  log_success "Report tarball is at $(pwd)/inspection-report-$now_is.tar.gz"
+}
+
+if [ "$EUID" -ne 0 ]; then
+  printf -- "Elevated permissions are needed for this command. Please use sudo.\n"
+  exit 1
+fi
+
+rm -rf "$INSPECT_DUMP"
+mkdir -p "$INSPECT_DUMP"
+
+printf -- 'Collecting service information\n'
+
+if is_control_plane_node; then
+  printf -- 'Running inspection on a control-plane node\n'
+  printf -- 'Inspection ran on a control plane node.' >"$INSPECT_DUMP/is-control-plane-node"
+else
+  printf -- 'Running inspection on a worker node\n'
+  printf -- 'Inspection ran on a worker node.' >"$INSPECT_DUMP/is-worker-node"
+fi
+
+control_plane_services=("k8s.containerd" "k8s.kube-proxy" "k8s.k8s-dqlite" "k8s.k8sd" "k8s.kube-apiserver" "k8s.kube-controller-manager" "k8s.kube-scheduler" "k8s.kubelet")
+worker_services=("k8s.containerd" "k8s.k8s-apiserver-proxy" "k8s.kubelet" "k8s.k8sd" "k8s.kube-proxy")
+
+if is_control_plane_node; then
+  check_expected_services "${control_plane_services[@]}"
+else
+  check_expected_services "${worker_services[@]}"
+fi
+
+printf -- 'Collecting service arguments\n'
+collect_args
+
+printf -- 'Collecting k8s cluster-info\n'
+collect_cluster_info
+
+printf -- 'Collecting SBOM\n'
+collect_sbom
+
+printf -- 'Collecting system information\n'
+collect_k8s_diagnostics
+
+printf -- 'Collecting networking information\n'
+collect_network_diagnostics
+
+# Warn if any collected file appears to contain a certificate or private key.
+matches=$(grep -rlEi "BEGIN CERTIFICATE|PRIVATE KEY" inspection-report)
+if [ -n "$matches" ]; then
+  matches_comma_separated=$(echo "$matches" | tr '\n' ',')
+  log_warning_red 'Unexpected private key or certificate found in the report:'
+  log_warning_red "Found in the following files: ${matches_comma_separated%,}"
+  log_warning_red 'Please remove the private key or certificate from the report before sharing.'
+fi
+
+printf -- 'Building the report tarball\n'
+build_report_tarball
diff --git a/src/k8s/cmd/k8sd/k8sd_sql.go b/src/k8s/cmd/k8sd/k8sd_sql.go
index 2c365c5bd..c2380fc8e 100644
--- a/src/k8s/cmd/k8sd/k8sd_sql.go
+++ b/src/k8s/cmd/k8sd/k8sd_sql.go
@@ -23,13 +23,24 @@ func newSqlCmd(env cmdutil.ExecutionEnvironment) *cobra.Command {
 				return
 			}
 
-			_, batch, err := app.MicroCluster().SQL(cmd.Context(), args[0])
+			str, batch, err := app.MicroCluster().SQL(cmd.Context(), args[0])
 			if err != nil {
 				cmd.PrintErrf("Error: Failed to execute the SQL query.\n\nThe error was: %v\n", err)
 				env.Exit(1)
 				return
 			}
-			cmd.Println(batch.Results[0].Rows)
+
+			// For .dump and .schema the textual output is the first return value.
+			if args[0] == ".dump" || args[0] == ".schema" {
+				cmd.Println(str)
+				return
+			}
+
+			// NOTE(review): guards the index in case no result set is returned; confirm against microcluster.
+			if len(batch.Results) == 0 {
+				return
+			}
+			cmd.Println(batch.Results[0].Rows)
 		},
 	}
 }
diff --git a/src/k8s/cmd/util/args.go b/src/k8s/cmd/util/args.go
index 6354a82d7..6a03c31db 100644
--- a/src/k8s/cmd/util/args.go
+++ b/src/k8s/cmd/util/args.go
@@ -6,7 +6,7 @@ import (
 	"github.com/spf13/cobra"
 )
 
-// MinimumNArgs requires at least N args to be passe.
+// MinimumNArgs requires at least N args to be passed.
 func MinimumNArgs(env ExecutionEnvironment, n int) func(cmd *cobra.Command, args []string) error {
 	return func(cmd *cobra.Command, args []string) error {
 		if len(args) < n {