diff --git a/.ansible-lint b/.ansible-lint new file mode 100644 index 00000000..bc5c821b --- /dev/null +++ b/.ansible-lint @@ -0,0 +1,5 @@ +--- +warn_list: + - var-naming[no-role-prefix] + - yaml[comments-indentation] + - yaml[line-length] diff --git a/README.md b/README.md index 99f76205..e372f559 100644 --- a/README.md +++ b/README.md @@ -22,7 +22,7 @@ on processor architecture: ## System requirements Deployment environment must have Ansible 2.4.0+ -Master and nodes must have passwordless SSH access +Server and agent nodes must have passwordless SSH access ## Usage @@ -48,7 +48,7 @@ k3s_cluster: If needed, you can also edit `vars` section at the bottom to match your environment. If multiple hosts are in the server group the playbook will automatically setup k3s in HA mode with embedded etcd. -An odd number of server nodes is recommended (3,5,7). Read the offical documentation below for more information and options. +An odd number of server nodes is required (3,5,7). Read the official documentation below for more information and options. https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/ Using a loadbalancer or VIP as the API endpoint is preferred but not covered here. @@ -61,8 +61,19 @@ ansible-playbook playbook/site.yml -i inventory.yml ## Kubeconfig -To confirm access to your **Kubernetes** cluster use the following: +After successful bringup, the kubeconfig of the cluster is copied to the control-node and set as default (`~/.kube/config`). +Assuming you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) installed, you can confirm access to your **Kubernetes** cluster with the following: ```bash kubectl get nodes ``` + +## Local Testing + +A Vagrantfile is provided that provisions a 5-node cluster using LibVirt or Virtualbox and Vagrant. To use it: + +```bash +vagrant up +``` + +By default, each node is given 2 cores and 2GB of RAM and runs Ubuntu 20.04. You can customize these settings by editing the `Vagrantfile`.
\ No newline at end of file diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 00000000..9c3c96f0 --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,56 @@ +# ENV['VAGRANT_NO_PARALLEL'] = 'no' +NODE_ROLES = ["server-0", "server-1", "server-2", "agent-0", "agent-1"] +NODE_BOXES = ['generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004'] +NODE_CPUS = 2 +NODE_MEMORY = 2048 +# Virtualbox >= 6.1.28 requires `/etc/vbox/network.conf` for expanded private networks +NETWORK_PREFIX = "10.10.10" + +def provision(vm, role, node_num) + vm.box = NODE_BOXES[node_num] + vm.hostname = role + # We use a private network because the default IPs are dynamically assigned + # during provisioning. This makes it impossible to know the server-0 IP when + # provisioning subsequent servers and agents. A private network allows us to + # assign static IPs to each node, and thus provide a known IP for the API endpoint. + node_ip = "#{NETWORK_PREFIX}.#{100+node_num}" + # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 + vm.network "private_network", ip: node_ip, netmask: "255.255.255.0" + + vm.provision "ansible", run: 'once' do |ansible| + ansible.compatibility_mode = "2.0" + ansible.playbook = "playbook/site.yml" + ansible.groups = { + "server" => NODE_ROLES.grep(/^server/), + "agent" => NODE_ROLES.grep(/^agent/), + "k3s_cluster:children" => ["server", "agent"], + } + ansible.extra_vars = { + k3s_version: "v1.26.5+k3s1", + api_endpoint: "#{NETWORK_PREFIX}.100", + token: "myvagrant", + # Required to use the private network configured above: pass this node's static IP and the eth1 interface to k3s + extra_server_args: "--node-external-ip #{node_ip} --flannel-iface eth1", + extra_agent_args: "--node-external-ip #{node_ip} --flannel-iface eth1", + } + end +end + +Vagrant.configure("2") do |config| + # Default provider is libvirt, virtualbox is only provided as a backup + config.vm.provider "libvirt" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end
+ config.vm.provider "virtualbox" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end + + NODE_ROLES.each_with_index do |name, i| + config.vm.define name do |node| + provision(node.vm, name, i) + end + end + +end diff --git a/inventory-sample.yml b/inventory-sample.yml index c32a10b1..afeca60d 100644 --- a/inventory-sample.yml +++ b/inventory-sample.yml @@ -9,14 +9,17 @@ k3s_cluster: 192.16.35.12 192.16.35.13 + # Required Vars vars: ansible_port: 22 ansible_user: debian k3s_version: v1.25.5+k3s2 - k3s_server_location: /var/lib/rancher/k3s - systemd_dir: /etc/systemd/system + token: "mytoken" # Use ansible vault if you want to keep it secret api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}" - api_port: 6443 extra_server_args: "" - extra_server_init_args: "" extra_agent_args: "" + + # Optional vars + # api_port: 6443 + # k3s_server_location: /var/lib/rancher/k3s + # systemd_dir: /etc/systemd/system diff --git a/roles/download/tasks/main.yml b/roles/download/tasks/main.yml index ee420672..787440ba 100644 --- a/roles/download/tasks/main.yml +++ b/roles/download/tasks/main.yml @@ -1,38 +1,17 @@ --- -- name: Download k3s binary x64 +- name: Download k3s install script ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-amd64.txt + url: https://get.k3s.io/ timeout: 120 - dest: /usr/local/bin/k3s + dest: /usr/local/bin/k3s-install.sh owner: root group: root mode: 0755 - when: ansible_facts.architecture == "x86_64" -- name: Download k3s binary arm64 - ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-arm64 - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm64.txt - timeout: 120 - dest: /usr/local/bin/k3s - owner: root - group: root - mode: 0755 - when: - - ( 
ansible_facts.architecture is search("arm") and - ansible_facts.userspace_bits == "64" ) or - ansible_facts.architecture is search("aarch64") - -- name: Download k3s binary armhf - ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-armhf - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm.txt - timeout: 120 - dest: /usr/local/bin/k3s - owner: root - group: root - mode: 0755 - when: - - ansible_facts.architecture is search("arm") - - ansible_facts.userspace_bits == "32" +- name: Download k3s binary + ansible.builtin.command: + cmd: /usr/local/bin/k3s-install.sh + environment: + INSTALL_K3S_SKIP_START: "true" + INSTALL_K3S_VERSION: "{{ k3s_version }}" + changed_when: true diff --git a/roles/k3s/agent/defaults/main.yml b/roles/k3s/agent/defaults/main.yml new file mode 100644 index 00000000..cbcb1e96 --- /dev/null +++ b/roles/k3s/agent/defaults/main.yml @@ -0,0 +1,4 @@ +--- +k3s_server_location: "/var/lib/rancher/k3s" +systemd_dir: "/etc/systemd/system" +api_port: 6443 diff --git a/roles/k3s/agent/tasks/main.yml b/roles/k3s/agent/tasks/main.yml index 8167567c..b89520b5 100644 --- a/roles/k3s/agent/tasks/main.yml +++ b/roles/k3s/agent/tasks/main.yml @@ -1,4 +1,5 @@ --- + - name: Copy K3s service file ansible.builtin.template: src: "k3s-agent.service.j2" @@ -11,5 +12,5 @@ ansible.builtin.systemd: name: k3s-agent daemon_reload: true - state: restarted + state: started enabled: true diff --git a/roles/k3s/agent/templates/k3s-agent.service.j2 b/roles/k3s/agent/templates/k3s-agent.service.j2 index a806bbb9..59261e3e 100644 --- a/roles/k3s/agent/templates/k3s-agent.service.j2 +++ b/roles/k3s/agent/templates/k3s-agent.service.j2 @@ -1,13 +1,17 @@ [Unit] Description=Lightweight Kubernetes Documentation=https://k3s.io +Wants=network-online.target After=network-online.target +[Install] +WantedBy=multi-user.target + [Service] Type=notify -ExecStartPre=-/sbin/modprobe br_netfilter 
-ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }} +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead @@ -19,6 +23,7 @@ TasksMax=infinity TimeoutStartSec=0 Restart=always RestartSec=5s - -[Install] -WantedBy=multi-user.target +ExecStartPre=/bin/sh -xc '! /usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }} \ No newline at end of file diff --git a/roles/k3s/server/defaults/main.yml b/roles/k3s/server/defaults/main.yml new file mode 100644 index 00000000..cbcb1e96 --- /dev/null +++ b/roles/k3s/server/defaults/main.yml @@ -0,0 +1,4 @@ +--- +k3s_server_location: "/var/lib/rancher/k3s" +systemd_dir: "/etc/systemd/system" +api_port: 6443 diff --git a/roles/k3s/server/tasks/main.yml b/roles/k3s/server/tasks/main.yml index 5f6bd2db..63196c27 100644 --- a/roles/k3s/server/tasks/main.yml +++ b/roles/k3s/server/tasks/main.yml @@ -2,50 +2,30 @@ - name: Init first server node when: ansible_hostname == groups['server'][0] block: - - name: Start temporary service for HA cluster - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}} - # noqa: jinja[spacing] - creates: "{{ k3s_server_location }}/server/node-token" - when: groups['server'] | length > 1 - - - name: Start temporary service for single 
server cluster - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} - creates: "{{ k3s_server_location }}/server/node-token" + - name: Copy K3s service file [Single] when: groups['server'] | length == 1 - - - name: Wait for node-token - ansible.builtin.wait_for: - path: "{{ k3s_server_location }}/server/node-token" - - - name: Register node-token file access mode - ansible.builtin.stat: - path: "{{ k3s_server_location }}/server/node-token" - register: p - - - name: Change file access node-token - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "g+rx,o+rx" - - - name: Read node-token from server - ansible.builtin.slurp: - path: "{{ k3s_server_location }}/server/node-token" - register: node_token - - - name: Store server node-token - ansible.builtin.set_fact: - token: "{{ node_token.content | b64decode | regex_replace('\n', '') }}" - - - name: Restore node-token file access - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "{{ p.stat.mode }}" + ansible.builtin.template: + src: "k3s-single.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 + + - name: Copy K3s service file [HA] + when: groups['server'] | length > 1 + ansible.builtin.template: + src: "k3s-cluster-init.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 + + - name: Enable and check K3s service + ansible.builtin.systemd: + name: k3s + daemon_reload: true + state: started + enabled: true - name: Create directory .kube ansible.builtin.file: @@ -54,6 +34,10 @@ owner: "{{ ansible_user }}" mode: "u=rwx,g=rx,o=" + - name: Pause to allow server startup + ansible.builtin.pause: + seconds: 10 + - name: Copy config file to user home directory ansible.builtin.copy: src: /etc/rancher/k3s/k3s.yaml @@ -76,17 +60,28 
@@ flat: true - name: Start other server if any and verify status + when: + - (groups['server'] | length) > 1 + - ansible_hostname != groups['server'][0] block: - - name: Init additonal server nodes - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }} - --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} - creates: "{{ k3s_server_location }}/server/node-token" - when: ansible_hostname != groups['server'][0] + - name: Copy K3s service file [HA] + when: groups['server'] | length > 1 + ansible.builtin.template: + src: "k3s-ha.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 + + - name: Enable and check K3s service + ansible.builtin.systemd: + name: k3s + daemon_reload: true + state: started + enabled: true - name: Verify that all server nodes joined + when: (groups['server'] | length) > 1 ansible.builtin.command: cmd: > k3s kubectl get nodes -l "node-role.kubernetes.io/control-plane=true" -o=jsonpath="{.items[*].metadata.name}" @@ -95,28 +90,6 @@ retries: 20 delay: 10 changed_when: false - always: - - name: Kill the temporary init service - ansible.builtin.systemd: - name: k3s-init - state: stopped - failed_when: false - -- name: Copy K3s service file - ansible.builtin.template: - src: "k3s-server.service.j2" - dest: "{{ systemd_dir }}/k3s-server.service" - owner: root - group: root - mode: 0644 - register: k3s_service - -- name: Enable and check K3s service - ansible.builtin.systemd: - name: k3s-server - daemon_reload: true - state: restarted - enabled: true - name: Create symlinks ansible.builtin.file: diff --git a/roles/k3s/server/templates/k3s-cluster-init.service.j2 b/roles/k3s/server/templates/k3s-cluster-init.service.j2 new file mode 100644 index 00000000..0b793058 --- /dev/null +++ 
b/roles/k3s/server/templates/k3s-cluster-init.service.j2 @@ -0,0 +1,28 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. +LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --cluster-init --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/k3s/server/templates/k3s-ha.service.j2 b/roles/k3s/server/templates/k3s-ha.service.j2 new file mode 100644 index 00000000..bf61e62c --- /dev/null +++ b/roles/k3s/server/templates/k3s-ha.service.j2 @@ -0,0 +1,28 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. 
+LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/k3s/server/templates/k3s-server.service.j2 b/roles/k3s/server/templates/k3s-single.service.j2 similarity index 75% rename from roles/k3s/server/templates/k3s-server.service.j2 rename to roles/k3s/server/templates/k3s-single.service.j2 index 6e898eb8..86909394 100644 --- a/roles/k3s/server/templates/k3s-server.service.j2 +++ b/roles/k3s/server/templates/k3s-single.service.j2 @@ -1,13 +1,17 @@ [Unit] Description=Lightweight Kubernetes Documentation=https://k3s.io +Wants=network-online.target After=network-online.target +[Install] +WantedBy=multi-user.target + [Service] Type=notify -ExecStartPre=-/sbin/modprobe br_netfilter -ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }} +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead @@ -19,6 +23,6 @@ TasksMax=infinity TimeoutStartSec=0 Restart=always RestartSec=5s - -[Install] -WantedBy=multi-user.target +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/prereq/tasks/main.yml b/roles/prereq/tasks/main.yml index f6b74161..be707b05 100644 --- a/roles/prereq/tasks/main.yml +++ b/roles/prereq/tasks/main.yml @@ -4,6 +4,12 @@ state: disabled when: 
ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] +- name: Install Dependent Ubuntu Packages + when: ansible_distribution in ['Ubuntu'] + ansible.builtin.apt: + name: policycoreutils # Used by install script to restore SELinux context + update_cache: true + - name: Enable IPv4 forwarding ansible.posix.sysctl: name: net.ipv4.ip_forward @@ -53,16 +59,12 @@ validate: 'visudo -cf %s' when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] -- name: Make k3s directory - ansible.builtin.file: - path: "/var/lib/rancher" - mode: 0755 - state: directory - - name: Create symlink ansible.builtin.file: dest: /var/lib/rancher/k3s src: "{{ k3s_server_location }}" force: true state: link - when: k3s_server_location != "/var/lib/rancher/k3s" + when: + - k3s_server_location is defined + - k3s_server_location != "/var/lib/rancher/k3s" diff --git a/roles/raspberrypi/tasks/main.yml b/roles/raspberrypi/tasks/main.yml index 53515071..0681f92b 100644 --- a/roles/raspberrypi/tasks/main.yml +++ b/roles/raspberrypi/tasks/main.yml @@ -34,7 +34,7 @@ ansible_facts.lsb.description|default("") is match("Debian") ) - name: Set detected_distribution to ArchLinux (ARM64) - set_fact: + ansible.builtin.set_fact: detected_distribution: Archlinux when: - ansible_facts.architecture is search("aarch64") diff --git a/roles/raspberrypi/tasks/prereq/Archlinux.yml b/roles/raspberrypi/tasks/prereq/Archlinux.yml index 8251abb9..367f44db 100644 --- a/roles/raspberrypi/tasks/prereq/Archlinux.yml +++ b/roles/raspberrypi/tasks/prereq/Archlinux.yml @@ -1,14 +1,15 @@ --- - name: Enable cgroup via boot commandline if not already enabled for Archlinux - lineinfile: + ansible.builtin.lineinfile: path: /boot/boot.txt search_string: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" line: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}"
cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory register: kernel_cmdline_cgroup - name: Create - shell: ./mkscr + ansible.builtin.command: ./mkscr args: chdir: /boot notify: reboot - when: kernel_cmdline_cgroup.changed + changed_when: true # must report changed, otherwise the `reboot` handler is never notified + when: kernel_cmdline_cgroup.changed # noqa: no-handler