From 0cd9ffc17f6e3a22b2001b03cc5ba59cf7df3db0 Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Wed, 8 Nov 2023 13:41:50 -0800 Subject: [PATCH 1/5] Use K3s install script instead of direct download Signed-off-by: Derek Nola --- roles/download/tasks/main.yml | 41 +++++++++-------------------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/roles/download/tasks/main.yml b/roles/download/tasks/main.yml index ee420672..787440ba 100644 --- a/roles/download/tasks/main.yml +++ b/roles/download/tasks/main.yml @@ -1,38 +1,17 @@ --- -- name: Download k3s binary x64 +- name: Download k3s install script ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-amd64.txt + url: https://get.k3s.io/ timeout: 120 - dest: /usr/local/bin/k3s + dest: /usr/local/bin/k3s-install.sh owner: root group: root mode: 0755 - when: ansible_facts.architecture == "x86_64" -- name: Download k3s binary arm64 - ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-arm64 - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm64.txt - timeout: 120 - dest: /usr/local/bin/k3s - owner: root - group: root - mode: 0755 - when: - - ( ansible_facts.architecture is search("arm") and - ansible_facts.userspace_bits == "64" ) or - ansible_facts.architecture is search("aarch64") - -- name: Download k3s binary armhf - ansible.builtin.get_url: - url: https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/k3s-armhf - checksum: sha256:https://github.com/k3s-io/k3s/releases/download/{{ k3s_version }}/sha256sum-arm.txt - timeout: 120 - dest: /usr/local/bin/k3s - owner: root - group: root - mode: 0755 - when: - - ansible_facts.architecture is search("arm") - - ansible_facts.userspace_bits == "32" +- name: Download k3s binary + ansible.builtin.command: + cmd: /usr/local/bin/k3s-install.sh + environment: + INSTALL_K3S_SKIP_START: "true" + INSTALL_K3S_VERSION: "{{ k3s_version }}" + changed_when: true From 9ecdc933ca279f1a4243a203e0432be6c7d9ef22 Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Wed, 8 Nov 2023 13:42:11 -0800 Subject: [PATCH 2/5] Add Vagrantfile for local testing Signed-off-by: Derek Nola --- .ansible-lint | 5 ++ Vagrantfile | 56 ++++++++++++++++++++ inventory-sample.yml | 7 ++- roles/k3s/agent/defaults/main.yml | 3 ++ roles/k3s/agent/tasks/main.yml | 1 + roles/k3s/server/defaults/main.yml | 3 ++ roles/k3s/server/tasks/main.yml | 1 + roles/prereq/tasks/main.yml | 10 +++- roles/raspberrypi/tasks/main.yml | 2 +- roles/raspberrypi/tasks/prereq/Archlinux.yml | 7 +-- 10 files changed, 88 insertions(+), 7 deletions(-) create mode 100644 .ansible-lint create mode 100644 Vagrantfile create mode 100644 roles/k3s/agent/defaults/main.yml create mode 100644 roles/k3s/server/defaults/main.yml diff --git a/.ansible-lint b/.ansible-lint new file mode 100644 index 00000000..bc5c821b --- /dev/null +++ b/.ansible-lint @@ -0,0 +1,5 @@ +--- +warn_list: + - var-naming[no-role-prefix] + - yaml[comments-indentation] + - yaml[line-length] diff --git a/Vagrantfile b/Vagrantfile new file mode 100644 index 00000000..795daa8a --- /dev/null +++ b/Vagrantfile @@ -0,0 +1,56 @@ +# ENV['VAGRANT_NO_PARALLEL'] = 'no' +NODE_ROLES = ["server-0", "server-1", "server-2", "agent-0", "agent-1"] +NODE_BOXES = ['generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004', 'generic/ubuntu2004'] 
+NODE_CPUS = 2 +NODE_MEMORY = 2048 +# Virtualbox >= 6.1.28 require `/etc/vbox/network.conf` for expanded private networks +NETWORK_PREFIX = "10.10.10" + +def provision(vm, role, node_num) + vm.box = NODE_BOXES[node_num] + vm.hostname = role + # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 + node_ip = "#{NETWORK_PREFIX}.#{100+node_num}" + vm.network "private_network", ip: node_ip, netmask: "255.255.255.0" + + vm.provision "ansible", run: 'once' do |ansible| + ansible.compatibility_mode = "2.0" + ansible.verbose = "vv" + ansible.playbook = "playbook/site.yml" + ansible.groups = { + "server" => NODE_ROLES.grep(/^server/), + "agent" => NODE_ROLES.grep(/^agent/), + "k3s_cluster:children" => ["server", "agent"], + } + ansible.extra_vars = { + k3s_version: "v1.26.5+k3s1", + api_endpoint: "#{NETWORK_PREFIX}.100", + api_port: 6443, + extra_server_args: "", + extra_server_init_args: "", + extra_agent_args: "", + } + end + +end + +Vagrant.configure("2") do |config| + # Default provider is libvirt, virtualbox is only provided as a backup + config.vm.provider "libvirt" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end + config.vm.provider "virtualbox" do |v| + v.cpus = NODE_CPUS + v.memory = NODE_MEMORY + end + + # Must iterate on the index, vagrant does not understand iterating + # over the node roles themselves + NODE_ROLES.each_with_index do |name, i| + config.vm.define name do |node| + provision(node.vm, name, i) + end + end + +end diff --git a/inventory-sample.yml b/inventory-sample.yml index c32a10b1..cf46422b 100644 --- a/inventory-sample.yml +++ b/inventory-sample.yml @@ -9,14 +9,17 @@ k3s_cluster: 192.16.35.12 192.16.35.13 + # Required Vars vars: ansible_port: 22 ansible_user: debian k3s_version: v1.25.5+k3s2 - k3s_server_location: /var/lib/rancher/k3s - systemd_dir: /etc/systemd/system api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}" api_port: 6443 extra_server_args: "" extra_server_init_args: "" extra_agent_args: "" + + # Optional vars + # k3s_server_location: /var/lib/rancher/k3s + # systemd_dir: /etc/systemd/system diff --git a/roles/k3s/agent/defaults/main.yml b/roles/k3s/agent/defaults/main.yml new file mode 100644 index 00000000..e0b678ef --- /dev/null +++ b/roles/k3s/agent/defaults/main.yml @@ -0,0 +1,3 @@ +--- +k3s_server_location: "/var/lib/rancher/k3s" +systemd_dir: "/etc/systemd/system" diff --git a/roles/k3s/agent/tasks/main.yml b/roles/k3s/agent/tasks/main.yml index 8167567c..2ab6b662 100644 --- a/roles/k3s/agent/tasks/main.yml +++ b/roles/k3s/agent/tasks/main.yml @@ -1,4 +1,5 @@ --- + - name: Copy K3s service file ansible.builtin.template: src: "k3s-agent.service.j2" diff --git a/roles/k3s/server/defaults/main.yml b/roles/k3s/server/defaults/main.yml new file mode 100644 index 00000000..e0b678ef --- /dev/null +++ b/roles/k3s/server/defaults/main.yml @@ -0,0 +1,3 @@ +--- +k3s_server_location: "/var/lib/rancher/k3s" +systemd_dir: "/etc/systemd/system" diff --git a/roles/k3s/server/tasks/main.yml b/roles/k3s/server/tasks/main.yml index 5f6bd2db..80283781 100644 --- a/roles/k3s/server/tasks/main.yml +++ b/roles/k3s/server/tasks/main.yml @@ -87,6 +87,7 @@ when: ansible_hostname != groups['server'][0] - name: Verify that all server nodes joined + when: (groups['server'] | length) > 1 ansible.builtin.command: cmd: > k3s kubectl get nodes -l "node-role.kubernetes.io/control-plane=true" -o=jsonpath="{.items[*].metadata.name}" diff --git a/roles/prereq/tasks/main.yml 
b/roles/prereq/tasks/main.yml index f6b74161..7a7932da 100644 --- a/roles/prereq/tasks/main.yml +++ b/roles/prereq/tasks/main.yml @@ -4,6 +4,12 @@ state: disabled when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] +- name: Install Dependent Ubuntu Packages + when: ansible_distribution in ['Ubuntu'] + ansible.builtin.apt: + name: policycoreutils # Used by install script to restore SELinux context + update_cache: yes + - name: Enable IPv4 forwarding ansible.posix.sysctl: name: net.ipv4.ip_forward @@ -65,4 +71,6 @@ src: "{{ k3s_server_location }}" force: true state: link - when: k3s_server_location != "/var/lib/rancher/k3s" + when: + - k3s_server_location is defined + - k3s_server_location != "/var/lib/rancher/k3s" diff --git a/roles/raspberrypi/tasks/main.yml b/roles/raspberrypi/tasks/main.yml index 53515071..0681f92b 100644 --- a/roles/raspberrypi/tasks/main.yml +++ b/roles/raspberrypi/tasks/main.yml @@ -34,7 +34,7 @@ ansible_facts.lsb.description|default("") is match("Debian") ) - name: Set detected_distribution to ArchLinux (ARM64) - set_fact: + ansible.builtin.set_fact: detected_distribution: Archlinux when: - ansible_facts.architecture is search("aarch64") diff --git a/roles/raspberrypi/tasks/prereq/Archlinux.yml b/roles/raspberrypi/tasks/prereq/Archlinux.yml index 8251abb9..367f44db 100644 --- a/roles/raspberrypi/tasks/prereq/Archlinux.yml +++ b/roles/raspberrypi/tasks/prereq/Archlinux.yml @@ -1,14 +1,15 @@ --- - name: Enable cgroup via boot commandline if not already enabled for Archlinux - lineinfile: + ansible.builtin.lineinfile: path: /boot/boot.txt search_string: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" line: setenv bootargs console=ttyS1,115200 console=tty0 root=PARTUUID=${uuid} rw rootwait smsc95xx.macaddr="${usbethaddr}" cgroup_enable=cpuset cgroup_memory=1 cgroup_enable=memory register: kernel_cmdline_cgroup - name: Create - shell: ./mkscr + ansible.builtin.command: ./mkscr args: chdir: /boot notify: reboot - when: kernel_cmdline_cgroup.changed + changed_when: false + when: kernel_cmdline_cgroup.changed # noqa: no-handler From 565c9fa0495a5d82893c87c113c416785b47b205 Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Wed, 8 Nov 2023 14:00:52 -0800 Subject: [PATCH 3/5] Enforce use of a defined token. Simplifies additional server and agent joining process. Signed-off-by: Derek Nola --- Vagrantfile | 16 +++++----- inventory-sample.yml | 4 +-- roles/k3s/agent/defaults/main.yml | 1 + .../k3s/agent/templates/k3s-agent.service.j2 | 2 +- roles/k3s/server/defaults/main.yml | 1 + roles/k3s/server/tasks/main.yml | 32 ++----------------- .../server/templates/k3s-server.service.j2 | 2 +- 7 files changed, 16 insertions(+), 42 deletions(-) diff --git a/Vagrantfile b/Vagrantfile index 795daa8a..80cee412 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -9,13 +9,16 @@ NETWORK_PREFIX = "10.10.10" def provision(vm, role, node_num) vm.box = NODE_BOXES[node_num] vm.hostname = role - # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 + # We use a private network because the default IPs are dynamicly assigned + # during provisioning. This makes it impossible to know the server-0 IP when + # provisioning subsequent servers and agents. A private network allows us to + # assign static IPs to each node, and thus provide a known IP for the API endpoint. 
node_ip = "#{NETWORK_PREFIX}.#{100+node_num}" + # An expanded netmask is required to allow VM<-->VM communication, virtualbox defaults to /32 vm.network "private_network", ip: node_ip, netmask: "255.255.255.0" vm.provision "ansible", run: 'once' do |ansible| ansible.compatibility_mode = "2.0" - ansible.verbose = "vv" ansible.playbook = "playbook/site.yml" ansible.groups = { "server" => NODE_ROLES.grep(/^server/), @@ -25,13 +28,12 @@ def provision(vm, role, node_num) ansible.extra_vars = { k3s_version: "v1.26.5+k3s1", api_endpoint: "#{NETWORK_PREFIX}.100", - api_port: 6443, - extra_server_args: "", - extra_server_init_args: "", + token: "myyagrant", + # Required to use the private network configured above + extra_server_args: "--node-external-ip #{node_ip} --flannel-iface eth1", extra_agent_args: "", } end - end Vagrant.configure("2") do |config| @@ -45,8 +47,6 @@ Vagrant.configure("2") do |config| v.memory = NODE_MEMORY end - # Must iterate on the index, vagrant does not understand iterating - # over the node roles themselves NODE_ROLES.each_with_index do |name, i| config.vm.define name do |node| provision(node.vm, name, i) diff --git a/inventory-sample.yml b/inventory-sample.yml index cf46422b..afeca60d 100644 --- a/inventory-sample.yml +++ b/inventory-sample.yml @@ -14,12 +14,12 @@ k3s_cluster: ansible_port: 22 ansible_user: debian k3s_version: v1.25.5+k3s2 + token: "mytoken" # Use ansible vault if you want to keep it secret api_endpoint: "{{ hostvars[groups['server'][0]]['ansible_host'] | default(groups['server'][0]) }}" - api_port: 6443 extra_server_args: "" - extra_server_init_args: "" extra_agent_args: "" # Optional vars + # api_port: 6443 # k3s_server_location: /var/lib/rancher/k3s # systemd_dir: /etc/systemd/system diff --git a/roles/k3s/agent/defaults/main.yml b/roles/k3s/agent/defaults/main.yml index e0b678ef..cbcb1e96 100644 --- a/roles/k3s/agent/defaults/main.yml +++ b/roles/k3s/agent/defaults/main.yml @@ -1,3 +1,4 @@ --- k3s_server_location: "/var/lib/rancher/k3s" systemd_dir: "/etc/systemd/system" +api_port: 6443 diff --git a/roles/k3s/agent/templates/k3s-agent.service.j2 b/roles/k3s/agent/templates/k3s-agent.service.j2 index a806bbb9..e0157b29 100644 --- a/roles/k3s/agent/templates/k3s-agent.service.j2 +++ b/roles/k3s/agent/templates/k3s-agent.service.j2 @@ -7,7 +7,7 @@ After=network-online.target Type=notify ExecStartPre=-/sbin/modprobe br_netfilter ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ hostvars[groups['server'][0]]['token'] }} {{ extra_agent_args }} +ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }} KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead diff --git a/roles/k3s/server/defaults/main.yml b/roles/k3s/server/defaults/main.yml index e0b678ef..cbcb1e96 100644 --- a/roles/k3s/server/defaults/main.yml +++ b/roles/k3s/server/defaults/main.yml @@ -1,3 +1,4 @@ --- k3s_server_location: "/var/lib/rancher/k3s" systemd_dir: "/etc/systemd/system" +api_port: 6443 diff --git a/roles/k3s/server/tasks/main.yml b/roles/k3s/server/tasks/main.yml index 80283781..2d8b0b05 100644 --- a/roles/k3s/server/tasks/main.yml +++ b/roles/k3s/server/tasks/main.yml @@ -6,7 +6,7 @@ ansible.builtin.command: cmd: > systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - 
--cluster-init --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}} + --cluster-init --token {{ token }} --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}} # noqa: jinja[spacing] creates: "{{ k3s_server_location }}/server/node-token" when: groups['server'] | length > 1 @@ -15,38 +15,10 @@ ansible.builtin.command: cmd: > systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} + --token {{ token }} --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} creates: "{{ k3s_server_location }}/server/node-token" when: groups['server'] | length == 1 - - name: Wait for node-token - ansible.builtin.wait_for: - path: "{{ k3s_server_location }}/server/node-token" - - - name: Register node-token file access mode - ansible.builtin.stat: - path: "{{ k3s_server_location }}/server/node-token" - register: p - - - name: Change file access node-token - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "g+rx,o+rx" - - - name: Read node-token from server - ansible.builtin.slurp: - path: "{{ k3s_server_location }}/server/node-token" - register: node_token - - - name: Store server node-token - ansible.builtin.set_fact: - token: "{{ node_token.content | b64decode | regex_replace('\n', '') }}" - - - name: Restore node-token file access - ansible.builtin.file: - path: "{{ k3s_server_location }}/server/node-token" - mode: "{{ p.stat.mode }}" - - name: Create directory .kube ansible.builtin.file: path: ~{{ ansible_user }}/.kube diff --git a/roles/k3s/server/templates/k3s-server.service.j2 b/roles/k3s/server/templates/k3s-server.service.j2 index 6e898eb8..92a1220e 100644 --- a/roles/k3s/server/templates/k3s-server.service.j2 +++ b/roles/k3s/server/templates/k3s-server.service.j2 @@ -7,7 +7,7 @@ After=network-online.target Type=notify ExecStartPre=-/sbin/modprobe br_netfilter ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }} +ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }} --token {{ token }} KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead From 20afd4f19ed1eee80a3c72a7be8402e96f8172f9 Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Wed, 8 Nov 2023 15:16:28 -0800 Subject: [PATCH 4/5] Simplify K3s service startup for HA Signed-off-by: Derek Nola --- Vagrantfile | 4 +- roles/k3s/agent/tasks/main.yml | 2 +- .../k3s/agent/templates/k3s-agent.service.j2 | 17 ++-- roles/k3s/server/tasks/main.yml | 88 +++++++++---------- .../templates/k3s-cluster-init.service.j2 | 28 ++++++ roles/k3s/server/templates/k3s-ha.service.j2 | 28 ++++++ ...erver.service.j2 => k3s-single.service.j2} | 16 ++-- roles/prereq/tasks/main.yml | 6 -- 8 files changed, 124 insertions(+), 65 deletions(-) create mode 100644 roles/k3s/server/templates/k3s-cluster-init.service.j2 create mode 100644 roles/k3s/server/templates/k3s-ha.service.j2 rename roles/k3s/server/templates/{k3s-server.service.j2 => k3s-single.service.j2} (75%) diff --git a/Vagrantfile b/Vagrantfile index 80cee412..9c3c96f0 100644 --- a/Vagrantfile +++ b/Vagrantfile @@ -28,10 +28,10 @@ def provision(vm, role, node_num) ansible.extra_vars = { k3s_version: "v1.26.5+k3s1", api_endpoint: "#{NETWORK_PREFIX}.100", - token: "myyagrant", + token: "myvagrant", # 
Required to use the private network configured above extra_server_args: "--node-external-ip #{node_ip} --flannel-iface eth1", - extra_agent_args: "", + extra_agent_args: "--node-external-ip #{node_ip} --flannel-iface eth1", } end end diff --git a/roles/k3s/agent/tasks/main.yml b/roles/k3s/agent/tasks/main.yml index 2ab6b662..b89520b5 100644 --- a/roles/k3s/agent/tasks/main.yml +++ b/roles/k3s/agent/tasks/main.yml @@ -12,5 +12,5 @@ ansible.builtin.systemd: name: k3s-agent daemon_reload: true - state: restarted + state: started enabled: true diff --git a/roles/k3s/agent/templates/k3s-agent.service.j2 b/roles/k3s/agent/templates/k3s-agent.service.j2 index e0157b29..59261e3e 100644 --- a/roles/k3s/agent/templates/k3s-agent.service.j2 +++ b/roles/k3s/agent/templates/k3s-agent.service.j2 @@ -1,13 +1,17 @@ [Unit] Description=Lightweight Kubernetes Documentation=https://k3s.io +Wants=network-online.target After=network-online.target +[Install] +WantedBy=multi-user.target + [Service] Type=notify -ExecStartPre=-/sbin/modprobe br_netfilter -ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }} +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead @@ -19,6 +23,7 @@ TasksMax=infinity TimeoutStartSec=0 Restart=always RestartSec=5s - -[Install] -WantedBy=multi-user.target +ExecStartPre=/bin/sh -xc '! /usr/bin/systemctl is-enabled --quiet nm-cloud-setup.service' +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s agent --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_agent_args }} \ No newline at end of file diff --git a/roles/k3s/server/tasks/main.yml b/roles/k3s/server/tasks/main.yml index 2d8b0b05..63196c27 100644 --- a/roles/k3s/server/tasks/main.yml +++ b/roles/k3s/server/tasks/main.yml @@ -2,22 +2,30 @@ - name: Init first server node when: ansible_hostname == groups['server'][0] block: - - name: Start temporary service for HA cluster - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --cluster-init --token {{ token }} --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args}} - # noqa: jinja[spacing] - creates: "{{ k3s_server_location }}/server/node-token" + - name: Copy K3s service file [Single] + when: groups['server'] | length == 1 + ansible.builtin.template: + src: "k3s-single.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 + + - name: Copy K3s service file [HA] when: groups['server'] | length > 1 + ansible.builtin.template: + src: "k3s-cluster-init.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 - - name: Start temporary service for single server cluster - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --token {{ token }} --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} - creates: "{{ k3s_server_location }}/server/node-token" - when: groups['server'] | length == 1 + - name: Enable and check K3s service + ansible.builtin.systemd: + name: k3s + 
daemon_reload: true + state: started + enabled: true - name: Create directory .kube ansible.builtin.file: @@ -26,6 +34,10 @@ owner: "{{ ansible_user }}" mode: "u=rwx,g=rx,o=" + - name: Pause to allow server startup + ansible.builtin.pause: + seconds: 10 + - name: Copy config file to user home directory ansible.builtin.copy: src: /etc/rancher/k3s/k3s.yaml @@ -48,15 +60,25 @@ flat: true - name: Start other server if any and verify status + when: + - (groups['server'] | length) > 1 + - ansible_hostname != groups['server'][0] block: - - name: Init additonal server nodes - ansible.builtin.command: - cmd: > - systemd-run -p RestartSec=2 -p Restart=on-failure --unit=k3s-init k3s server - --token "{{ hostvars[groups['server'][0]]['token'] }}" --server https://{{ api_endpoint }}:{{ api_port }} - --tls-san {{ api_endpoint }} --data-dir {{ k3s_server_location }} {{ extra_server_args }} - creates: "{{ k3s_server_location }}/server/node-token" - when: ansible_hostname != groups['server'][0] + - name: Copy K3s service file [HA] + when: groups['server'] | length > 1 + ansible.builtin.template: + src: "k3s-ha.service.j2" + dest: "{{ systemd_dir }}/k3s.service" + owner: root + group: root + mode: 0644 + + - name: Enable and check K3s service + ansible.builtin.systemd: + name: k3s + daemon_reload: true + state: started + enabled: true - name: Verify that all server nodes joined when: (groups['server'] | length) > 1 @@ -68,28 +90,6 @@ retries: 20 delay: 10 changed_when: false - always: - - name: Kill the temporary init service - ansible.builtin.systemd: - name: k3s-init - state: stopped - failed_when: false - -- name: Copy K3s service file - ansible.builtin.template: - src: "k3s-server.service.j2" - dest: "{{ systemd_dir }}/k3s-server.service" - owner: root - group: root - mode: 0644 - register: k3s_service - -- name: Enable and check K3s service - ansible.builtin.systemd: - name: k3s-server - daemon_reload: true - state: restarted - enabled: true - name: Create symlinks ansible.builtin.file: diff --git a/roles/k3s/server/templates/k3s-cluster-init.service.j2 b/roles/k3s/server/templates/k3s-cluster-init.service.j2 new file mode 100644 index 00000000..0b793058 --- /dev/null +++ b/roles/k3s/server/templates/k3s-cluster-init.service.j2 @@ -0,0 +1,28 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. 
+LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --cluster-init --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/k3s/server/templates/k3s-ha.service.j2 b/roles/k3s/server/templates/k3s-ha.service.j2 new file mode 100644 index 00000000..bf61e62c --- /dev/null +++ b/roles/k3s/server/templates/k3s-ha.service.j2 @@ -0,0 +1,28 @@ +[Unit] +Description=Lightweight Kubernetes +Documentation=https://k3s.io +Wants=network-online.target +After=network-online.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=notify +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env +KillMode=process +Delegate=yes +# Having non-zero Limit*s causes performance problems due to accounting overhead +# in the kernel. We recommend using cgroups to do container-local accounting. +LimitNOFILE=1048576 +LimitNPROC=infinity +LimitCORE=infinity +TasksMax=infinity +TimeoutStartSec=0 +Restart=always +RestartSec=5s +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --server https://{{ api_endpoint }}:{{ api_port }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/k3s/server/templates/k3s-server.service.j2 b/roles/k3s/server/templates/k3s-single.service.j2 similarity index 75% rename from roles/k3s/server/templates/k3s-server.service.j2 rename to roles/k3s/server/templates/k3s-single.service.j2 index 92a1220e..86909394 100644 --- a/roles/k3s/server/templates/k3s-server.service.j2 +++ b/roles/k3s/server/templates/k3s-single.service.j2 @@ -1,13 +1,17 @@ [Unit] Description=Lightweight Kubernetes Documentation=https://k3s.io +Wants=network-online.target After=network-online.target +[Install] +WantedBy=multi-user.target + [Service] Type=notify -ExecStartPre=-/sbin/modprobe br_netfilter -ExecStartPre=-/sbin/modprobe overlay -ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} {{ extra_server_args }} --token {{ token }} +EnvironmentFile=-/etc/default/%N +EnvironmentFile=-/etc/sysconfig/%N +EnvironmentFile=-/etc/systemd/system/k3s.service.env KillMode=process Delegate=yes # Having non-zero Limit*s causes performance problems due to accounting overhead @@ -19,6 +23,6 @@ TasksMax=infinity TimeoutStartSec=0 Restart=always RestartSec=5s - -[Install] -WantedBy=multi-user.target +ExecStartPre=-/sbin/modprobe br_netfilter +ExecStartPre=-/sbin/modprobe overlay +ExecStart=/usr/local/bin/k3s server --data-dir {{ k3s_server_location }} --token {{ token }} {{ extra_server_args }} \ No newline at end of file diff --git a/roles/prereq/tasks/main.yml b/roles/prereq/tasks/main.yml index 7a7932da..be707b05 100644 --- a/roles/prereq/tasks/main.yml +++ b/roles/prereq/tasks/main.yml @@ -59,12 +59,6 @@ validate: 'visudo -cf %s' when: ansible_distribution in ['CentOS', 'Red Hat Enterprise Linux','RedHat'] -- name: Make k3s directory - ansible.builtin.file: - path: "/var/lib/rancher" - mode: 0755 - state: directory - - name: Create symlink ansible.builtin.file: dest: /var/lib/rancher/k3s From 08df1deff7126b839e82dfc904c93220536df1c0 Mon Sep 17 00:00:00 2001 From: Derek Nola Date: Wed, 8 Nov 2023 15:19:13 -0800 Subject: [PATCH 5/5] Update 
readme with local testing info, clarify kubectl

Signed-off-by: Derek Nola 
---
 README.md | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 99f76205..e372f559 100644
--- a/README.md
+++ b/README.md
@@ -22,7 +22,7 @@ on processor architecture:
 ## System requirements
 
 Deployment environment must have Ansible 2.4.0+
-Master and nodes must have passwordless SSH access
+Server and agent nodes must have passwordless SSH access
 
 ## Usage
 
@@ -48,7 +48,7 @@ k3s_cluster:
 If needed, you can also edit `vars` section at the bottom to match your environment.
 If multiple hosts are in the server group the playbook will automatically setup k3s in HA mode with embedded etcd.
-An odd number of server nodes is recommended (3,5,7). Read the offical documentation below for more information and options.
+An odd number of server nodes is required (3,5,7). Read the official documentation below for more information and options.
 https://rancher.com/docs/k3s/latest/en/installation/ha-embedded/
 
 Using a loadbalancer or VIP as the API endpoint is preferred but not covered here.
@@ -61,8 +61,19 @@ ansible-playbook playbook/site.yml -i inventory.yml
 
 ## Kubeconfig
 
-To confirm access to your **Kubernetes** cluster use the following:
+After successful bringup, the kubeconfig of the cluster is copied to the control node and set as default (`~/.kube/config`).
+Assuming you have [kubectl](https://kubernetes.io/docs/tasks/tools/#kubectl) installed, you can confirm access to your **Kubernetes** cluster with the following:
 
 ```bash
 kubectl get nodes
 ```
+
+## Local Testing
+
+A Vagrantfile is provided that provisions a 5-node cluster using libvirt or VirtualBox and Vagrant. To use it:
+
+```bash
+vagrant up
+```
+
+By default, each node is given 2 cores and 2GB of RAM and runs Ubuntu 20.04. You can customize these settings by editing the `Vagrantfile`.
\ No newline at end of file
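The local testing flow introduced above reduces to a handful of Vagrant commands. This is a minimal sketch, not part of the patches themselves: it assumes the default provider setup and the node names defined in the Vagrantfile (`server-0`, etc.), and that the server role has already copied the kubeconfig for the `vagrant` user.

```bash
# Bring up all five nodes and run the Ansible provisioner
# (libvirt by default, VirtualBox is provided as a backup)
vagrant up

# Re-run only the Ansible provisioning after editing the playbook or roles
vagrant provision

# Confirm the cluster formed by querying the first server node
# (assumes ~/.kube/config was populated for the vagrant user by the server role)
vagrant ssh server-0 -c "kubectl get nodes -o wide"

# Tear the test cluster down when finished
vagrant destroy -f
```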