feat: Commit initial

This commit is contained in:
Tellsanguis 2025-11-07 09:33:38 +01:00
commit 40dc0f4184
43 changed files with 1990 additions and 0 deletions

16
ansible/ansible.cfg Normal file
View file

@ -0,0 +1,16 @@
# Ansible runtime configuration.
[defaults]
# The trailing comma makes Ansible read this as an inline host list, not a file path.
inventory = localhost,
host_key_checking = False
retry_files_enabled = False
stdout_callback = yaml
# `callbacks_enabled` is the current name of this setting (ansible-core 2.11+).
callbacks_enabled = profile_tasks, timer
# Legacy spelling of the same setting, kept so releases older than 2.11 still
# enable the callbacks. Newer releases no longer honour it.
callback_whitelist = profile_tasks, timer
gathering = smart
# Cache gathered facts as JSON files for one hour.
fact_caching = jsonfile
fact_caching_connection = /tmp/ansible_fact_cache
fact_caching_timeout = 3600

[privilege_escalation]
# Run tasks as root through sudo without prompting for a password.
become = True
become_method = sudo
become_user = root
become_ask_pass = False

View file

@ -0,0 +1,62 @@
---
# Variables shared by every node.

# --- K3s release and installer -------------------------------------------
k3s_version: 'v1.28.5+k3s1'
k3s_install_url: 'https://get.k3s.io'

# --- K3s node addresses ---------------------------------------------------
k3s_server_1_ip: '10.100.20.10'
k3s_server_2_ip: '10.100.20.20'
k3s_witness_ip: '10.100.20.30'

# Path of the file holding the token shared between servers.
# For production use, keep the token in a vault instead.
k3s_token_file: '/etc/rancher/k3s/token'

# --- Cluster networking ---------------------------------------------------
pod_cidr: '10.42.0.0/16'
service_cidr: '10.43.0.0/16'
cluster_dns: '10.43.0.10'

# --- Base system ----------------------------------------------------------
timezone: 'Europe/Paris'
swap_enabled: false

# --- Unattended upgrades --------------------------------------------------
unattended_upgrades_enabled: true
unattended_upgrades_automatic_reboot: true
unattended_upgrades_automatic_reboot_with_users: false

# Per-host reboot time; the times are staggered to maintain availability.
reboot_schedule:
  k3s-server-1: '02:00'
  k3s-server-2: '04:00'
  etcd-witness: '06:00'

# --- FluxCD ---------------------------------------------------------------
flux_version: 'v2.2.0'
flux_namespace: 'flux-system'

# --- Packages installed on all nodes --------------------------------------
common_packages:
  - curl
  - wget
  - git
  - vim
  - htop
  - net-tools
  - ca-certificates
  - gnupg
  - lsb-release
  - python3
  - python3-pip

# --- Kernel parameters for K3s --------------------------------------------
sysctl_config:
  net.bridge.bridge-nf-call-iptables: 1
  net.bridge.bridge-nf-call-ip6tables: 1
  net.ipv4.ip_forward: 1
  vm.swappiness: 0
  fs.inotify.max_user_instances: 8192
  fs.inotify.max_user_watches: 524288

View file

@ -0,0 +1,6 @@
---
# Default variables for common role.
# Role defaults have the lowest precedence, so any inventory, group or
# playbook variable with the same name overrides the values below.
timezone: "Europe/Paris"
swap_enabled: false

# Unattended upgrades: the role's tasks and template read all of these, so
# each one gets a fallback here and the role also works without external vars.
unattended_upgrades_enabled: true
unattended_upgrades_automatic_reboot: true
unattended_upgrades_automatic_reboot_with_users: false

# Map of hostname -> "HH:MM" reboot time. Hosts missing from the map use the
# fallback time chosen in the unattended-upgrades tasks.
reboot_schedule: {}

View file

@ -0,0 +1,11 @@
---
# Handlers for the common role; tasks trigger them by name through `notify`.

# Restarts the unattended-upgrades unit.
- name: restart unattended-upgrades
  systemd:
    state: restarted
    name: unattended-upgrades

# Asks systemd to re-read its unit files.
- name: reload systemd
  systemd:
    daemon_reload: true

View file

@ -0,0 +1,95 @@
---
# Common configuration for all nodes
# Apply the timezone named by the `timezone` variable.
- name: Set timezone
  timezone:
    name: '{{ timezone }}'

# Refresh the apt index, then make sure every entry of `common_packages` is present.
- name: Install common packages
  apt:
    update_cache: true
    state: present
    name: '{{ common_packages }}'
# Swap handling, skipped entirely when `swap_enabled` is true.
# The steps are split so that each one reports "changed" only when it really
# altered the system.

# Read-only probe: prints one line per active swap device, nothing when there is none.
- name: Check for active swap devices
  command: swapon --noheadings --show=NAME
  register: active_swap_devices
  changed_when: false
  when: not swap_enabled

- name: Disable swap
  command: swapoff -a
  when:
    - not swap_enabled
    - active_swap_devices.stdout | trim | length > 0

# Comment out (rather than delete) only uncommented fstab entries that have a
# standalone "swap" field, so unrelated lines that merely contain the word
# "swap" are left alone and the original entry stays recoverable.
- name: Disable swap entries in /etc/fstab
  replace:
    path: /etc/fstab
    regexp: '^(?![ \t]*#)(.*[ \t]swap[ \t].*)$'
    replace: '# \1'
  when: not swap_enabled
# Load the two kernel modules right now.
- name: Load kernel modules
  modprobe:
    state: present
    name: '{{ item }}'
  loop: [overlay, br_netfilter]

# Persist the same two modules so they are loaded again at boot.
- name: Configure kernel modules to load at boot
  copy:
    dest: /etc/modules-load.d/k3s.conf
    mode: "0644"
    content: |
      overlay
      br_netfilter

# Write every key/value pair of `sysctl_config` into a dedicated sysctl file
# and reload after each entry.
- name: Configure sysctl parameters
  sysctl:
    sysctl_file: /etc/sysctl.d/99-k3s.conf
    name: '{{ item.key }}'
    value: '{{ item.value }}'
    state: present
    reload: true
  loop: '{{ sysctl_config | dict2items }}'
# Pull in the unattended-upgrades task file only when the feature is switched on.
- name: Configure unattended-upgrades
  include_tasks: unattended-upgrades.yml
  when: unattended_upgrades_enabled

# Make sure the K3s configuration and data directories exist.
- name: Create k3s directories
  file:
    state: directory
    path: '{{ item }}'
    mode: "0755"
  loop:
    - /etc/rancher/k3s
    - /var/lib/rancher/k3s
# Optional host firewall. The whole block is switched off by the `when: false`
# at the bottom; flip that condition to activate it.
# All allow rules are added before ufw is enabled with a default-deny policy,
# so SSH access is never cut off part-way through the block.
- name: Configure firewall rules (ufw)
  block:
    - name: Install ufw
      apt:
        name: ufw
        state: present

    - name: Allow SSH
      ufw:
        rule: allow
        port: '22'
        proto: tcp

    - name: Allow K3s API
      ufw:
        rule: allow
        port: '6443'
        proto: tcp

    - name: Allow K3s etcd
      ufw:
        rule: allow
        port: '2379:2380'
        proto: tcp

    - name: Allow K3s metrics
      ufw:
        rule: allow
        port: '10250'
        proto: tcp

    # K3s' default Flannel backend carries pod-to-pod traffic between nodes
    # over VXLAN on 8472/udp. Without this rule the deny policy below would
    # break cross-node pod networking.
    - name: Allow Flannel VXLAN
      ufw:
        rule: allow
        port: '8472'
        proto: udp

    - name: Enable ufw
      ufw:
        state: enabled
        policy: deny
        direction: incoming
  when: false  # Disabled by default, enable if needed

View file

@ -0,0 +1,40 @@
---
# Automatic OS updates through unattended-upgrades.

- name: Install unattended-upgrades package
  apt:
    state: present
    name: [unattended-upgrades, apt-listchanges]

# Keep the short hostname in a fact for the schedule lookup below.
- name: Get hostname
  set_fact:
    current_hostname: '{{ ansible_hostname }}'

# Look the host up in `reboot_schedule`; hosts without an entry get 03:00.
- name: Set reboot time based on hostname
  set_fact:
    reboot_time: "{{ reboot_schedule[current_hostname] | default('03:00') }}"

# Render the main configuration file and restart the service when it changes.
- name: Configure unattended-upgrades
  template:
    src: 50unattended-upgrades.j2
    dest: /etc/apt/apt.conf.d/50unattended-upgrades
    mode: "0644"
  notify: restart unattended-upgrades

# Periodic apt settings written verbatim to 20auto-upgrades.
- name: Enable automatic updates
  copy:
    dest: /etc/apt/apt.conf.d/20auto-upgrades
    mode: "0644"
    content: |
      APT::Periodic::Update-Package-Lists "1";
      APT::Periodic::Download-Upgradeable-Packages "1";
      APT::Periodic::AutocleanInterval "7";
      APT::Periodic::Unattended-Upgrade "1";

- name: Start and enable unattended-upgrades service
  systemd:
    name: unattended-upgrades
    enabled: true
    state: started

View file

@ -0,0 +1,47 @@
// Unattended-Upgrade configuration
// Managed by Ansible - do not edit manually
// This file is a template: the reboot-related values below are filled in from
// playbook variables when it is rendered.

// Package origins that may be upgraded automatically: the base release, its
// -security pocket, and the ESM apps/infra security pockets.
Unattended-Upgrade::Allowed-Origins {
"${distro_id}:${distro_codename}";
"${distro_id}:${distro_codename}-security";
"${distro_id}ESMApps:${distro_codename}-apps-security";
"${distro_id}ESM:${distro_codename}-infra-security";
};
// Packages that must never be upgraded automatically (none listed)
Unattended-Upgrade::Package-Blacklist {
};
// Automatically reboot if needed (playbook boolean rendered in lower case)
Unattended-Upgrade::Automatic-Reboot "{{ unattended_upgrades_automatic_reboot | lower }}";
// Time of day for the automatic reboot (computed per node by the playbook)
Unattended-Upgrade::Automatic-Reboot-Time "{{ reboot_time }}";
// Whether to reboot automatically even if users are logged in
Unattended-Upgrade::Automatic-Reboot-WithUsers "{{ unattended_upgrades_automatic_reboot_with_users | lower }}";
// Remove unused kernel packages
Unattended-Upgrade::Remove-Unused-Kernel-Packages "true";
// Remove unused dependencies
Unattended-Upgrade::Remove-Unused-Dependencies "true";
// Recipient address for mail reports (left empty here)
Unattended-Upgrade::Mail "";
// When a mail report is sent; set to "on-change"
Unattended-Upgrade::MailReport "on-change";
// Days of the week on which upgrades may be applied (all seven are listed)
Unattended-Upgrade::Update-Days {"Mon";"Tue";"Wed";"Thu";"Fri";"Sat";"Sun";};
// dpkg conffile handling during upgrades: take the default action without
// prompting and keep the currently installed configuration file on conflict
Dpkg::Options {
"--force-confdef";
"--force-confold";
};
// Logging: send messages to syslog using the "daemon" facility
Unattended-Upgrade::SyslogEnable "true";
Unattended-Upgrade::SyslogFacility "daemon";

View file

@ -0,0 +1,6 @@
---
# Fallback values for the etcd-witness role.

# K3s release to install and the installer script URL.
k3s_version: 'v1.28.5+k3s1'
k3s_install_url: 'https://get.k3s.io'

# File the cluster join token is read from.
k3s_token_file: '/etc/rancher/k3s/token'

View file

@ -0,0 +1,35 @@
---
# etcd witness node configuration
# This node participates in etcd quorum but does not run K8s workloads

- name: Check if K3s is already installed
  stat:
    path: /usr/local/bin/k3s
  register: k3s_binary

# The token is read from `k3s_token_file` on the machine running Ansible.
# With errors='ignore' a missing file yields an empty value, not an undefined
# one, so default() needs its second argument set to true for the PLACEHOLDER
# fallback to apply.
- name: Get K3s token from first server
  set_fact:
    k3s_token: "{{ lookup('file', k3s_token_file, errors='ignore') | default('PLACEHOLDER', true) }}"
  no_log: true  # keep the join token out of task output

# pipefail makes a failed download fail the task; without it `sh` would run
# an empty script and exit 0. pipefail needs bash, hence the explicit
# executable. The token is shell-quoted because it is interpolated into the
# command line.
- name: Install K3s as server (witness mode)
  shell: |
    set -o pipefail
    curl -sfL {{ k3s_install_url }} | INSTALL_K3S_VERSION="{{ k3s_version }}" sh -s - server \
      --server https://{{ k3s_server_1_ip }}:6443 \
      --token {{ k3s_token | quote }} \
      --disable-apiserver \
      --disable-controller-manager \
      --disable-scheduler \
      --node-ip {{ ansible_default_ipv4.address }}
  args:
    executable: /bin/bash
  when: not k3s_binary.stat.exists
  environment:
    INSTALL_K3S_SKIP_START: "false"

- name: Enable and start k3s service
  systemd:
    name: k3s
    state: started
    enabled: true

- name: Display witness node information
  debug:
    msg: "etcd witness node configured at {{ ansible_default_ipv4.address }}"

View file

@ -0,0 +1,7 @@
---
# Fallback values for the k3s-server role.

# K3s release to install and the installer script URL.
k3s_version: 'v1.28.5+k3s1'
k3s_install_url: 'https://get.k3s.io'

# FluxCD CLI release and the namespace it is installed into.
flux_version: 'v2.2.0'
flux_namespace: 'flux-system'

View file

@ -0,0 +1,32 @@
#!/bin/bash
# K3s pre-reboot script
# Drains the node before system reboot to migrate workloads gracefully.
# Always exits 0 at the end so that a failed drain never blocks the reboot.
#
# NOTE(review): draining leaves the node cordoned; this script does not
# uncordon it afterwards - confirm that is handled elsewhere.

set -e
# Make a pipeline report the failure of any of its commands. Without this the
# status of `kubectl drain ... | logger` is logger's status, so a failed drain
# would be logged as a success.
set -o pipefail

LOG_TAG=k3s-pre-reboot

# Send one timestamped message to syslog under the script's tag.
log() {
    echo "$(date): $*" | logger -t "$LOG_TAG"
}

# Only run if k3s is active
if systemctl is-active --quiet k3s; then
    NODE_NAME=$(hostname)

    log "Starting pre-reboot drain for node $NODE_NAME"

    # Set KUBECONFIG
    export KUBECONFIG=/etc/rancher/k3s/k3s.yaml

    # Drain the node (migrate pods to other nodes). The pipeline is the `if`
    # condition so that a failure is handled here and does not trip `set -e`.
    if /usr/local/bin/k3s kubectl drain "$NODE_NAME" \
        --ignore-daemonsets \
        --delete-emptydir-data \
        --force \
        --timeout=300s 2>&1 | logger -t "$LOG_TAG"; then
        log "Node $NODE_NAME drained successfully"
    else
        log "Warning - Node drain failed or timed out"
    fi
else
    log "k3s service not active, skipping drain"
fi

exit 0

View file

@ -0,0 +1,11 @@
---
# Handlers for the k3s-server role; tasks trigger them by name through `notify`.

# Restarts the k3s unit.
- name: restart k3s
  systemd:
    state: restarted
    name: k3s

# Asks systemd to re-read its unit files.
- name: reload systemd
  systemd:
    daemon_reload: true

View file

@ -0,0 +1,47 @@
---
# Install and configure FluxCD
# Every step after the probe runs only when the Flux namespace does not exist yet.

# Read-only probe: a non-zero rc means the namespace is absent.
- name: Check if flux is already installed
  command: k3s kubectl get namespace {{ flux_namespace }}
  register: flux_installed
  changed_when: false
  failed_when: false

# Release archives are named without the leading "v" of the tag, so only a
# leading "v" is stripped from the version.
- name: Download Flux CLI
  get_url:
    url: "https://github.com/fluxcd/flux2/releases/download/{{ flux_version }}/flux_{{ flux_version | regex_replace('^v', '') }}_linux_amd64.tar.gz"
    dest: /tmp/flux.tar.gz
    mode: '0644'
  when: flux_installed.rc != 0

- name: Extract Flux CLI
  unarchive:
    src: /tmp/flux.tar.gz
    dest: /usr/local/bin
    remote_src: true
    creates: /usr/local/bin/flux
  when: flux_installed.rc != 0

# This task runs only when Flux was absent, so a successful run is a change.
# Success is judged by the exit code rather than by matching a word in the
# CLI's progress output, which is not a stable interface.
- name: Install FluxCD in cluster
  shell: |
    export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
    /usr/local/bin/flux install --namespace={{ flux_namespace }}
  when: flux_installed.rc != 0
  register: flux_install_result
  changed_when: flux_install_result.rc == 0

# Poll `flux check` for up to 30 attempts, 10 seconds apart.
- name: Wait for FluxCD to be ready
  shell: |
    export KUBECONFIG=/etc/rancher/k3s/k3s.yaml
    /usr/local/bin/flux check
  register: flux_check
  until: flux_check.rc == 0
  retries: 30
  delay: 10
  changed_when: false
  when: flux_installed.rc != 0

- name: Display FluxCD installation status
  debug:
    msg: "FluxCD installed successfully. Configure GitRepository in kubernetes/flux-system/"
  when: flux_installed.rc != 0

View file

@ -0,0 +1,117 @@
---
# K3s server installation and configuration
# Record whether the k3s binary is present.
- name: Check if K3s is already installed
  stat:
    path: /usr/local/bin/k3s
  register: k3s_binary

# Capture the version string of an existing install; the command's own exit
# code never fails the task.
- name: Get installed K3s version
  command: k3s --version
  when: k3s_binary.stat.exists
  register: installed_version
  failed_when: false
  changed_when: false

# The node whose default IPv4 address equals `k3s_server_1_ip` is the first server.
- name: Determine if this is the first server
  set_fact:
    is_first_server: '{{ ansible_default_ipv4.address == k3s_server_1_ip }}'
# Both install tasks pipe the installer into `sh`. pipefail makes a failed
# download fail the task; without it `sh` would run an empty script and exit
# 0. pipefail needs bash, hence the explicit executable.
# NOTE(review): --write-kubeconfig-mode 644 makes the admin kubeconfig
# world-readable - confirm that this is intended.
- name: Install K3s on first server (cluster-init)
  shell: |
    set -o pipefail
    curl -sfL {{ k3s_install_url }} | INSTALL_K3S_VERSION="{{ k3s_version }}" sh -s - server \
      --cluster-init \
      --tls-san {{ k3s_server_1_ip }} \
      --tls-san {{ k3s_server_2_ip }} \
      --write-kubeconfig-mode 644 \
      --disable traefik \
      --node-ip {{ ansible_default_ipv4.address }}
  args:
    executable: /bin/bash
  when:
    - is_first_server
    - not k3s_binary.stat.exists or (k3s_version not in installed_version.stdout)
  environment:
    INSTALL_K3S_SKIP_START: "false"

- name: Wait for first server to be ready
  wait_for:
    host: "{{ k3s_server_1_ip }}"
    port: 6443
    delay: 10
    timeout: 300
  when: is_first_server

- name: Get K3s token from first server
  slurp:
    src: /var/lib/rancher/k3s/server/node-token
  register: k3s_token_encoded
  when: is_first_server
  run_once: true

- name: Save K3s token
  set_fact:
    k3s_token: "{{ k3s_token_encoded.content | b64decode | trim }}"
  when: is_first_server
  no_log: true  # keep the join token out of task output

# On a joining server the two tasks above are skipped, so `k3s_token` would
# otherwise always be the PLACEHOLDER. Read it from the token file instead.
# A `k3s_token` supplied from outside still wins. default(..., true) is needed
# because a missing file yields an empty value, not an undefined one.
- name: Load K3s token from token file on joining servers
  set_fact:
    k3s_token: "{{ lookup('file', k3s_token_file | default('/etc/rancher/k3s/token'), errors='ignore') | default('PLACEHOLDER', true) }}"
  when:
    - not is_first_server
    - k3s_token is not defined
  no_log: true

# The token is shell-quoted because it is interpolated into the command line.
- name: Install K3s on second server (join cluster)
  shell: |
    set -o pipefail
    curl -sfL {{ k3s_install_url }} | INSTALL_K3S_VERSION="{{ k3s_version }}" sh -s - server \
      --server https://{{ k3s_server_1_ip }}:6443 \
      --token {{ k3s_token | default('PLACEHOLDER') | quote }} \
      --tls-san {{ k3s_server_2_ip }} \
      --write-kubeconfig-mode 644 \
      --disable traefik \
      --node-ip {{ ansible_default_ipv4.address }}
  args:
    executable: /bin/bash
  when:
    - not is_first_server
    - not k3s_binary.stat.exists or (k3s_version not in installed_version.stdout)
# Make sure the k3s unit is running now and enabled at boot.
- name: Enable and start k3s service
  systemd:
    name: k3s
    enabled: true
    state: started

# Poll `kubectl get nodes` until it succeeds: up to 30 attempts, 10 seconds apart.
- name: Wait for K3s to be ready
  command: k3s kubectl get nodes
  register: kubectl_result
  changed_when: false
  retries: 30
  delay: 10
  until: kubectl_result.rc == 0
# Install the drain script that the unit below runs when it is stopped.
- name: Create pre-reboot script
  copy:
    src: k3s-pre-reboot.sh
    dest: /usr/local/bin/k3s-pre-reboot.sh
    mode: '0755'

# The drain talks to the local k3s API, so it must finish before k3s is
# stopped. A unit that is merely started at shutdown has no such guarantee,
# because systemd runs its start job alongside the k3s stop job.
# Instead the unit stays active from boot (RemainAfterExit) and does its work
# in ExecStop. It is ordered After=k3s.service, and systemd stops units in
# reverse start order, so at shutdown ExecStop runs while k3s and the network
# are still up.
# TimeoutStopSec is above the script's 300s drain timeout so the drain is not
# killed early.
# NOTE(review): stopping or restarting this unit by hand also drains the node.
# NOTE(review): nothing here uncordons the node after boot - confirm that is
# handled elsewhere.
- name: Create systemd service for pre-reboot drain
  copy:
    dest: /etc/systemd/system/k3s-pre-reboot.service
    content: |
      [Unit]
      Description=Drain k3s node before reboot
      After=k3s.service network-online.target
      Wants=network-online.target

      [Service]
      Type=oneshot
      RemainAfterExit=yes
      ExecStart=/bin/true
      ExecStop=/usr/local/bin/k3s-pre-reboot.sh
      TimeoutStopSec=330

      [Install]
      WantedBy=multi-user.target
    mode: '0644'
  notify: reload systemd

# The unit must be active for its ExecStop to run at the next shutdown, so
# start it now instead of waiting for the next boot.
- name: Enable pre-reboot service
  systemd:
    name: k3s-pre-reboot
    enabled: true
    state: started
    daemon_reload: true
# Run the FluxCD task file on the first server only.
- name: Install FluxCD (only on first server)
  when: is_first_server
  include_tasks: flux.yml

45
ansible/site.yml Normal file
View file

@ -0,0 +1,45 @@
---
# Main playbook for K3s GitOps infrastructure
# This playbook is executed by ansible-pull on each VM

- name: Configure K3s Infrastructure
  hosts: localhost
  connection: local
  become: true

  vars:
    # Read node role from file created by cloud-init.
    # With errors='ignore' a missing file yields an empty value, not an
    # undefined one, so default() needs its second argument set to true for
    # the 'undefined' fallback to apply. trim removes stray surrounding
    # whitespace.
    node_role: "{{ lookup('file', '/etc/node-role', errors='ignore') | default('undefined', true) | trim }}"

  pre_tasks:
    - name: Display node information
      debug:
        msg: "Configuring node {{ ansible_hostname }} with role {{ node_role }}"

    # Stop early, with a clear message, on any role other than the two handled below.
    - name: Validate node role
      assert:
        that:
          - node_role in ['server', 'witness']
        fail_msg: "Invalid node role: {{ node_role }}. Expected 'server' or 'witness'"

    # Refresh the apt index unless it was refreshed within the last hour.
    - name: Update apt cache
      apt:
        update_cache: true
        cache_valid_time: 3600

  roles:
    # Common role applies to all nodes
    - role: common

    # K3s server role (server + worker)
    - role: k3s-server
      when: node_role == 'server'

    # etcd witness role (etcd only, no k8s workloads)
    - role: etcd-witness
      when: node_role == 'witness'

  post_tasks:
    - name: Display completion message
      debug:
        msg: "Configuration complete for {{ ansible_hostname }} ({{ node_role }})"