diff --git a/.forgejo/workflows/deploy.yml b/.forgejo/workflows/deploy.yml index 45f0716..6d71344 100644 --- a/.forgejo/workflows/deploy.yml +++ b/.forgejo/workflows/deploy.yml @@ -4,9 +4,10 @@ on: push: branches: - main - workflow_dispatch: + workflow_dispatch: # Allow manual trigger jobs: + # Run Terraform validation first ci-terraform: name: Terraform Validation runs-on: self-hosted @@ -62,11 +63,11 @@ jobs: TF_VAR_k3s_network_bridge: "k3s" TF_VAR_k3s_gateway: "10.100.20.1" TF_VAR_k3s_dns: '["10.100.20.1", "1.1.1.1"]' - TF_VAR_k3s_token: ${{ secrets.K3S_TOKEN }} - TF_VAR_k3s_server_1_config: '{ ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "40G" }' - TF_VAR_k3s_server_2_config: '{ ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "40G" }' + TF_VAR_k3s_server_1_config: '{ ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "100G" }' + TF_VAR_k3s_server_2_config: '{ ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "100G" }' TF_VAR_etcd_witness_config: '{ ip = "10.100.20.30/24", cores = 2, memory = 2048, disk_size = "20G" }' + # Deploy infrastructure in parallel deploy-pve1: name: Deploy on pve1 runs-on: self-hosted @@ -80,6 +81,15 @@ jobs: if ! 
command -v tofu &> /dev/null; then curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7 fi + - name: Setup Python + run: | + sudo apt-get update && sudo apt-get install -y python3 + - name: Prepare LINSTOR resources for pve1 + run: | + # Copie le script sur le noeud et exécute avec lecture automatique depuis Terraform + scp scripts/manage_linstor_resources.py root@acemagician:/tmp/ + scp -r terraform root@acemagician:/tmp/ + ssh root@acemagician "python3 /tmp/manage_linstor_resources.py --terraform-dir /tmp/terraform --verbose" - name: Terraform Apply on pve1 run: | cd terraform/pve1 @@ -90,15 +100,14 @@ jobs: forgejo_token = "${{ secrets.GIT_TOKEN }}" forgejo_repo_url = "${{ secrets.GIT_REPO_URL }}" k3s_version = "v1.28.5+k3s1" - k3s_token = "${{ secrets.K3S_TOKEN }}" ubuntu_template = "ubuntu-2404-cloudinit" storage_pool = "linstor_storage" - k3s_server_1_storage_pool = "local-nvme" + k3s_server_1_storage_pool = "linstor_storage" snippets_storage = "local" k3s_network_bridge = "k3s" k3s_gateway = "10.100.20.1" k3s_dns = ["10.100.20.1", "1.1.1.1"] - k3s_server_1_config = { ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "40G" } + k3s_server_1_config = { ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "100G" } EOF tofu init tofu apply -auto-approve @@ -116,6 +125,15 @@ jobs: if ! 
command -v tofu &> /dev/null; then curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7 fi + - name: Setup Python + run: | + sudo apt-get update && sudo apt-get install -y python3 + - name: Prepare LINSTOR resources for pve2 + run: | + # Copie le script sur le noeud et exécute avec lecture automatique depuis Terraform + scp scripts/manage_linstor_resources.py root@elitedesk:/tmp/ + scp -r terraform root@elitedesk:/tmp/ + ssh root@elitedesk "python3 /tmp/manage_linstor_resources.py --terraform-dir /tmp/terraform --verbose" - name: Terraform Apply on pve2 run: | cd terraform/pve2 @@ -126,15 +144,14 @@ jobs: forgejo_token = "${{ secrets.GIT_TOKEN }}" forgejo_repo_url = "${{ secrets.GIT_REPO_URL }}" k3s_version = "v1.28.5+k3s1" - k3s_token = "${{ secrets.K3S_TOKEN }}" ubuntu_template = "ubuntu-2404-cloudinit" storage_pool = "linstor_storage" - k3s_server_2_storage_pool = "local-nvme" + k3s_server_2_storage_pool = "linstor_storage" snippets_storage = "local" k3s_network_bridge = "k3s" k3s_gateway = "10.100.20.1" k3s_dns = ["10.100.20.1", "1.1.1.1"] - k3s_server_2_config = { ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "40G" } + k3s_server_2_config = { ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "100G" } EOF tofu init tofu apply -auto-approve @@ -162,7 +179,6 @@ jobs: forgejo_token = "${{ secrets.GIT_TOKEN }}" forgejo_repo_url = "${{ secrets.GIT_REPO_URL }}" k3s_version = "v1.28.5+k3s1" - k3s_token = "${{ secrets.K3S_TOKEN }}" ubuntu_template = "ubuntu-2404-cloudinit" storage_pool = "linstor_storage" etcd_witness_storage_pool = "local-lvm" @@ -194,12 +210,13 @@ jobs: - name: Wait for K3s cluster run: | echo "Waiting for K3s cluster to be ready..." 
- sleep 300 - - name: Check cluster status + sleep 300 # Wait 5 minutes for ansible-pull to configure K3s + - name: Check cluster status (optional) run: | echo "Cluster validation completed" continue-on-error: true + # Notify on completion notify: name: Deployment Notification runs-on: self-hosted diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml index e766079..7ac4183 100644 --- a/ansible/group_vars/all.yml +++ b/ansible/group_vars/all.yml @@ -1,32 +1,44 @@ --- +# Global variables for all nodes + +# K3s Configuration k3s_version: "v1.28.5+k3s1" k3s_install_url: "https://get.k3s.io" +# K3s Server Configuration k3s_server_1_ip: "10.100.20.10" k3s_server_2_ip: "10.100.20.20" k3s_witness_ip: "10.100.20.30" +# K3s token (shared between servers) +# In production, this should be stored in a vault k3s_token_file: "/etc/rancher/k3s/token" +# Network Configuration pod_cidr: "10.42.0.0/16" service_cidr: "10.43.0.0/16" cluster_dns: "10.43.0.10" +# System Configuration timezone: "Europe/Paris" swap_enabled: false +# Unattended Upgrades Configuration unattended_upgrades_enabled: true unattended_upgrades_automatic_reboot: true unattended_upgrades_automatic_reboot_with_users: false +# Reboot schedule (staggered to maintain availability) reboot_schedule: k3s-server-1: "02:00" k3s-server-2: "04:00" etcd-witness: "06:00" +# FluxCD Configuration flux_version: "v2.2.0" flux_namespace: "flux-system" +# System packages to install on all nodes common_packages: - curl - wget @@ -40,6 +52,7 @@ common_packages: - python3 - python3-pip +# Kernel parameters for K3s sysctl_config: net.bridge.bridge-nf-call-iptables: 1 net.bridge.bridge-nf-call-ip6tables: 1 diff --git a/ansible/roles/etcd-witness/tasks/main.yml b/ansible/roles/etcd-witness/tasks/main.yml index b46e079..efd1a89 100644 --- a/ansible/roles/etcd-witness/tasks/main.yml +++ b/ansible/roles/etcd-witness/tasks/main.yml @@ -1,19 +1,19 @@ --- +# etcd witness node configuration +# This node participates in etcd quorum 
but does not run K8s workloads + - name: Check if K3s is already installed stat: path: /usr/local/bin/k3s register: k3s_binary -- name: Load K3s token from environment +- name: Get K3s token from first server set_fact: - k3s_token: "{{ lookup('env', 'K3S_TOKEN') }}" - -- name: Wait for first server API - wait_for: - host: "{{ k3s_server_1_ip }}" - port: 6443 - delay: 60 - timeout: 900 + k3s_token: >- + {{ + lookup('file', k3s_token_file, errors='ignore') + | default('PLACEHOLDER', true) + }} - name: Install K3s as server (witness mode) shell: > diff --git a/ansible/roles/k3s-server/files/k3s-pre-reboot.sh b/ansible/roles/k3s-server/files/k3s-pre-reboot.sh index aa359a0..e7538db 100644 --- a/ansible/roles/k3s-server/files/k3s-pre-reboot.sh +++ b/ansible/roles/k3s-server/files/k3s-pre-reboot.sh @@ -1,13 +1,19 @@ #!/bin/bash +# K3s pre-reboot script +# Drains the node before system reboot to migrate workloads gracefully + set -e +# Only run if k3s is active if systemctl is-active --quiet k3s; then NODE_NAME=$(hostname) echo "$(date): Starting pre-reboot drain for node $NODE_NAME" | logger -t k3s-pre-reboot + # Set KUBECONFIG export KUBECONFIG=/etc/rancher/k3s/k3s.yaml + # Drain the node (migrate pods to other nodes) /usr/local/bin/k3s kubectl drain "$NODE_NAME" \ --ignore-daemonsets \ --delete-emptydir-data \ diff --git a/ansible/roles/k3s-server/tasks/flux.yml b/ansible/roles/k3s-server/tasks/flux.yml index ba8e84a..f7dd1a7 100644 --- a/ansible/roles/k3s-server/tasks/flux.yml +++ b/ansible/roles/k3s-server/tasks/flux.yml @@ -1,4 +1,6 @@ --- +# Install and configure FluxCD + - name: Check if flux is already installed command: k3s kubectl get namespace {{ flux_namespace }} register: flux_installed @@ -42,73 +44,9 @@ changed_when: false when: flux_installed.rc != 0 -- name: Load Forgejo token from environment - set_fact: - forgejo_token: "{{ lookup('env', 'FORGEJO_TOKEN') }}" - forgejo_repo_url: "{{ lookup('env', 'REPO_URL') }}" - -- name: Create Forgejo secret for FluxCD - 
shell: | - export KUBECONFIG=/etc/rancher/k3s/k3s.yaml - k3s kubectl create secret generic forgejo-auth \ - --namespace={{ flux_namespace }} \ - --from-literal=username=git \ - --from-literal=password={{ forgejo_token }} \ - --dry-run=client -o yaml | k3s kubectl apply -f - - when: flux_installed.rc != 0 - -- name: Create GitRepository manifest - copy: - dest: /tmp/gitrepository.yaml - content: | - apiVersion: source.toolkit.fluxcd.io/v1 - kind: GitRepository - metadata: - name: infra - namespace: {{ flux_namespace }} - spec: - interval: 1m - url: {{ forgejo_repo_url }} - ref: - branch: main - secretRef: - name: forgejo-auth - mode: '0644' - when: flux_installed.rc != 0 - -- name: Apply GitRepository - shell: | - export KUBECONFIG=/etc/rancher/k3s/k3s.yaml - k3s kubectl apply -f /tmp/gitrepository.yaml - when: flux_installed.rc != 0 - -- name: Create Kustomization manifest - copy: - dest: /tmp/kustomization.yaml - content: | - apiVersion: kustomize.toolkit.fluxcd.io/v1 - kind: Kustomization - metadata: - name: apps - namespace: {{ flux_namespace }} - spec: - interval: 1m - sourceRef: - kind: GitRepository - name: infra - path: ./k8s - prune: true - wait: true - mode: '0644' - when: flux_installed.rc != 0 - -- name: Apply Kustomization - shell: | - export KUBECONFIG=/etc/rancher/k3s/k3s.yaml - k3s kubectl apply -f /tmp/kustomization.yaml - when: flux_installed.rc != 0 - - name: Display FluxCD installation status debug: - msg: "FluxCD configured to sync from {{ forgejo_repo_url }}" + msg: >- + FluxCD installed successfully. 
+ Configure GitRepository in kubernetes/flux-system/ when: flux_installed.rc != 0 diff --git a/ansible/roles/k3s-server/tasks/main.yml b/ansible/roles/k3s-server/tasks/main.yml index 83a50c6..4ddc3d4 100644 --- a/ansible/roles/k3s-server/tasks/main.yml +++ b/ansible/roles/k3s-server/tasks/main.yml @@ -1,4 +1,6 @@ --- +# K3s server installation and configuration + - name: Check if K3s is already installed stat: path: /usr/local/bin/k3s @@ -15,15 +17,10 @@ set_fact: is_first_server: "{{ ansible_default_ipv4.address == k3s_server_1_ip }}" -- name: Load K3s token from environment - set_fact: - k3s_token: "{{ lookup('env', 'K3S_TOKEN') }}" - - name: Install K3s on first server (cluster-init) shell: > curl -sfL {{ k3s_install_url }} | INSTALL_K3S_VERSION="{{ k3s_version }}" - K3S_TOKEN="{{ k3s_token }}" sh -s - server --cluster-init --tls-san {{ k3s_server_1_ip }} @@ -47,13 +44,17 @@ timeout: 300 when: is_first_server -- name: Wait for first server API (second server) - wait_for: - host: "{{ k3s_server_1_ip }}" - port: 6443 - delay: 30 - timeout: 600 - when: not is_first_server +- name: Get K3s token from first server + slurp: + src: /var/lib/rancher/k3s/server/node-token + register: k3s_token_encoded + when: is_first_server + run_once: true + +- name: Save K3s token + set_fact: + k3s_token: "{{ k3s_token_encoded.content | b64decode | trim }}" + when: is_first_server - name: Install K3s on second server (join cluster) shell: > @@ -61,7 +62,7 @@ INSTALL_K3S_VERSION="{{ k3s_version }}" sh -s - server --server https://{{ k3s_server_1_ip }}:6443 - --token {{ k3s_token }} + --token {{ k3s_token | default('PLACEHOLDER') }} --tls-san {{ k3s_server_2_ip }} --write-kubeconfig-mode 644 --disable traefik diff --git a/ansible/site.yml b/ansible/site.yml index 92859c0..51ceac8 100644 --- a/ansible/site.yml +++ b/ansible/site.yml @@ -1,10 +1,14 @@ --- +# Main playbook for K3s GitOps infrastructure +# This playbook is executed by ansible-pull on each VM + - name: Configure K3s 
Infrastructure hosts: localhost connection: local become: true vars: + # Read node role from file created by cloud-init node_role: >- {{ lookup('file', '/etc/node-role', errors='ignore') @@ -30,11 +34,14 @@ cache_valid_time: 3600 roles: + # Common role applies to all nodes - role: common + # K3s server role (server + worker) - role: k3s-server when: node_role == 'server' + # etcd witness role (etcd only, no k8s workloads) - role: etcd-witness when: node_role == 'witness' diff --git a/k8s/hello-world/deployment.yaml b/k8s/hello-world/deployment.yaml deleted file mode 100644 index ea03ea8..0000000 --- a/k8s/hello-world/deployment.yaml +++ /dev/null @@ -1,37 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: hello-world - namespace: demo -spec: - replicas: 3 - selector: - matchLabels: - app: hello-world - template: - metadata: - labels: - app: hello-world - spec: - containers: - - name: hello-world - image: bashofmann/rancher-demo:1.0.0 - imagePullPolicy: Always - resources: - requests: - memory: "12Mi" - cpu: "2m" - ports: - - containerPort: 8080 - name: web - env: - - name: COW_COLOR - value: purple - readinessProbe: - httpGet: - path: / - port: web - livenessProbe: - httpGet: - path: / - port: web diff --git a/k8s/hello-world/namespace.yaml b/k8s/hello-world/namespace.yaml deleted file mode 100644 index 18434a6..0000000 --- a/k8s/hello-world/namespace.yaml +++ /dev/null @@ -1,4 +0,0 @@ -apiVersion: v1 -kind: Namespace -metadata: - name: demo diff --git a/k8s/hello-world/service.yaml b/k8s/hello-world/service.yaml deleted file mode 100644 index 56ec09c..0000000 --- a/k8s/hello-world/service.yaml +++ /dev/null @@ -1,13 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - name: hello-world-service - namespace: demo -spec: - type: LoadBalancer - selector: - app: hello-world - ports: - - protocol: TCP - port: 8080 - targetPort: 8080 diff --git a/scripts/manage_linstor_resources.py b/scripts/manage_linstor_resources.py new file mode 100644 index 
0000000..52eb66f --- /dev/null +++ b/scripts/manage_linstor_resources.py @@ -0,0 +1,442 @@ +#!/usr/bin/env python3 +""" +Script de Gestion des Ressources LINSTOR pour Proxmox + +Auteur: BENE Maël +Version: 1.0 +Date: 2025-11-27 + +Description: + Ce script gère automatiquement les ressources LINSTOR pour les VMs Proxmox. + Il assure que les ressources existent avec la taille correcte avant le déploiement. + +Fonctionnalités: + - Vérifie l'existence d'une ressource + - Crée la ressource si elle n'existe pas + - Redimensionne la ressource si la taille ne correspond pas (uniquement augmentation) + - Opérations idempotentes (peut être exécuté plusieurs fois en toute sécurité) +""" + +import subprocess +import json +import sys +import argparse +import re +import os +from pathlib import Path + + +# Configuration des ressources par défaut +# Ces valeurs peuvent être modifiées selon vos besoins +RESOURCE_CONFIG = { + 'vm-1000-disk-0': { + 'node': 'acemagician', + 'size': '100G', + 'storage_pool': 'pve-storage' + }, + 'vm-1001-disk-0': { + 'node': 'elitedesk', + 'size': '100G', + 'storage_pool': 'pve-storage' + }, + 'vm-1002-disk-0': { + 'node': 'thinkpad', + 'size': '20G', + 'storage_pool': 'local-lvm' + } +} + + +class LinstorManager: + """Gestionnaire des ressources LINSTOR""" + + def __init__(self, verbose=False): + self.verbose = verbose + + def log(self, message): + """Affiche un message de log si le mode verbose est activé""" + if self.verbose: + print(f"[INFO] {message}") + + def run_command(self, command): + """Exécute une commande shell et retourne la sortie""" + self.log(f"Exécution: {' '.join(command)}") + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True + ) + return result.stdout.strip() + except subprocess.CalledProcessError as e: + print(f"[ERREUR] Échec de la commande: {' '.join(command)}", file=sys.stderr) + print(f"[ERREUR] Code de sortie: {e.returncode}", file=sys.stderr) + print(f"[ERREUR] Stdout: {e.stdout}", 
file=sys.stderr) + print(f"[ERREUR] Stderr: {e.stderr}", file=sys.stderr) + return None + + def parse_size(self, size_str): + """Convertit une chaîne de taille (ex: '100G', '1024M') en octets""" + size_str = size_str.strip().upper() + + # Correspond au nombre et à l'unité + match = re.match(r'^(\d+(?:\.\d+)?)\s*([KMGT]?)I?B?$', size_str) + if not match: + raise ValueError(f"Format de taille invalide: {size_str}") + + number, unit = match.groups() + number = float(number) + + multipliers = { + '': 1, + 'K': 1024, + 'M': 1024**2, + 'G': 1024**3, + 'T': 1024**4, + } + + return int(number * multipliers.get(unit, 1)) + + def format_size(self, bytes_value): + """Formate les octets en taille lisible""" + for unit in ['', 'K', 'M', 'G', 'T']: + if bytes_value < 1024.0: + return f"{bytes_value:.0f}{unit}iB" if unit else f"{bytes_value:.0f}B" + bytes_value /= 1024.0 + return f"{bytes_value:.2f}PiB" + + def resource_exists(self, resource_name): + """Vérifie si une définition de ressource LINSTOR existe""" + output = self.run_command(['linstor', 'resource-definition', 'list', '--machine-readable']) + if output is None: + return False + + try: + data = json.loads(output) + for item in data: + if isinstance(item, dict) and item.get('name') == resource_name: + self.log(f"La définition de ressource '{resource_name}' existe") + return True + except json.JSONDecodeError: + self.log("Échec de l'analyse de la sortie resource-definition list") + + return False + + def get_resource_size(self, resource_name): + """Récupère la taille actuelle d'un volume de ressource (en octets)""" + output = self.run_command(['linstor', 'volume-definition', 'list', '--machine-readable']) + if output is None: + return None + + try: + data = json.loads(output) + for item in data: + if isinstance(item, dict): + if item.get('resource_name') == resource_name and item.get('volume_number') == 0: + # La taille est en KiB dans LINSTOR + size_kib = item.get('size_kib', 0) + size_bytes = size_kib * 1024 + 
self.log(f"Taille actuelle de '{resource_name}': {self.format_size(size_bytes)}") + return size_bytes + except json.JSONDecodeError: + self.log("Échec de l'analyse de la sortie volume-definition list") + + return None + + def get_resource_nodes(self, resource_name): + """Récupère la liste des nœuds où la ressource est déployée""" + output = self.run_command(['linstor', 'resource', 'list', '--machine-readable']) + if output is None: + return [] + + nodes = set() + try: + data = json.loads(output) + for item in data: + if isinstance(item, dict) and item.get('name') == resource_name: + node = item.get('node_name') + if node: + nodes.add(node) + except json.JSONDecodeError: + self.log("Échec de l'analyse de la sortie resource list") + + return list(nodes) + + def create_resource_definition(self, resource_name): + """Crée une définition de ressource LINSTOR""" + self.log(f"Création de la définition de ressource '{resource_name}'") + output = self.run_command(['linstor', 'resource-definition', 'create', resource_name]) + if output is None: + return False + print(f"✓ Définition de ressource '{resource_name}' créée") + return True + + def create_volume_definition(self, resource_name, size): + """Crée une définition de volume LINSTOR""" + self.log(f"Création de la définition de volume pour '{resource_name}' avec taille {size}") + output = self.run_command(['linstor', 'volume-definition', 'create', resource_name, size]) + if output is None: + return False + print(f"✓ Définition de volume créée pour '{resource_name}' avec taille {size}") + return True + + def create_resource(self, node, resource_name, storage_pool): + """Crée une ressource LINSTOR sur un nœud spécifique""" + self.log(f"Création de la ressource '{resource_name}' sur le nœud '{node}' avec le pool de stockage '{storage_pool}'") + output = self.run_command([ + 'linstor', 'resource', 'create', + node, resource_name, + '--storage-pool', storage_pool + ]) + if output is None: + return False + print(f"✓ Ressource 
'{resource_name}' créée sur le nœud '{node}'") + return True + + def resize_volume(self, resource_name, new_size): + """Redimensionne un volume LINSTOR (uniquement augmentation)""" + self.log(f"Redimensionnement du volume '{resource_name}' à {new_size}") + output = self.run_command([ + 'linstor', 'volume-definition', 'set-size', + resource_name, '0', new_size + ]) + if output is None: + return False + print(f"✓ Volume '{resource_name}' redimensionné à {new_size}") + return True + + def ensure_resource(self, resource_name, node, size, storage_pool): + """ + Assure qu'une ressource LINSTOR existe avec la taille correcte + + Args: + resource_name: Nom de la ressource (ex: 'vm-1000-disk-0') + node: Nom du nœud cible (ex: 'acemagician') + size: Taille désirée (ex: '100G') + storage_pool: Nom du pool de stockage (ex: 'pve-storage') + + Returns: + True si succès, False sinon + """ + print(f"\n=== Gestion de la ressource '{resource_name}' sur le nœud '{node}' ===") + + # Vérifie si la définition de ressource existe + if not self.resource_exists(resource_name): + print(f"La ressource '{resource_name}' n'existe pas. Création...") + + # Crée la définition de ressource + if not self.create_resource_definition(resource_name): + return False + + # Crée la définition de volume + if not self.create_volume_definition(resource_name, size): + return False + + # Crée la ressource sur le nœud + if not self.create_resource(node, resource_name, storage_pool): + return False + + print(f"✓ Ressource '{resource_name}' créée avec succès") + return True + + # La ressource existe - vérifier la taille + print(f"La ressource '{resource_name}' existe déjà. 
Vérification de la taille...") + current_size = self.get_resource_size(resource_name) + desired_size_bytes = self.parse_size(size) + + if current_size is None: + print(f"[ATTENTION] Impossible de déterminer la taille actuelle de '{resource_name}'") + return False + + if current_size < desired_size_bytes: + print(f"Taille actuelle ({self.format_size(current_size)}) inférieure à la taille désirée ({size})") + print(f"Redimensionnement de la ressource '{resource_name}' à {size}...") + if not self.resize_volume(resource_name, size): + return False + print(f"✓ Ressource '{resource_name}' redimensionnée avec succès") + elif current_size > desired_size_bytes: + print(f"[ATTENTION] Taille actuelle ({self.format_size(current_size)}) supérieure à la taille désirée ({size})") + print(f"[ATTENTION] LINSTOR ne supporte pas la réduction de volumes. Conservation de la taille actuelle.") + else: + print(f"✓ Taille correspondante ({self.format_size(current_size)}). Aucune action nécessaire.") + + # Vérifie si la ressource est déployée sur le bon nœud + deployed_nodes = self.get_resource_nodes(resource_name) + if node not in deployed_nodes: + print(f"Ressource '{resource_name}' non déployée sur le nœud '{node}'. 
Déploiement...") + if not self.create_resource(node, resource_name, storage_pool): + return False + print(f"✓ Ressource '{resource_name}' déployée avec succès sur le nœud '{node}'") + else: + self.log(f"Ressource '{resource_name}' déjà déployée sur le nœud '{node}'") + + return True + + +def parse_terraform_config(terraform_dir): + """ + Parse les fichiers Terraform pour extraire la configuration des ressources + + Args: + terraform_dir: Chemin vers le répertoire racine Terraform + + Returns: + Dictionnaire de configuration des ressources + """ + config = {} + terraform_path = Path(terraform_dir) + + # Cherche tous les répertoires pve* + for pve_dir in terraform_path.glob('pve*'): + if not pve_dir.is_dir(): + continue + + main_tf = pve_dir / 'main.tf' + if not main_tf.exists(): + continue + + try: + with open(main_tf, 'r') as f: + content = f.read() + + # Extrait VMID + vmid_match = re.search(r'vmid\s*=\s*(\d+)', content) + if not vmid_match: + continue + vmid = vmid_match.group(1) + + # Extrait target_node + node_match = re.search(r'target_node\s*=\s*"([^"]+)"', content) + if not node_match: + continue + node = node_match.group(1) + + # Extrait disk size depuis var reference + size_match = re.search(r'size\s*=\s*var\.(\w+)\.disk_size', content) + if size_match: + var_name = size_match.group(1) + # Cherche la valeur par défaut dans variables.tf ou dans le workflow + # Pour simplifier, on utilise les valeurs par défaut + if 'etcd' in var_name.lower(): + size = '20G' + else: + size = '100G' + else: + size = '100G' + + # Extrait storage pool + storage_match = re.search(r'storage\s*=\s*var\.(\w+)', content) + if storage_match: + storage_var = storage_match.group(1) + if 'etcd' in storage_var.lower(): + storage_pool = 'local-lvm' + else: + storage_pool = 'linstor_storage' + else: + storage_pool = 'linstor_storage' + + resource_name = f"vm-{vmid}-disk-0" + config[resource_name] = { + 'node': node, + 'size': size, + 'storage_pool': storage_pool + } + + except Exception as 
e: + print(f"[ATTENTION] Erreur lors de la lecture de {main_tf}: {e}", file=sys.stderr) + continue + + return config + + +def main(): + parser = argparse.ArgumentParser( + description='Gestion des ressources LINSTOR pour les VMs Proxmox', + formatter_class=argparse.RawDescriptionHelpFormatter + ) + + parser.add_argument('--resource', help='Nom de la ressource (ex: vm-1000-disk-0)') + parser.add_argument('--node', help='Nom du nœud cible (ex: acemagician)') + parser.add_argument('--size', help='Taille désirée (ex: 100G)') + parser.add_argument('--storage-pool', help='Nom du pool de stockage (ex: pve-storage)') + parser.add_argument('--terraform-dir', help='Chemin vers le répertoire Terraform pour lecture automatique de la config') + parser.add_argument('--all', action='store_true', help='Traiter toutes les ressources configurées') + parser.add_argument('--verbose', '-v', action='store_true', help='Active la sortie détaillée') + + args = parser.parse_args() + + manager = LinstorManager(verbose=args.verbose) + + # Mode 1: Lecture automatique depuis Terraform + if args.terraform_dir: + print("=== Lecture de la configuration depuis les fichiers Terraform ===") + config = parse_terraform_config(args.terraform_dir) + if not config: + print("[ERREUR] Aucune configuration trouvée dans les fichiers Terraform", file=sys.stderr) + sys.exit(1) + + print(f"Configuration trouvée pour {len(config)} ressource(s)") + all_success = True + for resource_name, res_config in config.items(): + success = manager.ensure_resource( + resource_name=resource_name, + node=res_config['node'], + size=res_config['size'], + storage_pool=res_config['storage_pool'] + ) + if not success: + all_success = False + + if all_success: + print("\n✓ Toutes les ressources sont prêtes") + sys.exit(0) + else: + print("\n✗ Certaines ressources ont échoué", file=sys.stderr) + sys.exit(1) + + # Mode 2: Traiter toutes les ressources de la configuration par défaut + elif args.all: + print("=== Traitement de toutes les 
ressources configurées ===") + all_success = True + for resource_name, config in RESOURCE_CONFIG.items(): + success = manager.ensure_resource( + resource_name=resource_name, + node=config['node'], + size=config['size'], + storage_pool=config['storage_pool'] + ) + if not success: + all_success = False + + if all_success: + print("\n✓ Toutes les ressources sont prêtes") + sys.exit(0) + else: + print("\n✗ Certaines ressources ont échoué", file=sys.stderr) + sys.exit(1) + + # Mode 3: Traiter une ressource spécifique + elif args.resource and args.node and args.size and args.storage_pool: + success = manager.ensure_resource( + resource_name=args.resource, + node=args.node, + size=args.size, + storage_pool=args.storage_pool + ) + + if success: + print(f"\n✓ Ressource '{args.resource}' prête sur le nœud '{args.node}'") + sys.exit(0) + else: + print(f"\n✗ Échec de la gestion de la ressource '{args.resource}'", file=sys.stderr) + sys.exit(1) + + else: + parser.print_help() + print("\n[ERREUR] Arguments insuffisants. Utilisez --terraform-dir, --all, ou spécifiez --resource --node --size --storage-pool", file=sys.stderr) + sys.exit(1) + + +if __name__ == '__main__': + main() diff --git a/snippets/README.md b/snippets/README.md deleted file mode 100644 index 251f895..0000000 --- a/snippets/README.md +++ /dev/null @@ -1,34 +0,0 @@ -# Cloud-Init Snippets pour Proxmox - -## Avant l'upload - -Remplace les placeholders dans chaque fichier YAML : - -- `YOUR_SSH_PUBLIC_KEY` : Ta clé SSH publique -- `YOUR_FORGEJO_REPO_URL` : URL du dépôt Forgejo (ex: https://forgejo.tellserv.fr/Tellsanguis/Homelab.git) -- `YOUR_FORGEJO_TOKEN` : Token Forgejo -- `YOUR_K3S_TOKEN` : Token K3S cluster - -## Upload via interface Proxmox - -### acemagician (k3s-server-1) -1. Proxmox → acemagician → Datacenter → Storage → local -2. Content → Snippets → Upload -3. Upload `cloud-init-k3s-server-1.yaml` - -### elitedesk (k3s-server-2) -1. Proxmox → elitedesk → Datacenter → Storage → local -2. 
Content → Snippets → Upload -3. Upload `cloud-init-k3s-server-2.yaml` - -### thinkpad (etcd-witness) -1. Proxmox → thinkpad → Datacenter → Storage → local -2. Content → Snippets → Upload -3. Upload `cloud-init-etcd-witness.yaml` - -## Vérification - -Après upload, les fichiers doivent être présents dans : -- `/var/lib/vz/snippets/cloud-init-k3s-server-1.yaml` (acemagician) -- `/var/lib/vz/snippets/cloud-init-k3s-server-2.yaml` (elitedesk) -- `/var/lib/vz/snippets/cloud-init-etcd-witness.yaml` (thinkpad) diff --git a/snippets/cloud-init-etcd-witness.yaml b/snippets/cloud-init-etcd-witness.yaml deleted file mode 100644 index c7b5ccd..0000000 --- a/snippets/cloud-init-etcd-witness.yaml +++ /dev/null @@ -1,50 +0,0 @@ -package_upgrade: true -packages: - - ansible - - git - - curl - - wget - - ca-certificates - - gnupg - - lsb-release -users: - - name: ansible - sudo: ALL=(ALL) NOPASSWD:ALL - shell: /bin/bash - ssh_authorized_keys: - - YOUR_SSH_PUBLIC_KEY - groups: sudo -timezone: Europe/Paris -write_files: - - path: /etc/node-role - content: witness - permissions: "0644" - - path: /etc/ansible-pull.conf - content: | - REPO_URL=YOUR_FORGEJO_REPO_URL - FORGEJO_TOKEN=YOUR_FORGEJO_TOKEN - K3S_VERSION=v1.28.5+k3s1 - K3S_TOKEN=YOUR_K3S_TOKEN - permissions: "0600" - - path: /usr/local/bin/ansible-pull-wrapper.sh - content: | - #!/bin/bash - set -e - source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL - WORK_DIR="/var/lib/ansible-local" - mkdir -p $WORK_DIR - cd $WORK_DIR - REPO_WITH_AUTH=$(echo $REPO_URL | sed "s|https://|https://git:$FORGEJO_TOKEN@|") - if [ -d ".git" ]; then - git pull origin main 2>&1 | logger -t ansible-pull - else - git clone $REPO_WITH_AUTH . 
2>&1 | logger -t ansible-pull - fi - ansible-playbook ansible/site.yml -i localhost, --connection=local -e "k3s_version=$K3S_VERSION" 2>&1 | logger -t ansible-pull - permissions: "0755" -runcmd: - - echo '*/15 * * * * root /usr/local/bin/ansible-pull-wrapper.sh' > /etc/cron.d/ansible-pull - - sleep 60 && /usr/local/bin/ansible-pull-wrapper.sh & diff --git a/snippets/cloud-init-k3s-server-1.yaml b/snippets/cloud-init-k3s-server-1.yaml deleted file mode 100644 index 4d55fbf..0000000 --- a/snippets/cloud-init-k3s-server-1.yaml +++ /dev/null @@ -1,50 +0,0 @@ -package_upgrade: true -packages: - - ansible - - git - - curl - - wget - - ca-certificates - - gnupg - - lsb-release -users: - - name: ansible - sudo: ALL=(ALL) NOPASSWD:ALL - shell: /bin/bash - ssh_authorized_keys: - - YOUR_SSH_PUBLIC_KEY - groups: sudo -timezone: Europe/Paris -write_files: - - path: /etc/node-role - content: server - permissions: "0644" - - path: /etc/ansible-pull.conf - content: | - REPO_URL=YOUR_FORGEJO_REPO_URL - FORGEJO_TOKEN=YOUR_FORGEJO_TOKEN - K3S_VERSION=v1.28.5+k3s1 - K3S_TOKEN=YOUR_K3S_TOKEN - permissions: "0600" - - path: /usr/local/bin/ansible-pull-wrapper.sh - content: | - #!/bin/bash - set -e - source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL - WORK_DIR="/var/lib/ansible-local" - mkdir -p $WORK_DIR - cd $WORK_DIR - REPO_WITH_AUTH=$(echo $REPO_URL | sed "s|https://|https://git:$FORGEJO_TOKEN@|") - if [ -d ".git" ]; then - git pull origin main 2>&1 | logger -t ansible-pull - else - git clone $REPO_WITH_AUTH . 
2>&1 | logger -t ansible-pull - fi - ansible-playbook ansible/site.yml -i localhost, --connection=local -e "k3s_version=$K3S_VERSION" 2>&1 | logger -t ansible-pull - permissions: "0755" -runcmd: - - echo '*/15 * * * * root /usr/local/bin/ansible-pull-wrapper.sh' > /etc/cron.d/ansible-pull - - sleep 60 && /usr/local/bin/ansible-pull-wrapper.sh & diff --git a/snippets/cloud-init-k3s-server-2.yaml b/snippets/cloud-init-k3s-server-2.yaml deleted file mode 100644 index 4d55fbf..0000000 --- a/snippets/cloud-init-k3s-server-2.yaml +++ /dev/null @@ -1,50 +0,0 @@ -package_upgrade: true -packages: - - ansible - - git - - curl - - wget - - ca-certificates - - gnupg - - lsb-release -users: - - name: ansible - sudo: ALL=(ALL) NOPASSWD:ALL - shell: /bin/bash - ssh_authorized_keys: - - YOUR_SSH_PUBLIC_KEY - groups: sudo -timezone: Europe/Paris -write_files: - - path: /etc/node-role - content: server - permissions: "0644" - - path: /etc/ansible-pull.conf - content: | - REPO_URL=YOUR_FORGEJO_REPO_URL - FORGEJO_TOKEN=YOUR_FORGEJO_TOKEN - K3S_VERSION=v1.28.5+k3s1 - K3S_TOKEN=YOUR_K3S_TOKEN - permissions: "0600" - - path: /usr/local/bin/ansible-pull-wrapper.sh - content: | - #!/bin/bash - set -e - source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL - WORK_DIR="/var/lib/ansible-local" - mkdir -p $WORK_DIR - cd $WORK_DIR - REPO_WITH_AUTH=$(echo $REPO_URL | sed "s|https://|https://git:$FORGEJO_TOKEN@|") - if [ -d ".git" ]; then - git pull origin main 2>&1 | logger -t ansible-pull - else - git clone $REPO_WITH_AUTH . 
2>&1 | logger -t ansible-pull - fi - ansible-playbook ansible/site.yml -i localhost, --connection=local -e "k3s_version=$K3S_VERSION" 2>&1 | logger -t ansible-pull - permissions: "0755" -runcmd: - - echo '*/15 * * * * root /usr/local/bin/ansible-pull-wrapper.sh' > /etc/cron.d/ansible-pull - - sleep 60 && /usr/local/bin/ansible-pull-wrapper.sh & diff --git a/terraform/pve1/cloud-init.tf b/terraform/pve1/cloud-init.tf index b5ee9ed..3479f34 100644 --- a/terraform/pve1/cloud-init.tf +++ b/terraform/pve1/cloud-init.tf @@ -27,9 +27,6 @@ locals { #!/bin/bash set -e source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL WORK_DIR="/var/lib/ansible-local" mkdir -p $WORK_DIR cd $WORK_DIR @@ -51,7 +48,7 @@ locals { }, { path = "/etc/ansible-pull.conf" - content = "REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}\nK3S_TOKEN=${var.k3s_token}" + content = "REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}" permissions = "0600" }, { diff --git a/terraform/pve1/main.tf b/terraform/pve1/main.tf index 3700953..bdbc32e 100644 --- a/terraform/pve1/main.tf +++ b/terraform/pve1/main.tf @@ -28,15 +28,26 @@ resource "proxmox_vm_qemu" "k3s_server_1" { clone = var.ubuntu_template full_clone = true + # Gère les VMs existantes + force_create = false + cpu { cores = var.k3s_server_1_config.cores sockets = 1 + type = "host" } memory = var.k3s_server_1_config.memory agent = 1 - boot = "order=scsi0" + bios = "seabios" + boot = "order=scsi0" + + # Utilise Standard VGA pour la compatibilité + vga { + type = "std" + } + scsihw = "virtio-scsi-single" onboot = true @@ -54,18 +65,14 @@ resource "proxmox_vm_qemu" "k3s_server_1" { iothread = true } - disk { - slot = "ide2" - type = "cloudinit" - storage = var.k3s_server_1_storage_pool - } - ipconfig0 = "ip=${var.k3s_server_1_config.ip},gw=${var.k3s_gateway}" cicustom = 
"user=${var.snippets_storage}:snippets/cloud-init-k3s-server-1.yaml" nameserver = join(" ", var.k3s_dns) lifecycle { - ignore_changes = [network] + ignore_changes = [ + clone + ] } depends_on = [local_file.k3s_server_cloud_init] diff --git a/terraform/pve1/variables.tf b/terraform/pve1/variables.tf index d7ed579..d746444 100644 --- a/terraform/pve1/variables.tf +++ b/terraform/pve1/variables.tf @@ -54,9 +54,9 @@ variable "storage_pool" { } variable "k3s_server_1_storage_pool" { - description = "Storage pool for k3s-server-1 disk (local-nvme for acemagician)" + description = "Storage pool for k3s-server-1 disk (linstor_storage for HA)" type = string - default = "local-nvme" + default = "linstor_storage" } variable "snippets_storage" { @@ -88,9 +88,3 @@ variable "k3s_server_1_config" { disk_size = string }) } - -variable "k3s_token" { - description = "K3s cluster token" - type = string - sensitive = true -} diff --git a/terraform/pve2/cloud-init.tf b/terraform/pve2/cloud-init.tf index 0931fc7..2eab5cb 100644 --- a/terraform/pve2/cloud-init.tf +++ b/terraform/pve2/cloud-init.tf @@ -27,9 +27,6 @@ locals { #!/bin/bash set -e source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL WORK_DIR="/var/lib/ansible-local" mkdir -p $WORK_DIR cd $WORK_DIR @@ -51,7 +48,7 @@ locals { }, { path = "/etc/ansible-pull.conf" - content = "REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}\nK3S_TOKEN=${var.k3s_token}" + content = "REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}" permissions = "0600" }, { diff --git a/terraform/pve2/main.tf b/terraform/pve2/main.tf index 22e973d..861fbc6 100644 --- a/terraform/pve2/main.tf +++ b/terraform/pve2/main.tf @@ -28,15 +28,26 @@ resource "proxmox_vm_qemu" "k3s_server_2" { clone = var.ubuntu_template full_clone = true + # Gère les VMs existantes + force_create = false + cpu { cores = var.k3s_server_2_config.cores 
sockets = 1 + type = "host" } memory = var.k3s_server_2_config.memory agent = 1 - boot = "order=scsi0" + bios = "seabios" + boot = "order=scsi0" + + # Utilise Standard VGA pour la compatibilité + vga { + type = "std" + } + scsihw = "virtio-scsi-single" onboot = true @@ -54,18 +65,14 @@ resource "proxmox_vm_qemu" "k3s_server_2" { iothread = true } - disk { - slot = "ide2" - type = "cloudinit" - storage = var.k3s_server_2_storage_pool - } - ipconfig0 = "ip=${var.k3s_server_2_config.ip},gw=${var.k3s_gateway}" cicustom = "user=${var.snippets_storage}:snippets/cloud-init-k3s-server-2.yaml" nameserver = join(" ", var.k3s_dns) lifecycle { - ignore_changes = [network] + ignore_changes = [ + clone + ] } depends_on = [local_file.k3s_server_cloud_init] diff --git a/terraform/pve2/variables.tf b/terraform/pve2/variables.tf index 7ba47ea..5983230 100644 --- a/terraform/pve2/variables.tf +++ b/terraform/pve2/variables.tf @@ -54,9 +54,9 @@ variable "storage_pool" { } variable "k3s_server_2_storage_pool" { - description = "Storage pool for k3s-server-2 disk (local-nvme for elitedesk)" + description = "Storage pool for k3s-server-2 disk (linstor_storage for HA)" type = string - default = "local-nvme" + default = "linstor_storage" } variable "snippets_storage" { @@ -88,9 +88,3 @@ variable "k3s_server_2_config" { disk_size = string }) } - -variable "k3s_token" { - description = "K3s cluster token" - type = string - sensitive = true -} diff --git a/terraform/pve3/cloud-init.tf b/terraform/pve3/cloud-init.tf index e61efc4..b9e2036 100644 --- a/terraform/pve3/cloud-init.tf +++ b/terraform/pve3/cloud-init.tf @@ -27,9 +27,6 @@ locals { #!/bin/bash set -e source /etc/ansible-pull.conf - export K3S_TOKEN - export FORGEJO_TOKEN - export REPO_URL WORK_DIR="/var/lib/ansible-local" mkdir -p $WORK_DIR cd $WORK_DIR @@ -51,7 +48,7 @@ locals { }, { path = "/etc/ansible-pull.conf" - content = 
"REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}\nK3S_TOKEN=${var.k3s_token}" + content = "REPO_URL=${var.forgejo_repo_url}\nFORGEJO_TOKEN=${var.forgejo_token}\nK3S_VERSION=${var.k3s_version}" permissions = "0600" }, { diff --git a/terraform/pve3/main.tf b/terraform/pve3/main.tf index f9ce1c5..e2425b4 100644 --- a/terraform/pve3/main.tf +++ b/terraform/pve3/main.tf @@ -28,15 +28,26 @@ resource "proxmox_vm_qemu" "etcd_witness" { clone = var.ubuntu_template full_clone = true + # Gère les VMs existantes - force recréation si VM existe déjà + force_create = true + cpu { cores = var.etcd_witness_config.cores sockets = 1 + type = "host" } memory = var.etcd_witness_config.memory agent = 1 - boot = "order=scsi0" + bios = "seabios" + boot = "order=scsi0" + + # Utilise Standard VGA pour la compatibilité + vga { + type = "std" + } + scsihw = "virtio-scsi-single" onboot = true @@ -54,18 +65,14 @@ resource "proxmox_vm_qemu" "etcd_witness" { iothread = true } - disk { - slot = "ide2" - type = "cloudinit" - storage = var.etcd_witness_storage_pool - } - ipconfig0 = "ip=${var.etcd_witness_config.ip},gw=${var.k3s_gateway}" cicustom = "user=${var.snippets_storage}:snippets/cloud-init-etcd-witness.yaml" nameserver = join(" ", var.k3s_dns) lifecycle { - ignore_changes = [network] + ignore_changes = [ + clone + ] } depends_on = [local_file.etcd_witness_cloud_init] diff --git a/terraform/pve3/variables.tf b/terraform/pve3/variables.tf index e2e4d58..4d3d7a6 100644 --- a/terraform/pve3/variables.tf +++ b/terraform/pve3/variables.tf @@ -88,9 +88,3 @@ variable "etcd_witness_config" { disk_size = string }) } - -variable "k3s_token" { - description = "K3s cluster token" - type = string - sensitive = true -} diff --git a/terraform/terraform.tfvars.example b/terraform/terraform.tfvars.example index 06ffcf7..7b80187 100644 --- a/terraform/terraform.tfvars.example +++ b/terraform/terraform.tfvars.example @@ -1,36 +1,44 @@ +# Copy this file to 
terraform.tfvars and fill in your values + +# Proxmox Configuration proxmox_api_url = "https://192.168.100.10:8006/api2/json" proxmox_token_id = "root@pam!opentofu" proxmox_token_secret = "your-proxmox-token-secret" proxmox_tls_insecure = true +# SSH Access ssh_public_key = "ssh-ed25519 AAAAC3... your-email@example.com" +# Forgejo Configuration forgejo_token = "your-forgejo-token" forgejo_repo_url = "ssh://git@forgejo.tellserv.fr:222/Tellsanguis/infra.git" +# K3s Version k3s_version = "v1.28.5+k3s1" -k3s_token = "your-k3s-cluster-token" +# Template and Storage ubuntu_template = "ubuntu-2404-cloudinit" storage_pool = "linstor_storage" snippets_storage = "local" +# Network k3s_network_bridge = "k3s" k3s_gateway = "10.100.20.1" k3s_dns = ["10.100.20.1", "1.1.1.1"] +# VM Configurations k3s_server_1_config = { ip = "10.100.20.10/24" cores = 6 memory = 12288 - disk_size = "40G" + disk_size = "100G" } k3s_server_2_config = { ip = "10.100.20.20/24" cores = 6 memory = 12288 - disk_size = "40G" + disk_size = "100G" } etcd_witness_config = {