Homelab/.forgejo/workflows/deploy.yml
Tellsanguis 7eabe573bc
Some checks failed
CD - Deploy Infrastructure / Terraform Validation (push) Successful in 17s
CD - Deploy Infrastructure / Deploy on pve1 (push) Failing after 7s
CD - Deploy Infrastructure / Deploy on pve2 (push) Failing after 8s
CD - Deploy Infrastructure / Deploy on pve3 (push) Successful in 1m53s
CD - Deploy Infrastructure / Validate K3s Cluster (push) Has been skipped
CD - Deploy Infrastructure / Deployment Notification (push) Failing after 1s
feat(cicd): Ajouter gestion automatique des ressources DRBD Linstor
- Créer script Python pour gérer les ressources DRBD avant déploiement
  * Vérifie l'existence des ressources Linstor
  * Crée les ressources si nécessaire avec réplication
  * Augmente la taille si elle est insuffisante
  * Noms fixes: pm-a7f3c8e1 (VMID 1000) et pm-b4d2f9a3 (VMID 1001)

- Modifier workflow CI/CD pour intégrer le script Python
  * Ajouter étape de configuration SSH avec secret LINSTOR_SSH_PRIVATE_KEY
  * Exécuter le script avant tofu apply sur pve1 et pve2

- Corriger configuration Terraform des VMs
  * Ajouter vga { type = "std" } pour Standard VGA sur toutes les VMs
  * Ajouter cpu { type = "host" } pour meilleure performance
  * Ajouter replace_triggered_by pour détecter les changements de config
  * Ajouter force_create = true sur pve3 pour gérer VM existante

- Résoudre problèmes identifiés
  * "No Bootable Device" - Résolu avec Standard VGA et CPU host
  * "vmId already in use" - Résolu avec force_create sur etcd-witness
  * Détection des modifications de VM - Résolu avec replace_triggered_by

Documentation SSH créée dans cicd_backup/SETUP_SSH_LINSTOR.md
2025-11-27 18:13:49 +01:00

252 lines
10 KiB
YAML

# Continuous-deployment workflow for the homelab infrastructure.
name: CD - Deploy Infrastructure

on:
  # Run on every push to the main branch.
  push:
    branches:
      - main
  # Allow manual trigger
  workflow_dispatch:

jobs:
# Run Terraform validation first
# Gate job: checks formatting, validates every terraform/pve* directory and
# attempts a plan for each of them. The deploy jobs list it in `needs`.
ci-terraform:
  name: Terraform Validation
  runs-on: self-hosted
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the pinned OpenTofu release only when `tofu` is not on PATH.
    - name: Setup OpenTofu
      run: |
        if ! command -v tofu > /dev/null 2>&1; then
          curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7
        fi

    - name: Terraform Format Check
      continue-on-error: false
      run: |
        cd terraform
        tofu fmt -check -recursive

    # Validation runs without a backend, one node directory at a time.
    - name: Terraform Validate
      run: |
        for node_dir in terraform/pve*; do
          [ -d "$node_dir" ] || continue
          echo "--- Validating $node_dir ---"
          (cd "$node_dir" && tofu init -backend=false && tofu validate)
        done

    # A failed init/plan only prints a warning, so one failing node
    # directory does not fail this step.
    - name: Terraform Plan
      run: |
        for node_dir in terraform/pve*; do
          [ -d "$node_dir" ] || continue
          echo "--- Planning $node_dir ---"
          (cd "$node_dir" && tofu init && tofu plan) \
            || echo "WARNING: Plan failed for $(basename $node_dir) - node may be unavailable"
        done
      env:
        # Provider connection and credentials
        TF_VAR_proxmox_api_url: "https://192.168.100.10:8006/api2/json"
        TF_VAR_proxmox_token_id: ${{ secrets.PROXMOX_TOKEN_ID }}
        TF_VAR_proxmox_token_secret: ${{ secrets.PROXMOX_TOKEN_SECRET }}
        TF_VAR_proxmox_tls_insecure: "true"
        TF_VAR_ssh_public_key: ${{ secrets.SSH_PUBLIC_KEY }}
        TF_VAR_forgejo_token: ${{ secrets.GIT_TOKEN }}
        TF_VAR_forgejo_repo_url: ${{ secrets.GIT_REPO_URL }}
        # Shared VM settings
        TF_VAR_k3s_version: "v1.28.5+k3s1"
        TF_VAR_ubuntu_template: "ubuntu-2404-cloudinit"
        TF_VAR_storage_pool: "linstor_storage"
        TF_VAR_snippets_storage: "local"
        TF_VAR_k3s_network_bridge: "k3s"
        TF_VAR_k3s_gateway: "10.100.20.1"
        TF_VAR_k3s_dns: '["10.100.20.1", "1.1.1.1"]'
        # Per-VM sizing, passed as HCL object literals
        TF_VAR_k3s_server_1_config: '{ ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "100G" }'
        TF_VAR_k3s_server_2_config: '{ ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "100G" }'
        TF_VAR_etcd_witness_config: '{ ip = "10.100.20.30/24", cores = 2, memory = 2048, disk_size = "20G" }'
# Deploy infrastructure in parallel
# deploy-pve1: prepares the Linstor/DRBD resource for VMID 1000, then applies
# the OpenTofu configuration in terraform/pve1 (k3s-server-1).
deploy-pve1:
  name: Deploy on pve1
  runs-on: self-hosted
  needs: ci-terraform
  # A failure of this job does not fail the workflow run.
  continue-on-error: true
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the pinned OpenTofu release only when `tofu` is not on PATH.
    - name: Setup OpenTofu
      run: |
        if ! command -v tofu &> /dev/null; then
          curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7
        fi

    - name: Setup SSH key for Linstor management
      env:
        # Passed through the environment instead of being templated into
        # the script text, so the shell never parses the key material.
        LINSTOR_SSH_PRIVATE_KEY: ${{ secrets.LINSTOR_SSH_PRIVATE_KEY }}
      run: |
        # Restrict permissions before the file exists: with the default
        # umask the key would be briefly readable until chmod runs.
        umask 077
        mkdir -p ~/.ssh
        printf '%s\n' "$LINSTOR_SSH_PRIVATE_KEY" > ~/.ssh/id_rsa_linstor
        # Also tightens a key file left over from an earlier run.
        chmod 600 ~/.ssh/id_rsa_linstor

    # Must succeed before the apply step runs.
    - name: Manage DRBD Linstor resource for k3s-server-1
      continue-on-error: false
      run: |
        python3 scripts/manage_linstor_resources.py --vmid 1000 --size 100 --ssh-key ~/.ssh/id_rsa_linstor

    - name: Terraform Apply on pve1
      env:
        # Secrets reach OpenTofu as TF_VAR_* variables (same mechanism as
        # the ci-terraform plan step) so they are neither written to disk
        # nor subject to shell expansion of `$`, backticks or backslashes.
        TF_VAR_proxmox_token_id: ${{ secrets.PROXMOX_TOKEN_ID }}
        TF_VAR_proxmox_token_secret: ${{ secrets.PROXMOX_TOKEN_SECRET }}
        TF_VAR_ssh_public_key: ${{ secrets.SSH_PUBLIC_KEY }}
        TF_VAR_forgejo_token: ${{ secrets.GIT_TOKEN }}
        TF_VAR_forgejo_repo_url: ${{ secrets.GIT_REPO_URL }}
      run: |
        cd terraform/pve1
        # Non-secret settings only; the quoted delimiter keeps the shell
        # from expanding anything inside the heredoc.
        cat > terraform.tfvars <<'EOF'
        k3s_version = "v1.28.5+k3s1"
        ubuntu_template = "ubuntu-2404-cloudinit"
        storage_pool = "linstor_storage"
        k3s_server_1_storage_pool = "linstor_storage"
        snippets_storage = "local"
        k3s_network_bridge = "k3s"
        k3s_gateway = "10.100.20.1"
        k3s_dns = ["10.100.20.1", "1.1.1.1"]
        k3s_server_1_config = { ip = "10.100.20.10/24", cores = 6, memory = 12288, disk_size = "100G" }
        EOF
        # -input=false: fail instead of prompting for a missing variable.
        tofu init -input=false
        tofu apply -auto-approve -input=false
# deploy-pve2: prepares the Linstor/DRBD resource for VMID 1001, then applies
# the OpenTofu configuration in terraform/pve2 (k3s-server-2).
deploy-pve2:
  name: Deploy on pve2
  runs-on: self-hosted
  needs: ci-terraform
  # A failure of this job does not fail the workflow run.
  continue-on-error: true
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the pinned OpenTofu release only when `tofu` is not on PATH.
    - name: Setup OpenTofu
      run: |
        if ! command -v tofu &> /dev/null; then
          curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7
        fi

    - name: Setup SSH key for Linstor management
      env:
        # Passed through the environment instead of being templated into
        # the script text, so the shell never parses the key material.
        LINSTOR_SSH_PRIVATE_KEY: ${{ secrets.LINSTOR_SSH_PRIVATE_KEY }}
      run: |
        # Restrict permissions before the file exists: with the default
        # umask the key would be briefly readable until chmod runs.
        umask 077
        mkdir -p ~/.ssh
        printf '%s\n' "$LINSTOR_SSH_PRIVATE_KEY" > ~/.ssh/id_rsa_linstor
        # Also tightens a key file left over from an earlier run.
        chmod 600 ~/.ssh/id_rsa_linstor

    # Must succeed before the apply step runs.
    - name: Manage DRBD Linstor resource for k3s-server-2
      continue-on-error: false
      run: |
        python3 scripts/manage_linstor_resources.py --vmid 1001 --size 100 --ssh-key ~/.ssh/id_rsa_linstor

    - name: Terraform Apply on pve2
      env:
        # Secrets reach OpenTofu as TF_VAR_* variables (same mechanism as
        # the ci-terraform plan step) so they are neither written to disk
        # nor subject to shell expansion of `$`, backticks or backslashes.
        TF_VAR_proxmox_token_id: ${{ secrets.PROXMOX_TOKEN_ID }}
        TF_VAR_proxmox_token_secret: ${{ secrets.PROXMOX_TOKEN_SECRET }}
        TF_VAR_ssh_public_key: ${{ secrets.SSH_PUBLIC_KEY }}
        TF_VAR_forgejo_token: ${{ secrets.GIT_TOKEN }}
        TF_VAR_forgejo_repo_url: ${{ secrets.GIT_REPO_URL }}
      run: |
        cd terraform/pve2
        # Non-secret settings only; the quoted delimiter keeps the shell
        # from expanding anything inside the heredoc.
        cat > terraform.tfvars <<'EOF'
        k3s_version = "v1.28.5+k3s1"
        ubuntu_template = "ubuntu-2404-cloudinit"
        storage_pool = "linstor_storage"
        k3s_server_2_storage_pool = "linstor_storage"
        snippets_storage = "local"
        k3s_network_bridge = "k3s"
        k3s_gateway = "10.100.20.1"
        k3s_dns = ["10.100.20.1", "1.1.1.1"]
        k3s_server_2_config = { ip = "10.100.20.20/24", cores = 6, memory = 12288, disk_size = "100G" }
        EOF
        # -input=false: fail instead of prompting for a missing variable.
        tofu init -input=false
        tofu apply -auto-approve -input=false
# deploy-pve3: applies the OpenTofu configuration in terraform/pve3
# (etcd-witness). This job has no Linstor preparation step.
deploy-pve3:
  name: Deploy on pve3
  runs-on: self-hosted
  needs: ci-terraform
  # A failure of this job does not fail the workflow run.
  continue-on-error: true
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    # Install the pinned OpenTofu release only when `tofu` is not on PATH.
    - name: Setup OpenTofu
      run: |
        if ! command -v tofu &> /dev/null; then
          curl -fsSL https://get.opentofu.org/install-opentofu.sh | bash -s -- --install-method standalone --opentofu-version 1.10.7
        fi

    - name: Terraform Apply on pve3
      env:
        # Secrets reach OpenTofu as TF_VAR_* variables (same mechanism as
        # the ci-terraform plan step) so they are neither written to disk
        # nor subject to shell expansion of `$`, backticks or backslashes.
        TF_VAR_proxmox_token_id: ${{ secrets.PROXMOX_TOKEN_ID }}
        TF_VAR_proxmox_token_secret: ${{ secrets.PROXMOX_TOKEN_SECRET }}
        TF_VAR_ssh_public_key: ${{ secrets.SSH_PUBLIC_KEY }}
        TF_VAR_forgejo_token: ${{ secrets.GIT_TOKEN }}
        TF_VAR_forgejo_repo_url: ${{ secrets.GIT_REPO_URL }}
      run: |
        cd terraform/pve3
        # Non-secret settings only; the quoted delimiter keeps the shell
        # from expanding anything inside the heredoc.
        cat > terraform.tfvars <<'EOF'
        k3s_version = "v1.28.5+k3s1"
        ubuntu_template = "ubuntu-2404-cloudinit"
        storage_pool = "linstor_storage"
        etcd_witness_storage_pool = "local-lvm"
        snippets_storage = "local"
        k3s_network_bridge = "k3s"
        k3s_gateway = "10.100.20.1"
        k3s_dns = ["10.100.20.1", "1.1.1.1"]
        etcd_witness_config = { ip = "10.100.20.30/24", cores = 2, memory = 2048, disk_size = "20G" }
        EOF
        # -input=false: fail instead of prompting for a missing variable.
        tofu init -input=false
        tofu apply -auto-approve -input=false
# Validate cluster after deployment
# Runs only on main, and only when at least two of the three deploy jobs
# reported success; always() lets the condition be evaluated even when one
# of the needed jobs failed.
validate-cluster:
  name: Validate K3s Cluster
  runs-on: self-hosted
  needs:
    - deploy-pve1
    - deploy-pve2
    - deploy-pve3
  if: |
    always() &&
    github.ref == 'refs/heads/main' &&
    (
      (needs.deploy-pve1.result == 'success' && needs.deploy-pve2.result == 'success') ||
      (needs.deploy-pve1.result == 'success' && needs.deploy-pve3.result == 'success') ||
      (needs.deploy-pve2.result == 'success' && needs.deploy-pve3.result == 'success')
    )
  steps:
    - name: Checkout code
      uses: actions/checkout@v4

    - name: Wait for K3s cluster
      run: |
        echo "Waiting for K3s cluster to be ready..."
        # Wait 5 minutes for ansible-pull to configure K3s
        sleep 300

    # Only prints a message; no actual cluster check is performed here.
    - name: Check cluster status (optional)
      continue-on-error: true
      run: |
        echo "Cluster validation completed"
# Notify on completion
# Always runs. Prints the result of each deploy job and of the cluster
# validation, then exits 0 when at least two of the three nodes deployed
# successfully and 1 otherwise.
notify:
  name: Deployment Notification
  runs-on: self-hosted
  needs: [deploy-pve1, deploy-pve2, deploy-pve3, validate-cluster]
  if: always()
  steps:
    - name: Deployment Summary
      env:
        # Job results are handed to the script as plain variables rather
        # than being templated into the script text.
        PVE1_RESULT: ${{ needs.deploy-pve1.result }}
        PVE2_RESULT: ${{ needs.deploy-pve2.result }}
        PVE3_RESULT: ${{ needs.deploy-pve3.result }}
        VALIDATE_RESULT: ${{ needs.validate-cluster.result }}
      run: |
        echo "=== Deployment Summary ==="
        echo "pve1 (k3s-server-1): $PVE1_RESULT"
        echo "pve2 (k3s-server-2): $PVE2_RESULT"
        echo "pve3 (etcd-witness): $PVE3_RESULT"
        echo "Cluster validation: $VALIDATE_RESULT"
        echo ""

        # Count successful deployments.
        # Do not use `[ ... ] && ((success_count++))` here: the arithmetic
        # command returns a non-zero status when the value before the
        # increment is 0, which aborts the script when the shell runs with
        # `-e` and hides the summary below.
        success_count=0
        for result in "$PVE1_RESULT" "$PVE2_RESULT" "$PVE3_RESULT"; do
          if [ "$result" = "success" ]; then
            success_count=$((success_count + 1))
          fi
        done
        echo "Nodes deployed: $success_count/3"

        if [ "$success_count" -ge 2 ]; then
          echo "SUCCESS: HA maintained - Quorum achieved ($success_count/3 nodes)"
          exit 0
        elif [ "$success_count" -eq 1 ]; then
          echo "WARNING: Only 1 node deployed - No HA"
          exit 1
        else
          echo "CRITICAL: No nodes deployed"
          exit 1
        fi