feat(ci): Ajout support HA pour pannes de nœuds
Modification des workflows CI/CD pour gérer proprement les pannes de nœuds Proxmox.
Workflow CI (ci.yml) :
- Terraform Plan ne s'exécute que sur la branche main (CI plus rapide sur les branches de fonctionnalité)
- Les échecs du plan sur des nœuds indisponibles ne bloquent pas la validation
- Ajout d'un message d'avertissement lorsque le plan échoue
Workflow Deploy (deploy.yml) :
- Ajout de continue-on-error à tous les jobs de déploiement (pve1, pve2, pve3)
- La validation du cluster exige désormais 2 nœuds sur 3 (quorum)
- Résumé de déploiement enrichi d'un compteur de succès
- Codes de sortie : 0 si au moins 2 nœuds sont déployés, 1 sinon (1 seul nœud ou aucun)
Ainsi, l'infrastructure reste opérationnelle même lorsqu'un nœud Proxmox est hors service, conformément aux principes de haute disponibilité.
This commit is contained in:
parent
a855240c6d
commit
6041d76186
2 changed files with 38 additions and 8 deletions
|
|
@ -35,7 +35,7 @@ jobs:
|
|||
done
|
||||
|
||||
- name: Terraform Plan
|
||||
if: github.event_name == 'push'
|
||||
if: github.event_name == 'push' && github.ref == 'refs/heads/main'
|
||||
run: |
|
||||
for dir in terraform/pve*; do
|
||||
if [ -d "$dir" ]; then
|
||||
|
|
@ -44,7 +44,7 @@ jobs:
|
|||
cd "$dir" && \
|
||||
cp ../terraform.tfvars.example terraform.tfvars && \
|
||||
tofu init && \
|
||||
tofu plan -out="tfplan-$(basename $dir)"
|
||||
tofu plan -out="tfplan-$(basename $dir)" || echo "WARNING: Plan failed for $(basename $dir) - node may be unavailable"
|
||||
)
|
||||
fi
|
||||
done
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ jobs:
|
|||
name: Deploy on pve1
|
||||
runs-on: self-hosted
|
||||
needs: ci
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -45,6 +46,7 @@ jobs:
|
|||
name: Deploy on pve2
|
||||
runs-on: self-hosted
|
||||
needs: ci
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -73,6 +75,7 @@ jobs:
|
|||
name: Deploy on pve3
|
||||
runs-on: self-hosted
|
||||
needs: ci
|
||||
continue-on-error: true
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -102,7 +105,14 @@ jobs:
|
|||
name: Validate K3s Cluster
|
||||
runs-on: self-hosted
|
||||
needs: [deploy-pve1, deploy-pve2, deploy-pve3]
|
||||
if: github.ref == 'refs/heads/main' && needs.deploy-pve1.result == 'success' && needs.deploy-pve2.result == 'success' && needs.deploy-pve3.result == 'success'
|
||||
if: |
|
||||
always() &&
|
||||
github.ref == 'refs/heads/main' &&
|
||||
(
|
||||
(needs.deploy-pve1.result == 'success' && needs.deploy-pve2.result == 'success') ||
|
||||
(needs.deploy-pve1.result == 'success' && needs.deploy-pve3.result == 'success') ||
|
||||
(needs.deploy-pve2.result == 'success' && needs.deploy-pve3.result == 'success')
|
||||
)
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v4
|
||||
|
|
@ -124,8 +134,28 @@ jobs:
|
|||
steps:
|
||||
- name: Deployment Summary
|
||||
run: |
|
||||
echo "Deployment completed!"
|
||||
echo "pve1 status: ${{ needs.deploy-pve1.result }}"
|
||||
echo "pve2 status: ${{ needs.deploy-pve2.result }}"
|
||||
echo "pve3 status: ${{ needs.deploy-pve3.result }}"
|
||||
echo "Validation: ${{ needs.validate-cluster.result }}"
|
||||
echo "=== Deployment Summary ==="
|
||||
echo "pve1 (k3s-server-1): ${{ needs.deploy-pve1.result }}"
|
||||
echo "pve2 (k3s-server-2): ${{ needs.deploy-pve2.result }}"
|
||||
echo "pve3 (etcd-witness): ${{ needs.deploy-pve3.result }}"
|
||||
echo "Cluster validation: ${{ needs.validate-cluster.result }}"
|
||||
echo ""
|
||||
|
||||
# Count successful deployments.
# Plain assignments are used instead of ((success_count++)): the post-increment
# evaluates to 0 on the first hit, so (( )) returns status 1 and a shell running
# with `set -e` (the GitHub Actions default, `bash -e`) would abort the step
# before the quorum verdict below is ever printed.
|
||||
success_count=0
|
||||
if [ "${{ needs.deploy-pve1.result }}" == "success" ]; then success_count=$((success_count + 1)); fi
|
||||
if [ "${{ needs.deploy-pve2.result }}" == "success" ]; then success_count=$((success_count + 1)); fi
|
||||
if [ "${{ needs.deploy-pve3.result }}" == "success" ]; then success_count=$((success_count + 1)); fi
|
||||
|
||||
echo "Nodes deployed: $success_count/3"
|
||||
|
||||
if [ $success_count -ge 2 ]; then
|
||||
echo "SUCCESS: HA maintained - Quorum achieved ($success_count/3 nodes)"
|
||||
exit 0
|
||||
elif [ $success_count -eq 1 ]; then
|
||||
echo "WARNING: Only 1 node deployed - No HA"
|
||||
exit 1
|
||||
else
|
||||
echo "CRITICAL: No nodes deployed"
|
||||
exit 1
|
||||
fi
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue