feat(azure): add Azure provider support

2026-01-27 02:20:25 +00:00 · 2025-06-10 16:52:42 +02:00
parent 9eadb35497
commit 5ed2bd8eae
14 changed files with 1469 additions and 1 deletions
--- a/README.md
+++ b/README.md
@@ -12,7 +12,7 @@ The machines created by this project automatically will create a secure bootstra
 | **Proxmox** | Virtual Machines | Virtual Machines on Proxmox VE | Manual | Available |
 | **vSphere** | Virtual Machines | Virtual Machines on VMware vSphere/vCenter | Manual | Available |
 | **vCloud** | vApps | Multi-tenant Virtual Machines on VMware Cloud Director with vApp isolation | Manual | Available |
-| **Azure** | Scale Sets | Azure VMs with automatic scaling and availability zones | Automatic | Planned |
+| **Azure** | Virtual Machine Scale Sets | Azure VMs with automatic scaling and availability zones | Automatic | Available |


 ## Bootstrap Token Management
--- a/examples/azure/example.tf
+++ b/examples/azure/example.tf
@@ -0,0 +1,134 @@
+# =============================================================================
+# AZURE KAMAJI NODE POOL EXAMPLE
+# =============================================================================
+#
+# This example demonstrates multi-pool Azure node configuration with both
+# manual and automatic scaling options:
+#
+# SCALING MODES:
+# - enable_autoscaling = true:  Azure manages scaling based on CPU metrics
+# - enable_autoscaling = false: Terraform directly controls each pool's `size`
+#
+# For manual control, set enable_autoscaling = false and adjust the pool's
+# `size` in your configuration files.
+#
+# =============================================================================
+
+# Example: Azure Provider Usage
+# This example shows how to use the Azure provider wrapper
+
+terraform {
+  required_providers {
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = "~> 2.35.0"
+    }
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = "~> 4.0"
+    }
+  }
+}
+
+# Configure the Azure provider
+provider "azurerm" {
+  subscription_id = var.azure_subscription_id
+  features {}
+}
+
+# Configure the Kubernetes provider
+provider "kubernetes" {
+  config_path = var.tenant_kubeconfig_path
+}
+
+# Use the Azure provider module
+module "azure_kamaji_node_pools" {
+  source = "../../providers/azure"
+
+  # Cluster configuration (kubeconfig path is shared with the kubernetes provider above)
+  tenant_cluster_name    = "my-azure-cluster"
+  tenant_kubeconfig_path = var.tenant_kubeconfig_path
+  yaki_url               = "https://goyaki.clastix.io"
+
+  # Node pools: "default" is sized manually by Terraform, "system" is autoscaled by Azure
+  node_pools = [
+    {
+      name                                   = "default"
+      size                                   = 3
+      min_size                               = 2
+      max_size                               = 10
+      node_disk_size                         = 50
+      node_disk_type                         = "Premium_LRS"
+      vm_size                                = "Standard_D2s_v3"
+      assign_public_ip                       = true
+      enable_autoscaling                     = false
+      scale_out_cpu_threshold                = 75
+      scale_in_cpu_threshold                 = 25
+      enable_automatic_instance_repair       = false
+      automatic_instance_repair_grace_period = "PT30M" # ISO 8601 duration
+      upgrade_mode                           = "Manual"
+    },
+    {
+      name                                   = "system"
+      size                                   = 2
+      min_size                               = 1
+      max_size                               = 5
+      node_disk_size                         = 100
+      node_disk_type                         = "Premium_LRS"
+      vm_size                                = "Standard_D4s_v3"
+      assign_public_ip                       = false
+      enable_autoscaling                     = true
+      scale_out_cpu_threshold                = 80
+      scale_in_cpu_threshold                 = 30
+      enable_automatic_instance_repair       = true
+      automatic_instance_repair_grace_period = "PT15M" # ISO 8601 duration
+      upgrade_mode                           = "Automatic"
+    }
+  ]
+
+  # Azure configuration (resource group, VNet and subnet must already exist)
+  azure_subscription_id      = var.azure_subscription_id
+  azure_location             = var.azure_location
+  azure_resource_group_name  = "kamaji"
+  azure_vnet_name            = "kamaji-vnet"
+  azure_subnet_name          = "kamaji-subnet"
+  vnet_subnet_address_prefix = "10.10.10.0/24"
+  tags = {
+    "ManagedBy"   = "Terraform"
+    "Environment" = "production"
+    "Provider"    = "Azure"
+  }
+
+  # SSH configuration
+  ssh_user            = "ubuntu"
+  ssh_public_key_path = "~/.ssh/id_rsa.pub"
+}
+
+# Variables
+variable "azure_subscription_id" {
+  description = "Azure subscription ID"
+  type        = string
+}
+
+variable "azure_location" {
+  description = "Azure region"
+  type        = string
+  default     = "italynorth"
+}
+
+variable "tenant_kubeconfig_path" {
+  description = "Path to tenant cluster kubeconfig"
+  type        = string
+  default     = "~/.kube/config"
+}
+
+# Outputs
+output "cluster_info" {
+  description = "Cluster information"
+  value       = module.azure_kamaji_node_pools.cluster_info
+}
+
+output "node_pools" {
+  description = "Node pools details"
+  value       = module.azure_kamaji_node_pools.node_pools
+} 
--- a/modules/azure-node-pool/README.md
+++ b/modules/azure-node-pool/README.md
@@ -0,0 +1,138 @@
+# Azure Node Pool Module
+
+Creates Azure Virtual Machine Scale Sets for Kamaji tenant cluster worker nodes with automatic scaling capabilities.
+
+## Features
+
+- **Virtual Machine Scale Sets** with automatic scaling
+- **Network Security Groups** with Kubernetes-optimized rules
+- **Ubuntu 24.04 LTS** support
+- **Automatic instance repair** for failed VMs
+- **CPU-based autoscaling** with configurable thresholds
+- **Bootstrap token integration** via cloud-init
+
+## Usage
+
+```hcl
+module "azure_node_pool" {
+  source = "../../modules/azure-node-pool"
+
+  # Cluster configuration
+  tenant_cluster_name = "charlie"
+  pool_name          = "default"
+
+  # Pool sizing
+  pool_size     = 3
+  pool_min_size = 1
+  pool_max_size = 9
+
+  # Azure configuration
+  azure_location            = "italynorth"
+  azure_resource_group_name = "kamaji"
+  azure_vnet_name          = "kamaji-vnet"
+  azure_subnet_name        = "kamaji-subnet"
+
+  # VM configuration
+  vm_size            = "Standard_D2s_v3"
+  assign_public_ip   = true
+  node_disk_size     = 30
+  node_disk_type     = "Premium_LRS"
+
+  # Autoscaling
+  enable_autoscaling        = true
+  scale_out_cpu_threshold   = 75
+  scale_in_cpu_threshold    = 25
+
+  # Bootstrap command
+  runcmd = module.bootstrap_token.join_cmd
+
+  tags = {
+    Environment = "production"
+    Project     = "kamaji"
+  }
+}
+```
+
+## Requirements
+
+| Name | Version |
+|------|---------|
+| terraform | >= 1.0 |
+| azurerm | >= 3.0 |
+| cloudinit | >= 2.0 |
+
+## Providers
+
+| Name | Version |
+|------|---------|
+| azurerm | >= 3.0 |
+| cloudinit | >= 2.0 |
+
+## Resources
+
+- `azurerm_linux_virtual_machine_scale_set` - Main VMSS resource
+- `azurerm_network_security_group` - Security group for nodes
+- `azurerm_network_security_rule` - Security rules
+- `azurerm_monitor_autoscale_setting` - Autoscaling configuration
+
+## Variables
+
+| Name | Description | Type | Default |
+|------|-------------|------|---------|
+| `tenant_cluster_name` | Name of the tenant cluster | `string` | `"charlie"` |
+| `pool_name` | Name of the node pool | `string` | `"default"` |
+| `pool_size` | The size of the node pool | `number` | `3` |
+| `pool_min_size` | The minimum size of the node pool | `number` | `1` |
+| `pool_max_size` | The maximum size of the node pool | `number` | `9` |
+| `azure_location` | Azure region where resources are created | `string` | `"italynorth"` |
+| `azure_resource_group_name` | Name of the Azure resource group | `string` | `"kamaji"` |
+| `azure_vnet_name` | Name of the Azure virtual network | `string` | `"kamaji-vnet"` |
+| `azure_subnet_name` | Name of the Azure subnet | `string` | `"kamaji-subnet"` |
+| `vm_size` | Size of the virtual machines | `string` | `"Standard_D2s_v3"` |
+| `assign_public_ip` | Whether to assign public IP addresses to VMs | `bool` | `true` |
+| `node_disk_size` | Disk size for each node in GB | `number` | `30` |
+| `node_disk_type` | Storage account type for each node | `string` | `"Premium_LRS"` |
+| `ssh_user` | SSH user for the nodes | `string` | `"ubuntu"` |
+| `ssh_public_key_path` | Path to the SSH public key | `string` | `"~/.ssh/id_rsa.pub"` |
+| `enable_autoscaling` | Enable automatic scaling based on CPU metrics | `bool` | `true` |
+| `scale_out_cpu_threshold` | CPU threshold percentage to trigger scale out | `number` | `75` |
+| `scale_in_cpu_threshold` | CPU threshold percentage to trigger scale in | `number` | `25` |
+| `runcmd` | Command to run on the node at first boot time | `string` | `"echo 'Hello, World!'"` |
+
+## Outputs
+
+| Name | Description |
+|------|-------------|
+| `vmss_details` | Virtual Machine Scale Set details |
+| `autoscale_settings` | Autoscale settings details |
+| `network_security_group` | Network Security Group details |
+
+## Security Groups
+
+The module creates a Network Security Group with the following rules:
+
+- **Outbound**: Allow all outbound traffic
+- **SSH**: Allow inbound SSH (port 22) from anywhere
+- **Cluster Internal**: Allow all traffic within the subnet
+
+## Scaling Behavior
+
+This module supports both manual and automatic scaling modes:
+
+### Manual Scaling (`enable_autoscaling = false`)
+- **Direct Control**: Terraform directly manages VMSS instance count
+- **pool_size Changes**: Changing `pool_size` will update the VMSS immediately on `terraform apply`
+- **No Lifecycle Rules**: No `ignore_changes` applied to instances
+- **Use Case**: Predictable workloads requiring manual capacity control
+
+### Automatic Scaling (`enable_autoscaling = true`)
+- **CPU-Based**: Azure autoscaler manages instance count based on CPU metrics
+- **Scale Out**: When average CPU > `scale_out_cpu_threshold` (default 75%) for 5 minutes
+- **Scale In**: When average CPU < `scale_in_cpu_threshold` (default 25%) for 5 minutes
+- **Cooldown**: 1 minute between scaling actions
+- **Default Capacity**: `pool_size` sets the initial/default capacity
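+- **Lifecycle Protection**: Not yet implemented — the scale set has no `ignore_changes` on `instances`, so `terraform apply` may reset the instance count chosen by the autoscaler back to `pool_size`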
+
+## Instance Repair
+
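+The `enable_automatic_instance_repair` (default `true`) and `automatic_instance_repair_grace_period` (default `PT30M`, a 30-minute ISO 8601 duration) variables are declared for repairing failed VMs, but the scale set resource in `main.tf` does not reference them yet, so no repair policy is applied. 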
--- a/modules/azure-node-pool/data.tf
+++ b/modules/azure-node-pool/data.tf
@@ -0,0 +1,38 @@
+# =============================================================================
+# DATA SOURCES
+# =============================================================================
+
+data "azurerm_resource_group" "tenant" {
+  name = var.azure_resource_group_name
+}
+
+data "azurerm_virtual_network" "tenant_vnet" {
+  name                = var.azure_vnet_name
+  resource_group_name = var.azure_resource_group_name
+}
+
+data "azurerm_subnet" "tenant_subnet" {
+  name                 = var.azure_subnet_name
+  virtual_network_name = var.azure_vnet_name
+  resource_group_name  = var.azure_resource_group_name
+}
+
+# =============================================================================
+# CLOUD-INIT CONFIGURATION
+# =============================================================================
+
+data "cloudinit_config" "node_cloud_init" {
+  gzip          = true
+  base64_encode = true
+
+  part {
+    filename     = "cloud-config.yaml"
+    content_type = "text/cloud-config"
+    content = templatefile("${path.module}/../templates/cloud-init/userdata.yml.tpl", {
+      hostname       = ""
+      runcmd         = var.runcmd
+      ssh_user       = var.ssh_user
+      ssh_public_key = file(pathexpand(var.ssh_public_key_path))
+    })
+  }
+} 
--- a/modules/azure-node-pool/main.tf
+++ b/modules/azure-node-pool/main.tf
@@ -0,0 +1,206 @@
+# =============================================================================
+# TERRAFORM CONFIGURATION
+# =============================================================================
+
+terraform {
+  required_providers {
+    azurerm = {
+      source = "hashicorp/azurerm"
+    }
+    cloudinit = {
+      source = "hashicorp/cloudinit"
+    }
+  }
+}
+
+# =============================================================================
+# NETWORK SECURITY GROUP CONFIGURATION
+# =============================================================================
+
+# Network Security Group for Kubernetes Nodes
+resource "azurerm_network_security_group" "kubernetes" {
+  name                = "${var.tenant_cluster_name}-${var.pool_name}-nsg"
+  location            = var.azure_location
+  resource_group_name = var.azure_resource_group_name
+
+  tags = merge(
+    {
+      "Name" = "${var.tenant_cluster_name}-${var.pool_name}"
+    },
+    var.tags,
+  )
+}
+
+# Allow outgoing connectivity
+resource "azurerm_network_security_rule" "allow_all_outbound" {
+  name                        = "AllowAllOutbound"
+  priority                    = 100
+  direction                   = "Outbound"
+  access                      = "Allow"
+  protocol                    = "*"
+  source_port_range           = "*"
+  destination_port_range      = "*"
+  source_address_prefix       = "*"
+  destination_address_prefix  = "*"
+  resource_group_name         = var.azure_resource_group_name
+  network_security_group_name = azurerm_network_security_group.kubernetes.name
+}
+
+# Allow SSH access
+resource "azurerm_network_security_rule" "allow_ssh_inbound" {
+  name                        = "AllowSSHInbound"
+  priority                    = 1000
+  direction                   = "Inbound"
+  access                      = "Allow"
+  protocol                    = "Tcp"
+  source_port_range           = "*"
+  destination_port_range      = "22"
+  source_address_prefix       = "*"
+  destination_address_prefix  = "*"
+  resource_group_name         = var.azure_resource_group_name
+  network_security_group_name = azurerm_network_security_group.kubernetes.name
+}
+
+# Allow cluster internal communication
+resource "azurerm_network_security_rule" "allow_cluster_internal" {
+  name                        = "AllowClusterInternal"
+  priority                    = 1100
+  direction                   = "Inbound"
+  access                      = "Allow"
+  protocol                    = "*"
+  source_port_range           = "*"
+  destination_port_range      = "*"
+  source_address_prefix       = var.vnet_subnet_address_prefix
+  destination_address_prefix  = var.vnet_subnet_address_prefix
+  resource_group_name         = var.azure_resource_group_name
+  network_security_group_name = azurerm_network_security_group.kubernetes.name
+}
+
+# =============================================================================
+# VIRTUAL MACHINE SCALE SET
+# =============================================================================
+
+resource "azurerm_linux_virtual_machine_scale_set" "nodes" { # Linux VM scale set backing one node pool
+  name                = "${var.tenant_cluster_name}-${var.pool_name}-${var.enable_autoscaling ? "auto" : "manual"}-vmss" # suffix follows enable_autoscaling, so toggling it changes the name
+  resource_group_name = var.azure_resource_group_name
+  location            = var.azure_location
+  sku                 = var.vm_size
+  instances           = var.pool_size
+  # NOTE(review): no lifecycle ignore_changes on instances here; with autoscaling enabled, apply presumably resets the count to pool_size — confirm.
+  admin_username                  = var.ssh_user
+  disable_password_authentication = true
+  # Key-only SSH login; the public key is read from the local file at ssh_public_key_path.
+  admin_ssh_key {
+    username   = var.ssh_user
+    public_key = file(pathexpand(var.ssh_public_key_path))
+  }
+  # OS image; the defaults in vars.tf select Canonical / ubuntu-24_04-lts / server / latest.
+  source_image_reference {
+    publisher = var.vm_image_publisher
+    offer     = var.vm_image_offer
+    sku       = var.vm_image_sku
+    version   = var.vm_image_version
+  }
+
+  os_disk {
+    storage_account_type = var.node_disk_type
+    caching              = "ReadWrite"
+    disk_size_gb         = var.node_disk_size
+  }
+
+  network_interface {
+    name    = "primary"
+    primary = true
+
+    ip_configuration {
+      name      = "primary"
+      primary   = true
+      subnet_id = data.azurerm_subnet.tenant_subnet.id
+      # Emit a public_ip_address block only when assign_public_ip is true.
+      dynamic "public_ip_address" {
+        for_each = var.assign_public_ip ? [1] : []
+        content {
+          name = "primary"
+        }
+      }
+    }
+
+    network_security_group_id = azurerm_network_security_group.kubernetes.id
+  }
+  # Gzipped, base64-encoded cloud-init user data rendered by the cloudinit_config data source in data.tf.
+  custom_data = data.cloudinit_config.node_cloud_init.rendered
+
+  # Upgrade policy mode (the vars.tf description lists Manual, Automatic, Rolling)
+  upgrade_mode = var.upgrade_mode
+  # NOTE(review): enable_automatic_instance_repair and automatic_instance_repair_grace_period are declared in vars.tf but not referenced in this resource.
+  tags = merge(
+    {
+      "Name" = "${var.tenant_cluster_name}-${var.pool_name}"
+    },
+    var.tags,
+  )
+}
+
+# =============================================================================
+# AUTO SCALING CONFIGURATION
+# =============================================================================
+
+resource "azurerm_monitor_autoscale_setting" "nodes" {
+  count               = var.enable_autoscaling ? 1 : 0
+  name                = "${var.tenant_cluster_name}-${var.pool_name}-autoscale"
+  resource_group_name = var.azure_resource_group_name
+  location            = var.azure_location
+  target_resource_id  = azurerm_linux_virtual_machine_scale_set.nodes.id
+
+  profile {
+    name = "AutoScale"
+
+    capacity {
+      default = var.pool_size
+      minimum = var.pool_min_size
+      maximum = var.pool_max_size
+    }
+
+    rule {
+      metric_trigger {
+        metric_name        = "Percentage CPU"
+        metric_resource_id = azurerm_linux_virtual_machine_scale_set.nodes.id
+        time_grain         = "PT1M"
+        statistic          = "Average"
+        time_window        = "PT5M"
+        time_aggregation   = "Average"
+        operator           = "GreaterThan"
+        threshold          = var.scale_out_cpu_threshold
+      }
+
+      scale_action {
+        direction = "Increase"
+        type      = "ChangeCount"
+        value     = "1"
+        cooldown  = "PT1M"
+      }
+    }
+
+    rule {
+      metric_trigger {
+        metric_name        = "Percentage CPU"
+        metric_resource_id = azurerm_linux_virtual_machine_scale_set.nodes.id
+        time_grain         = "PT1M"
+        statistic          = "Average"
+        time_window        = "PT5M"
+        time_aggregation   = "Average"
+        operator           = "LessThan"
+        threshold          = var.scale_in_cpu_threshold
+      }
+
+      scale_action {
+        direction = "Decrease"
+        type      = "ChangeCount"
+        value     = "1"
+        cooldown  = "PT1M"
+      }
+    }
+  }
+
+  tags = var.tags
+} 
--- a/modules/azure-node-pool/outputs.tf
+++ b/modules/azure-node-pool/outputs.tf
@@ -0,0 +1,43 @@
+# =============================================================================
+# VIRTUAL MACHINE SCALE SET
+# =============================================================================
+
+output "vmss_details" {
+  description = "Virtual Machine Scale Set details"
+  value = {
+    name             = azurerm_linux_virtual_machine_scale_set.nodes.name
+    id               = azurerm_linux_virtual_machine_scale_set.nodes.id
+    resource_group   = azurerm_linux_virtual_machine_scale_set.nodes.resource_group_name
+    location         = azurerm_linux_virtual_machine_scale_set.nodes.location
+    instances        = azurerm_linux_virtual_machine_scale_set.nodes.instances
+    vm_size          = azurerm_linux_virtual_machine_scale_set.nodes.sku
+  }
+}
+
+# =============================================================================
+# AUTOSCALING SETTINGS
+# =============================================================================
+
+output "autoscale_settings" {
+  description = "Autoscale settings details"
+  value = var.enable_autoscaling ? {
+    name               = azurerm_monitor_autoscale_setting.nodes[0].name
+    id                 = azurerm_monitor_autoscale_setting.nodes[0].id
+    min_size           = var.pool_min_size
+    max_size           = var.pool_max_size
+    target_resource_id = azurerm_monitor_autoscale_setting.nodes[0].target_resource_id
+  } : null
+}
+
+# =============================================================================
+# NETWORK SECURITY GROUP
+# =============================================================================
+
+output "network_security_group" {
+  description = "Network Security Group details"
+  value = {
+    name                = azurerm_network_security_group.kubernetes.name
+    id                  = azurerm_network_security_group.kubernetes.id
+    resource_group_name = azurerm_network_security_group.kubernetes.resource_group_name
+  }
+} 
--- a/modules/azure-node-pool/vars.tf
+++ b/modules/azure-node-pool/vars.tf
@@ -0,0 +1,203 @@
+# =============================================================================
+# CLUSTER CONFIGURATION
+# =============================================================================
+
+# Name of the tenant cluster
+variable "tenant_cluster_name" {
+  description = "Name of the tenant cluster"
+  type        = string
+  default     = "charlie"
+}
+
+# =============================================================================
+# POOL CONFIGURATION
+# =============================================================================
+
+variable "runcmd" {
+  description = "Command to run on the node at first boot time"
+  type        = string
+  default     = "echo 'Hello, World!'"
+}
+
+variable "pool_name" {
+  description = "Name of the node pool"
+  type        = string
+  default     = "default"
+}
+
+variable "pool_size" {
+  description = "The size of the node pool"
+  type        = number
+  default     = 3
+}
+
+variable "pool_min_size" {
+  description = "The minimum size of the node pool"
+  type        = number
+  default     = 1
+}
+
+variable "pool_max_size" {
+  description = "The maximum size of the node pool"
+  type        = number
+  default     = 9
+}
+
+# =============================================================================
+# AZURE CONFIGURATION
+# =============================================================================
+
+variable "azure_location" {
+  description = "Azure region where resources are created"
+  type        = string
+  default     = "italynorth"
+}
+
+variable "azure_resource_group_name" {
+  description = "Name of the Azure resource group"
+  type        = string
+  default     = "kamaji"
+}
+
+variable "azure_vnet_name" {
+  description = "Name of the Azure virtual network"
+  type        = string
+  default     = "kamaji-vnet"
+}
+
+variable "azure_subnet_name" {
+  description = "Name of the Azure subnet"
+  type        = string
+  default     = "kamaji-subnet"
+}
+
+variable "vnet_subnet_address_prefix" {
+  description = "Address prefix for the subnet (used for security group rules)"
+  type        = string
+  default     = "10.10.10.0/24"
+}
+
+variable "vm_size" {
+  description = "Size of the virtual machines"
+  type        = string
+  default     = "Standard_D2s_v3"
+}
+
+variable "assign_public_ip" {
+  description = "Whether to assign public IP addresses to VMs"
+  type        = bool
+  default     = true
+}
+
+variable "tags" {
+  description = "Tags used for Azure resources"
+  type        = map(string)
+  default = {
+    "ManagedBy" = "Clastix"
+    "CreatedBy" = "Terraform"
+  }
+}
+
+# =============================================================================
+# VM IMAGE CONFIGURATION
+# =============================================================================
+
+variable "vm_image_publisher" {
+  description = "Publisher of the VM image"
+  type        = string
+  default     = "Canonical"
+}
+
+variable "vm_image_offer" {
+  description = "Offer of the VM image"
+  type        = string
+  default     = "ubuntu-24_04-lts"
+}
+
+variable "vm_image_sku" {
+  description = "SKU of the VM image"
+  type        = string
+  default     = "server"
+}
+
+variable "vm_image_version" {
+  description = "Version of the VM image"
+  type        = string
+  default     = "latest"
+}
+
+# =============================================================================
+# NODE CONFIGURATION
+# =============================================================================
+
+variable "node_disk_size" {
+  description = "Disk size for each node in GB"
+  type        = number
+  default     = 30
+}
+
+variable "node_disk_type" {
+  description = "Storage account type for each node (Standard_LRS, Premium_LRS)"
+  type        = string
+  default     = "Premium_LRS"
+}
+
+# =============================================================================
+# SSH CONFIGURATION
+# =============================================================================
+
+variable "ssh_user" {
+  description = "SSH user for the nodes"
+  type        = string
+  default     = "ubuntu"
+}
+
+variable "ssh_public_key_path" {
+  description = "Path to the SSH public key"
+  type        = string
+  default     = "~/.ssh/id_rsa.pub"
+}
+
+# =============================================================================
+# AUTO SCALING CONFIGURATION
+# =============================================================================
+
+variable "enable_autoscaling" {
+  description = "Enable automatic scaling based on CPU metrics"
+  type        = bool
+  default     = true
+}
+
+variable "scale_out_cpu_threshold" {
+  description = "CPU threshold percentage to trigger scale out"
+  type        = number
+  default     = 75
+}
+
+variable "scale_in_cpu_threshold" {
+  description = "CPU threshold percentage to trigger scale in"
+  type        = number
+  default     = 25
+}
+
+# =============================================================================
+# INSTANCE REPAIR CONFIGURATION
+# =============================================================================
+
+variable "enable_automatic_instance_repair" {
+  description = "Enable automatic instance repair for failed VMs"
+  type        = bool
+  default     = true
+}
+
+variable "automatic_instance_repair_grace_period" {
+  description = "Grace period for automatic instance repair, as an ISO 8601 duration (e.g. PT30M = 30 minutes)"
+  type        = string
+  default     = "PT30M"
+}
+
+variable "upgrade_mode" {
+  description = "Upgrade mode for the scale set (Manual, Automatic, Rolling)"
+  type        = string
+  default     = "Manual"
+} 
--- a/providers/azure/README.md
+++ b/providers/azure/README.md
@@ -0,0 +1,356 @@
+# Azure Provider for Kamaji Node Pools
+
+Ready-to-use Terraform implementation for creating multiple Kubernetes worker node pools on Microsoft Azure using Virtual Machine Scale Sets (VMSS).
+
+## Features
+
+- **Multiple Node Pools** with different configurations per pool
+- **Virtual Machine Scale Sets** with automatic scaling per pool
+- **CPU-based autoscaling** with configurable thresholds per pool
+- **Automatic instance repair** for failed VMs (configurable per pool)
+- **Network Security Groups** with Kubernetes-optimized rules
+- **Ubuntu 24.04 LTS** support
+- **Bootstrap token integration** with automatic cluster joining via YAKI
+
+## Prerequisites
+
+### Required Infrastructure
+
+Before using this provider, you must have:
+
+1. **Azure Resource Group** - Where all resources will be created
+2. **Virtual Network (VNet)** - Network for the node pools
+3. **Subnet** - Subnet within the VNet for VM instances
+4. **Kamaji tenant cluster** - Running cluster with accessible kubeconfig
+
+### Required Tools
+
+- Terraform >= 1.0
+- Azure CLI >= 2.0 (authenticated)
+- SSH key pair for node access
+- [direnv](https://direnv.net/) (optional, for automatic environment management)
+
+## Quick Start
+
+### 1. Authentication
+
+```bash
+# Login to Azure
+az login
+
+# Set subscription (if needed)
+az account set --subscription "your-subscription-id"
+
+# Verify access
+az account show
+```
+
+### 2. Environment Configuration
+
+**Option A: Using .envrc (Recommended)**
+
+```bash
+# Install direnv (if not already installed)
+# macOS: brew install direnv
+# Ubuntu: apt install direnv
+
+# Add direnv hook to your shell
+echo 'eval "$(direnv hook bash)"' >> ~/.bashrc  # for bash
+echo 'eval "$(direnv hook zsh)"' >> ~/.zshrc    # for zsh
+
+# Copy and edit .envrc file
+cp .envrc.sample .envrc
+vim .envrc
+
+# Allow direnv to load the environment
+direnv allow
+```
+
+**Option B: Using tfvars file**
+
+```bash
+# Copy sample configuration
+cp main.auto.tfvars.sample main.auto.tfvars
+
+# Edit configuration - MAKE SURE TO SET azure_subscription_id
+vim main.auto.tfvars
+```
+
+### 3. Deploy
+
+```bash
+# Initialize Terraform
+terraform init
+
+# Review plan
+terraform plan
+
+# Apply configuration
+terraform apply
+```
+
+## Configuration
+
+### Required Variables
+
+```hcl
+# Azure subscription (REQUIRED - no default)
+azure_subscription_id = "your-subscription-id"
+
+# Cluster identity
+tenant_cluster_name = "your-cluster-name"
+
+# Bootstrap configuration
+tenant_kubeconfig_path = "/path/to/your/cluster.kubeconfig"
+
+# Azure infrastructure (must exist)
+azure_resource_group_name = "your-resource-group"
+azure_vnet_name          = "your-vnet"
+azure_subnet_name        = "your-subnet"
+
+# Node pools configuration
+node_pools = [
+  {
+    name           = "default"
+    size           = 3
+    node_disk_size = 30
+    vm_size        = "Standard_D2s_v3"
+    # ... other pool-specific settings
+  }
+]
+```
+
+### Node Pool Configuration
+
+Each node pool supports the following configuration options:
+
+```hcl
+node_pools = [
+  {
+    # Required fields
+    name           = "pool-name"        # Unique name for this pool
+    size           = 3                  # Number of nodes in the pool
+    node_disk_size = 30                 # Disk size in GB
+    vm_size        = "Standard_D2s_v3"  # Azure VM size
+
+    # Optional fields with defaults
+    min_size               = 1          # Minimum nodes for autoscaling
+    max_size               = 9          # Maximum nodes for autoscaling
+    node_disk_type         = "Premium_LRS"  # Storage type
+    assign_public_ip       = true       # Assign public IPs
+    enable_autoscaling     = false      # Enable CPU-based autoscaling
+    scale_out_cpu_threshold = 75        # CPU threshold to scale out
+    scale_in_cpu_threshold  = 25        # CPU threshold to scale in
+    enable_automatic_instance_repair = false  # Enable automatic repair
+    automatic_instance_repair_grace_period = "PT30M"  # Repair grace period
+    upgrade_mode           = "Manual"   # Scale set upgrade mode
+  }
+]
+```
+
+### Multiple Node Pools Example
+
+```hcl
+node_pools = [
+  {
+    name           = "default"
+    size           = 3
+    node_disk_size = 50
+    vm_size        = "Standard_D2s_v3"
+    assign_public_ip = true
+    enable_autoscaling = false
+  },
+  {
+    name           = "system"
+    size           = 2
+    node_disk_size = 100
+    vm_size        = "Standard_D4s_v3"
+    assign_public_ip = false
+    enable_autoscaling = true
+    scale_out_cpu_threshold = 80
+    scale_in_cpu_threshold  = 30
+    enable_automatic_instance_repair = true
+  }
+]
+```
+
+### Environment Variable Options
+
+You can set any Terraform variable using `TF_VAR_<name>` environment variables (values defined in `*.tfvars` files take precedence over them):
+
+```bash
+# Using .envrc file
+export TF_VAR_azure_subscription_id="your-subscription-id"
+export TF_VAR_azure_location="italynorth"
+export TF_VAR_tenant_cluster_name="customer-azure"
+export TF_VAR_tenant_kubeconfig_path="/path/to/kubeconfig"
+
+# Or set Azure provider environment variables
+export ARM_SUBSCRIPTION_ID="your-subscription-id"
+export ARM_CLIENT_ID="your-service-principal-id"        # if using service principal
+export ARM_CLIENT_SECRET="your-service-principal-secret" # if using service principal
+export ARM_TENANT_ID="your-tenant-id"                   # if using service principal
+```
+
+### Infrastructure Setup
+
+If you haven't created the required Azure infrastructure:
+
+```bash
+# Create resource group
+az group create --name "kamaji" --location "italynorth"
+
+# Create virtual network with subnet
+az network vnet create \
+  --resource-group "kamaji" \
+  --name "kamaji-vnet" \
+  --address-prefix "10.10.0.0/16" \
+  --subnet-name "kamaji-subnet" \
+  --subnet-prefix "10.10.10.0/24" \
+  --location "italynorth"
+```
+
+## Scaling Configuration
+
+Each node pool supports both manual and automatic scaling modes:
+
+### Manual Scaling (enable_autoscaling = false)
+- **Terraform Control**: Direct control over instance count via `size`
+- **No Autoscaler**: Azure autoscaler is disabled for this pool
+- **Use Case**: When you want predictable, manual control over node count
+- **Scaling**: Change `size` in tfvars and run `terraform apply`
+
+### Automatic Scaling (enable_autoscaling = true)
+- **CPU-Based**: Automatic scaling based on CPU utilization
+- **Scale Out**: When average CPU > threshold for 5 minutes
+- **Scale In**: When average CPU < threshold for 5 minutes  
+- **Cooldown**: 1 minute between scaling actions
+- **Range**: min_size to max_size instances
+- **Note**: `size` sets the default capacity; actual scaling is managed by Azure
+
+## Security
+
+### Network Security Groups
+
+Automatically creates security group rules for:
+
+- **Outbound**: Allow all outbound traffic
+- **SSH**: Allow inbound SSH (port 22) from anywhere
+- **Cluster Internal**: Allow all traffic within the subnet
+
+### VM Security
+
+- SSH key-based authentication (password disabled)
+- Premium SSD storage by default
+- Automatic security updates via cloud-init
+
+## Monitoring
+
+### Instance Health
+
+- Automatic instance repair configurable per pool
+- Failed instances can be replaced manually, or automatically via Azure policies
+
+### Scaling Metrics
+
+- CPU percentage monitoring every minute
+- 5-minute evaluation windows for scaling decisions
+- Configurable thresholds per pool for scale-out/scale-in
+
+## Troubleshooting
+
+### Common Issues
+
+1. **Subscription ID Error**
+   ```bash
+   # Make sure subscription ID is set
+   export TF_VAR_azure_subscription_id="your-subscription-id"
+   # Or update main.auto.tfvars
+   ```
+
+2. **Authentication Errors**
+   ```bash
+   az login
+   az account set --subscription "your-subscription-id"
+   ```
+
+3. **Resource Group Not Found**
+   ```bash
+   az group create --name "kamaji" --location "italynorth"
+   ```
+
+4. **VNet/Subnet Not Found**
+   ```bash
+   az network vnet create --resource-group "kamaji" --name "kamaji-vnet" --address-prefix "10.10.0.0/16" --subnet-name "kamaji-subnet" --subnet-prefix "10.10.10.0/24"
+   ```
+
+5. **Kubeconfig Issues**
+   - Verify kubeconfig path exists
+   - Test cluster connectivity: `kubectl --kubeconfig=/path/to/config get nodes`
+
+### Debugging
+
+```bash
+# Enable detailed logging
+export TF_LOG=DEBUG
+
+# Check Azure resources
+az resource list --resource-group "kamaji" --output table
+
+# Verify VM scale sets (one per pool)
+az vmss list --resource-group "kamaji" --output table
+```
+
+## Outputs
+
+After successful deployment:
+
+```bash
+# View outputs
+terraform output
+
+# Get cluster information
+terraform output cluster_info
+
+# Get all node pools details  
+terraform output node_pools
+```
+
+Example output structure:
+```bash
+cluster_info = {
+  "azure_location" = "italynorth"
+  "node_pools_count" = 2
+  "node_pools_names" = ["default", "system"]
+  "tenant_cluster_name" = "customer-azure"
+}
+
+node_pools = {
+  "default" = {
+    "autoscale_settings" = { ... }
+    "network_security_group" = { ... }
+    "vmss_details" = { ... }
+  }
+  "system" = {
+    "autoscale_settings" = { ... }
+    "network_security_group" = { ... }
+    "vmss_details" = { ... }
+  }
+}
+```
+
+## Cleanup
+
+```bash
+# Destroy infrastructure
+terraform destroy
+
+# Confirm removal
+az resource list --resource-group "kamaji" --output table
+```
+
+## Support
+
+- **Documentation**: [Kamaji Documentation](https://kamaji.clastix.io)
+- **Bootstrap Script**: [YAKI](https://goyaki.clastix.io)
+- **Issues**: Report issues in the project repository 
--- a/providers/azure/backend.tf
+++ b/providers/azure/backend.tf
@@ -0,0 +1,9 @@
+# =============================================================================
+# TERRAFORM BACKEND
+# =============================================================================
+
+terraform {
+  backend "local" { # state is stored on the local filesystem
+    path = "tfstate/terraform.tfstate" # relative path of the state file
+  }
+} 
--- a/providers/azure/main.auto.tfvars.sample
+++ b/providers/azure/main.auto.tfvars.sample
@@ -0,0 +1,47 @@
+# Azure Configuration
+azure_subscription_id       = ""    # Azure subscription ID (REQUIRED: vars.tf defines no default)
+azure_location              = ""    # Azure region (e.g., "eastus", "westeurope", "italynorth")
+azure_resource_group_name   = ""    # Resource group name (e.g., "kamaji")
+azure_vnet_name             = ""    # Virtual network name (e.g., "kamaji-vnet")
+azure_subnet_name           = ""    # Subnet name (e.g., "kamaji-subnet")
+vnet_subnet_address_prefix  = ""    # Subnet CIDR (e.g., "10.10.10.0/24")
+
+# SSH Configuration
+ssh_user            = ""           # SSH username (e.g., "ubuntu", "azureuser")
+ssh_public_key_path = ""           # Path to SSH public key (e.g., "~/.ssh/id_rsa.pub")
+
+# Tenant Cluster Configuration
+tenant_cluster_name    = ""        # Name of the tenant cluster (REQUIRED: vars.tf defines no default)
+tenant_kubeconfig_path = ""        # Path to kubeconfig file (e.g., "~/.kube/config")
+
+# Node Pool Configuration
+node_pools = [
+  {
+    name                                   = ""    # Pool name, unique across pools (e.g., "default", "workers")
+    vm_size                                = ""    # Azure VM size (e.g., "Standard_D2s_v3", "Standard_D4s_v3")
+    size                                   = 0     # Node count for the pool
+    min_size                               = 0     # Lower node-count bound (for autoscaling)
+    max_size                               = 0     # Upper node-count bound (for autoscaling)
+    node_disk_size                         = 0     # Per-node disk size (in GB)
+    node_disk_type                         = ""    # Storage type (Standard_LRS, Premium_LRS)
+    assign_public_ip                       = false # Whether nodes get public IP addresses
+    enable_autoscaling                     = false # Turn on CPU-based autoscaling
+    scale_out_cpu_threshold                = 0     # CPU percentage that triggers scale out
+    scale_in_cpu_threshold                 = 0     # CPU percentage that triggers scale in
+    upgrade_mode                           = ""    # Upgrade mode (Manual, Automatic, Rolling)
+    enable_automatic_instance_repair       = false # Turn on automatic instance repair
+    automatic_instance_repair_grace_period = ""    # Repair grace period (e.g., "PT30M")
+  },
+  # Append further pool objects here as needed.
+]
+
+# Example: List available VM sizes in your region
+# az vm list-sizes --location italynorth --output table
+
+# Tags for Azure resources
+tags = {
+  ManagedBy   = "" # Owner of these resources (e.g., "Terraform", "Clastix")
+  CreatedBy   = "" # Tool that created these resources (e.g., "Terraform")
+  Environment = "" # Environment name (e.g., "dev", "staging", "prod")
+  Project     = "" # Project name (e.g., "Kamaji", "MyProject")
+}
--- a/providers/azure/main.tf
+++ b/providers/azure/main.tf
@@ -0,0 +1,86 @@
+# =============================================================================
+# PROVIDERS
+# =============================================================================
+
+# Kubernetes provider, pointed at the tenant cluster
+provider "kubernetes" {
+  # Kubeconfig file used to reach the tenant cluster (var.tenant_kubeconfig_path)
+  config_path = var.tenant_kubeconfig_path
+}
+
+# Azure Resource Manager provider, bound to the subscription given in var.azure_subscription_id
+provider "azurerm" {
+  subscription_id = var.azure_subscription_id
+  features {} # left empty: no provider feature overrides are configured
+}
+
+# =============================================================================
+# BOOTSTRAP TOKEN
+# =============================================================================
+
+# Shared bootstrap-token module; its join_cmd output is passed to every node pool below
+module "bootstrap_token" {
+  source          = "../../modules/bootstrap-token" # module shared from the repository's modules/ directory
+  kubeconfig_path = var.tenant_kubeconfig_path      # kubeconfig of the tenant cluster
+  yaki_url        = var.yaki_url                    # URL of the YAKI bootstrap script
+}
+
+# =============================================================================
+# NODE POOLS
+# =============================================================================
+
+module "azure_node_pools" {
+  source = "../../modules/azure-node-pool"
+
+  # One module instance per entry in var.node_pools, keyed by pool name
+  for_each = { for pool in var.node_pools : pool.name => pool }
+
+  # Tenant cluster configuration
+  tenant_cluster_name = var.tenant_cluster_name
+
+  # Pool sizing
+  pool_name     = each.value.name
+  pool_size     = each.value.size
+  pool_min_size = each.value.min_size
+  pool_max_size = each.value.max_size
+
+  # Node disk
+  node_disk_size = each.value.node_disk_size
+  node_disk_type = each.value.node_disk_type
+
+  # Azure placement, networking and tagging
+  azure_location             = var.azure_location
+  azure_resource_group_name  = var.azure_resource_group_name
+  azure_vnet_name            = var.azure_vnet_name
+  azure_subnet_name          = var.azure_subnet_name
+  vnet_subnet_address_prefix = var.vnet_subnet_address_prefix
+  vm_size                    = each.value.vm_size
+  assign_public_ip           = each.value.assign_public_ip
+  tags                       = var.tags
+
+  # VM image
+  vm_image_publisher = var.vm_image_publisher
+  vm_image_offer     = var.vm_image_offer
+  vm_image_sku       = var.vm_image_sku
+  vm_image_version   = var.vm_image_version
+
+  # SSH configuration
+  ssh_user            = var.ssh_user
+  ssh_public_key_path = var.ssh_public_key_path
+
+  # Autoscaling configuration
+  enable_autoscaling      = each.value.enable_autoscaling
+  scale_out_cpu_threshold = each.value.scale_out_cpu_threshold
+  scale_in_cpu_threshold  = each.value.scale_in_cpu_threshold
+
+  # Instance repair and upgrade configuration
+  enable_automatic_instance_repair       = each.value.enable_automatic_instance_repair
+  automatic_instance_repair_grace_period = each.value.automatic_instance_repair_grace_period
+  upgrade_mode                           = each.value.upgrade_mode
+
+  # Join command for bootstrapping nodes. Referencing this module output already
+  # orders every pool after the bootstrap token, so no explicit depends_on is
+  # set: a module-level depends_on would only make plans more conservative
+  # (e.g. deferring reads of any data sources inside the pool module).
+  runcmd = module.bootstrap_token.join_cmd
+}
--- a/providers/azure/outputs.tf
+++ b/providers/azure/outputs.tf
@@ -0,0 +1,40 @@
+# =============================================================================
+# BOOTSTRAP TOKEN OUTPUTS
+# =============================================================================
+
+output "bootstrap_token" {
+  description = "Bootstrap token details"
+  sensitive   = true # keeps the join command out of plan/apply console output
+  value = {
+    join_cmd = module.bootstrap_token.join_cmd
+  }
+}
+
+# =============================================================================
+# AZURE NODE POOLS OUTPUTS
+# =============================================================================
+
+output "node_pools" {
+  description = "Azure node pools details"
+  value = { # one entry per pool module instance, keyed by pool name
+    for pool_key, pool_outputs in module.azure_node_pools : pool_key => {
+      autoscale_settings     = pool_outputs.autoscale_settings
+      network_security_group = pool_outputs.network_security_group
+      vmss_details           = pool_outputs.vmss_details
+    }
+  }
+}
+
+# =============================================================================
+# CLUSTER INFORMATION
+# =============================================================================
+
+output "cluster_info" {
+  description = "Cluster information"
+  value = {
+    azure_location      = var.azure_location
+    node_pools_count    = length(var.node_pools)
+    node_pools_names    = [for np in var.node_pools : np.name]
+    tenant_cluster_name = var.tenant_cluster_name
+  }
+}
--- a/providers/azure/vars.tf
+++ b/providers/azure/vars.tf
@@ -0,0 +1,144 @@
+# =============================================================================
+# CLUSTER CONFIGURATION
+# =============================================================================
+
+variable "tenant_cluster_name" {
+  type        = string # no default: must be supplied by the caller
+  description = "Name of the tenant cluster"
+}
+
+variable "tenant_kubeconfig_path" {
+  type        = string
+  description = "Path to the kubeconfig file for the tenant cluster"
+  default     = "~/.kube/config"
+}
+
+# =============================================================================
+# BOOTSTRAP CONFIGURATION
+# =============================================================================
+
+variable "yaki_url" {
+  type        = string
+  description = "URL to the YAKI script for node bootstrapping"
+  default     = "https://goyaki.clastix.io"
+}
+
+# =============================================================================
+# NODE POOL CONFIGURATION
+# =============================================================================
+
+variable "node_pools" {
+  description = "List of Azure node pools with their configurations"
+  type = list(object({
+    name                                   = string # required; used as the for_each key in main.tf
+    vm_size                                = string # required
+    size                                   = number # required
+    node_disk_size                         = number # required
+    node_disk_type                         = optional(string, "Premium_LRS")
+    min_size                               = optional(number, 1)
+    max_size                               = optional(number, 9)
+    assign_public_ip                       = optional(bool, true)
+    enable_autoscaling                     = optional(bool, false)
+    scale_out_cpu_threshold                = optional(number, 75)
+    scale_in_cpu_threshold                 = optional(number, 25)
+    upgrade_mode                           = optional(string, "Manual")
+    enable_automatic_instance_repair       = optional(bool, false)
+    automatic_instance_repair_grace_period = optional(string, "PT30M")
+  }))
+}
+
+# =============================================================================
+# AZURE CONFIGURATION
+# =============================================================================
+
+variable "azure_subscription_id" {
+  type        = string # no default: must be supplied by the caller
+  description = "Azure subscription ID"
+}
+
+variable "azure_location" {
+  type        = string
+  description = "Azure region where resources will be created"
+  default     = "italynorth"
+}
+
+variable "azure_resource_group_name" {
+  type        = string
+  description = "Name of the Azure resource group"
+  default     = "kamaji"
+}
+
+variable "azure_vnet_name" {
+  type        = string
+  description = "Name of the Azure virtual network"
+  default     = "kamaji-vnet"
+}
+
+variable "azure_subnet_name" {
+  type        = string
+  description = "Name of the Azure subnet"
+  default     = "kamaji-subnet"
+}
+
+variable "vnet_subnet_address_prefix" {
+  type        = string
+  description = "CIDR block for the Azure subnet"
+  default     = "10.10.10.0/24"
+}
+
+# =============================================================================
+# VM IMAGE CONFIGURATION
+# =============================================================================
+
+variable "vm_image_publisher" {
+  type        = string
+  description = "Azure VM image publisher"
+  default     = "Canonical"
+}
+
+variable "vm_image_offer" {
+  type        = string
+  description = "Azure VM image offer"
+  default     = "ubuntu-24_04-lts"
+}
+
+variable "vm_image_sku" {
+  type        = string
+  description = "Azure VM image SKU"
+  default     = "server"
+}
+
+variable "vm_image_version" {
+  type        = string
+  description = "Azure VM image version"
+  default     = "latest"
+}
+
+# =============================================================================
+# SSH CONFIGURATION
+# =============================================================================
+
+variable "ssh_user" {
+  type        = string
+  description = "SSH user for node access"
+  default     = "ubuntu"
+}
+
+variable "ssh_public_key_path" {
+  type        = string
+  description = "Path to the SSH public key"
+  default     = "~/.ssh/id_rsa.pub"
+}
+
+# =============================================================================
+# TAGS
+# =============================================================================
+
+variable "tags" {
+  type        = map(string)
+  description = "Tags applied to Azure resources"
+  default = {
+    ManagedBy = "Clastix"
+    CreatedBy = "Terraform"
+  }
+}
--- a/providers/azure/versions.tf
+++ b/providers/azure/versions.tf
@@ -0,0 +1,24 @@
+terraform {
+  required_version = ">= 1.3" # vars.tf uses optional(type, default), available from Terraform 1.3
+
+  required_providers {
+    azurerm = {
+      source  = "hashicorp/azurerm"
+      version = ">= 3.0"
+    }
+    cloudinit = {
+      source  = "hashicorp/cloudinit"
+      version = ">= 2.0"
+    }
+    kubernetes = {
+      source  = "hashicorp/kubernetes"
+      version = ">= 2.0"
+    }
+    tls = {
+      source  = "hashicorp/tls"
+      version = ">= 3.0"
+    }
+  }
+}
+
+