From aed278365810c96a31ffa0642c3e879a93af9900 Mon Sep 17 00:00:00 2001
From: Ryan Cragun
Date: Mon, 26 Jun 2023 16:06:03 -0600
Subject: [PATCH] enos: use on-demand targets (#21459)

Add an updated `target_ec2_instances` module that is capable of
dynamically splitting target instances over the subnets/AZs that are
compatible with the AMI architecture and its associated instance type.
Use the `target_ec2_instances` module where necessary. Ensure that
`raft` storage scenarios don't provision unnecessary infrastructure by
adding a new `target_ec2_shim` module.

After a lot of trial and error, the state of EC2 spot instance capacity,
the associated APIs, and the current support for the various fleet types
in the AWS Terraform provider have proven spot instances too unreliable
for scenario targets. The current state of each method:

* `target_ec2_fleet`: unusable because the `instant` request type does
  not guarantee fulfillment of either `spot` or `on-demand` instances.
  The module does support both `on-demand` and `spot` request types and
  is capable of bidding across a maximum of four availability zones,
  which would make it an attractive choice if the `instant` type always
  fulfilled requests. Perhaps a `request` type with a
  `wait_for_fulfillment` option, like `aws_spot_fleet_request` has,
  would make it viable in the future.
* `target_ec2_spot_fleet`: more reliable when the target instance types
  have spot capacity in the chosen zone, but issues in the AWS provider
  prevent us from bidding across multiple zones successfully. Over the
  last 2-3 months, spot capacity for the instance types we'd prefer to
  use has dropped dramatically and the price is at or near on-demand.
  That much volatility for nearly no cost savings means we should shelve
  this option for now.
* `target_ec2_instances`: the most reliable method we've got. It is now
  capable of automatically determining which subnets and availability
  zones to provision targets in, and has been updated to be usable for
  both Vault and Consul targets. By default we use the cheapest medium
  instance types that we've found to be reliable for testing Vault.
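As a sketch of the selection logic (condensed from the new
target_ec2_instances module below, so `instance_types`,
`data.aws_ami.ami`, and `data.aws_subnets.vpc` are that module's names),
the AMI architecture picks the instance type and each target is assigned
round-robin to one of the subnets whose availability zones actually
offer that type:

    locals {
      # data.aws_ami.ami.architecture is "arm64" or "x86_64"
      instance_type = local.instance_types[data.aws_ami.ami.architecture]
    }

    resource "aws_instance" "targets" {
      for_each      = local.instances
      ami           = var.ami_id
      instance_type = local.instance_type
      # data.aws_subnets.vpc is pre-filtered to AZs that offer the chosen type
      subnet_id     = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)]
    }

Relatedly, create_vpc now carves one subnet per availability zone with
cidrsubnet(var.cidr, 8, count.index), so a /16 VPC CIDR yields a
distinct /24 per AZ.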
* Update .gitignore
* enos/modules/create_vpc: create a subnet for every availability zone
* enos/modules/target_ec2_fleet: bid across the maximum of four
  availability zones for targets
* enos/modules/target_ec2_spot_fleet: attempt to make the spot fleet bid
  across more availability zones for targets
* enos/modules/target_ec2_instances: create module to use
  ec2:RunInstances for scenario targets
* enos/modules/target_ec2_shim: create shim module to satisfy the target
  module interface
* enos/scenarios: use target_ec2_shim for backend targets on raft
  storage scenarios
* enos/modules/az_finder: remove unused module

Signed-off-by: Ryan Cragun
---
 .gitignore                                    |  20 +-
 enos/enos-modules.hcl                         |  39 ++-
 enos/enos-scenario-agent.hcl                  |   2 +-
 enos/enos-scenario-autopilot.hcl              |   4 +-
 enos/enos-scenario-replication.hcl            |  14 +-
 enos/enos-scenario-smoke.hcl                  |   6 +-
 enos/enos-scenario-ui.hcl                     |   6 +-
 enos/enos-scenario-upgrade.hcl                |   6 +-
 enos/modules/az_finder/main.tf                |  28 --
 enos/modules/create_vpc/main.tf               |  15 +-
 enos/modules/create_vpc/outputs.tf            |   5 -
 enos/modules/target_ec2_fleet/main.tf         |  49 ++--
 enos/modules/target_ec2_fleet/variables.tf    |   2 +-
 enos/modules/target_ec2_instances/main.tf     | 248 ++++++++++++++++++
 enos/modules/target_ec2_instances/outputs.tf  |  11 +
 .../modules/target_ec2_instances/variables.tf |  67 +++++
 enos/modules/target_ec2_shim/main.tf          |  46 ++++
 enos/modules/target_ec2_spot_fleet/main.tf    |  61 +++--
 18 files changed, 511 insertions(+), 118 deletions(-)
 delete mode 100644 enos/modules/az_finder/main.tf
 create mode 100644 enos/modules/target_ec2_instances/main.tf
 create mode 100644 enos/modules/target_ec2_instances/outputs.tf
 create mode 100644 enos/modules/target_ec2_instances/variables.tf
 create mode 100644 enos/modules/target_ec2_shim/main.tf

diff --git a/.gitignore b/.gitignore
index 81e0bbeb74..c8d0ffbb95 100644
--- a/.gitignore
+++ b/.gitignore
@@ -60,18 +60,13 @@ Vagrantfile
 !enos/**/*.hcl

 # Enos
-enos/.enos
-enos/enos-local.vars.hcl
-enos/support
-# Enos local Terraform files
-enos/.terraform/*
-enos/.terraform.lock.hcl
-enos/*.tfstate
-enos/*.tfstate.*
-enos/**/.terraform/*
-enos/**/.terraform.lock.hcl
-enos/**/*.tfstate
-enos/**/*.tfstate.*
+.enos
+enos-local.vars.hcl
+enos/**/support
+enos/**/kubeconfig
+.terraform
+.terraform.lock.hcl
+.tfstate.*

 .DS_Store
 .idea
@@ -134,4 +129,3 @@ website/components/node_modules

 tools/godoctests/.bin
 tools/gonilnilfunctions/.bin
-
diff --git a/enos/enos-modules.hcl b/enos/enos-modules.hcl
index a1aa485ff2..e808f2fe98 100644
--- a/enos/enos-modules.hcl
+++ b/enos/enos-modules.hcl
@@ -61,27 +61,40 @@ module "shutdown_multiple_nodes" {
   source = "./modules/shutdown_multiple_nodes"
 }

+# create target instances using ec2:CreateFleet
 module "target_ec2_fleet" {
   source = "./modules/target_ec2_fleet"

-  capacity_type    = "on-demand" // or "spot", use on-demand until we can stabilize spot fleets
-  common_tags      = var.tags
-  instance_mem_min = 4096
-  instance_cpu_min = 2
-  max_price        = "0.1432" // On-demand cost for RHEL amd64 on t3.medium in us-east
-  project_name     = var.project_name
-  ssh_keypair      = var.aws_ssh_keypair_name
+  common_tags  = var.tags
+  project_name = var.project_name
+  ssh_keypair  = var.aws_ssh_keypair_name
 }

+# create target instances using ec2:RunInstances
+module "target_ec2_instances" {
+  source = "./modules/target_ec2_instances"
+
+  common_tags  = var.tags
+  project_name = var.project_name
+  ssh_keypair  = var.aws_ssh_keypair_name
+}
+
+# don't create instances but satisfy the module interface
+module "target_ec2_shim" {
+  source = "./modules/target_ec2_shim"
+
+  common_tags  = var.tags
+  project_name = var.project_name
+  ssh_keypair  = var.aws_ssh_keypair_name
+}
+
+# create target instances using ec2:RequestSpotFleet
 module "target_ec2_spot_fleet" {
   source = "./modules/target_ec2_spot_fleet"

-  common_tags      = var.tags
-  instance_mem_min = 4096
-  instance_cpu_min = 2
-  max_price        = "0.1432" // On-demand cost for RHEL amd64 on t3.medium in us-east
-  project_name     = var.project_name
-  ssh_keypair      = var.aws_ssh_keypair_name
+  common_tags  = var.tags
+  project_name = var.project_name
+  ssh_keypair  = var.aws_ssh_keypair_name
 }

 module "vault_agent" {
diff --git a/enos/enos-scenario-agent.hcl b/enos/enos-scenario-agent.hcl
index 1cda1be038..eec1f800f7 100644
--- a/enos/enos-scenario-agent.hcl
+++ b/enos/enos-scenario-agent.hcl
@@ -93,7 +93,7 @@ scenario "agent" {
   }

   step "create_vault_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
diff --git a/enos/enos-scenario-autopilot.hcl b/enos/enos-scenario-autopilot.hcl
index 7f8893b7f6..91d24d02eb 100644
--- a/enos/enos-scenario-autopilot.hcl
+++ b/enos/enos-scenario-autopilot.hcl
@@ -104,7 +104,7 @@ scenario "autopilot" {
   }

   step "create_vault_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
@@ -197,7 +197,7 @@ scenario "autopilot" {
   }

   step "create_vault_cluster_upgrade_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
diff --git a/enos/enos-scenario-replication.hcl b/enos/enos-scenario-replication.hcl
index 0b69367727..0d9cd81d0a 100644
--- a/enos/enos-scenario-replication.hcl
+++ b/enos/enos-scenario-replication.hcl
@@ -113,7 +113,7 @@ scenario "replication" {

   # Create all of our instances for both primary and secondary clusters
   step "create_primary_cluster_targets" {
-    module = module.target_ec2_spot_fleet
+    module = module.target_ec2_instances
     depends_on = [
       step.create_vpc,
     ]
@@ -132,7 +132,7 @@ scenario "replication" {
   }

   step "create_primary_cluster_backend_targets" {
-    module = module.target_ec2_spot_fleet
+    module = matrix.primary_backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
     depends_on = [
       step.create_vpc,
     ]
@@ -142,7 +142,7 @@ scenario "replication" {
     }

     variables {
-      ami_id                = step.ec2_info.ami_ids["amd64"]["ubuntu"]["22.04"]
+      ami_id                = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
       awskms_unseal_key_arn = step.create_vpc.kms_key_arn
       cluster_tag_key       = local.backend_tag_key
       common_tags           = local.tags
@@ -151,7 +151,7 @@ scenario "replication" {
   }

   step "create_primary_cluster_additional_targets" {
-    module = module.target_ec2_spot_fleet
+    module = module.target_ec2_instances
     depends_on = [
       step.create_vpc,
       step.create_primary_cluster_targets,
@@ -172,7 +172,7 @@ scenario "replication" {
   }

   step "create_secondary_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
@@ -189,7 +189,7 @@ scenario "replication" {
   }

   step "create_secondary_cluster_backend_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = matrix.secondary_backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
     depends_on = [step.create_vpc]

     providers = {
@@ -197,7 +197,7 @@ scenario "replication" {
     }

     variables {
-      ami_id                = step.ec2_info.ami_ids["amd64"]["ubuntu"]["22.04"]
+      ami_id                = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
       awskms_unseal_key_arn = step.create_vpc.kms_key_arn
       cluster_tag_key       = local.backend_tag_key
       common_tags           = local.tags
diff --git a/enos/enos-scenario-smoke.hcl b/enos/enos-scenario-smoke.hcl
index eab5819c69..1dfa52bacf 100644
--- a/enos/enos-scenario-smoke.hcl
+++ b/enos/enos-scenario-smoke.hcl
@@ -114,7 +114,7 @@ scenario "smoke" {
   }

   step "create_vault_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
@@ -131,7 +131,7 @@ scenario "smoke" {
   }

   step "create_vault_cluster_backend_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = matrix.backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
     depends_on = [step.create_vpc]

     providers = {
@@ -139,7 +139,7 @@ scenario "smoke" {
     }

     variables {
-      ami_id                = step.ec2_info.ami_ids["amd64"]["ubuntu"]["22.04"]
+      ami_id                = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
       awskms_unseal_key_arn = step.create_vpc.kms_key_arn
       cluster_tag_key       = local.backend_tag_key
       common_tags           = local.tags
diff --git a/enos/enos-scenario-ui.hcl b/enos/enos-scenario-ui.hcl
index 4da4ba2cbb..db5aff3adf 100644
--- a/enos/enos-scenario-ui.hcl
+++ b/enos/enos-scenario-ui.hcl
@@ -81,7 +81,7 @@ scenario "ui" {
   }

   step "create_vault_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
@@ -98,7 +98,7 @@ scenario "ui" {
   }

   step "create_vault_cluster_backend_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = matrix.backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
     depends_on = [step.create_vpc]

     providers = {
@@ -106,7 +106,7 @@ scenario "ui" {
     }

     variables {
-      ami_id                = step.ec2_info.ami_ids["amd64"]["ubuntu"]["22.04"]
+      ami_id                = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
       awskms_unseal_key_arn = step.create_vpc.kms_key_arn
       cluster_tag_key       = local.backend_tag_key
       common_tags           = local.tags
diff --git a/enos/enos-scenario-upgrade.hcl b/enos/enos-scenario-upgrade.hcl
index 927d67857a..b61828110d 100644
--- a/enos/enos-scenario-upgrade.hcl
+++ b/enos/enos-scenario-upgrade.hcl
@@ -109,7 +109,7 @@ scenario "upgrade" {
   }

   step "create_vault_cluster_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = module.target_ec2_instances
     depends_on = [step.create_vpc]

     providers = {
@@ -126,7 +126,7 @@ scenario "upgrade" {
   }

   step "create_vault_cluster_backend_targets" {
-    module     = module.target_ec2_spot_fleet
+    module     = matrix.backend == "consul" ? module.target_ec2_instances : module.target_ec2_shim
     depends_on = [step.create_vpc]

     providers = {
@@ -134,7 +134,7 @@ scenario "upgrade" {
     }

     variables {
-      ami_id                = step.ec2_info.ami_ids["amd64"]["ubuntu"]["22.04"]
+      ami_id                = step.ec2_info.ami_ids["arm64"]["ubuntu"]["22.04"]
       awskms_unseal_key_arn = step.create_vpc.kms_key_arn
       cluster_tag_key       = local.backend_tag_key
       common_tags           = local.tags
diff --git a/enos/modules/az_finder/main.tf b/enos/modules/az_finder/main.tf
deleted file mode 100644
index 3508ff0cc3..0000000000
--- a/enos/modules/az_finder/main.tf
+++ /dev/null
@@ -1,28 +0,0 @@
-# Copyright (c) HashiCorp, Inc.
-# SPDX-License-Identifier: MPL-2.0
-
-terraform {
-  required_providers {
-    aws = {
-      source = "hashicorp/aws"
-    }
-  }
-}
-
-variable "instance_type" {
-  default = ["t3.small"]
-  type    = list(string)
-}
-
-data "aws_ec2_instance_type_offerings" "infra" {
-  filter {
-    name   = "instance-type"
-    values = var.instance_type
-  }
-
-  location_type = "availability-zone"
-}
-
-output "availability_zones" {
-  value = data.aws_ec2_instance_type_offerings.infra.locations
-}
diff --git a/enos/modules/create_vpc/main.tf b/enos/modules/create_vpc/main.tf
index 0ebc8bd32f..a571fab9a1 100644
--- a/enos/modules/create_vpc/main.tf
+++ b/enos/modules/create_vpc/main.tf
@@ -1,4 +1,11 @@
-data "aws_region" "current" {}
+data "aws_availability_zones" "available" {
+  state = "available"
+
+  filter {
+    name   = "zone-name"
+    values = ["*"]
+  }
+}

 resource "random_string" "cluster_id" {
   length  = 8
@@ -34,14 +41,16 @@ resource "aws_vpc" "vpc" {
 }

 resource "aws_subnet" "subnet" {
+  count                   = length(data.aws_availability_zones.available.names)
   vpc_id                  = aws_vpc.vpc.id
-  cidr_block              = var.cidr
+  cidr_block              = cidrsubnet(var.cidr, 8, count.index)
+  availability_zone       = data.aws_availability_zones.available.names[count.index]
   map_public_ip_on_launch = true

   tags = merge(
     var.common_tags,
     {
-      "Name" = "${var.name}-subnet"
+      "Name" = "${var.name}-subnet-${data.aws_availability_zones.available.names[count.index]}"
     },
   )
 }
diff --git a/enos/modules/create_vpc/outputs.tf b/enos/modules/create_vpc/outputs.tf
index 5886406eed..e4e53d9b8d 100644
--- a/enos/modules/create_vpc/outputs.tf
+++ b/enos/modules/create_vpc/outputs.tf
@@ -1,8 +1,3 @@
-output "aws_region" {
-  description = "AWS Region for resources"
-  value       = data.aws_region.current.name
-}
-
 output "vpc_id" {
   description = "Created VPC ID"
   value       = aws_vpc.vpc.id
diff --git a/enos/modules/target_ec2_fleet/main.tf b/enos/modules/target_ec2_fleet/main.tf
index 24ac95a574..1dac694567 100644
--- a/enos/modules/target_ec2_fleet/main.tf
+++ b/enos/modules/target_ec2_fleet/main.tf
@@ -76,15 +76,23 @@ resource "random_string" "unique_id" {
   special = false
 }

+// ec2:CreateFleet only allows up to 4 InstanceRequirements overrides so we can only ever request
+// a fleet across 4 or fewer subnets if we want to bid with InstanceRequirements instead of
+// weighted instance types.
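+//
+// For illustration: with one subnet per AZ in a six-AZ region, the shuffle below
+// narrows the pool to an arbitrary four subnet IDs, one per override, e.g.
+// ["subnet-0a...", "subnet-2f...", "subnet-11...", "subnet-3b..."] (hypothetical IDs).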
+resource "random_shuffle" "subnets" {
+  input        = data.aws_subnets.vpc.ids
+  result_count = 4
+}
+
 locals {
-  spot_allocation_strategy      = "price-capacity-optimized"
+  spot_allocation_strategy      = "lowestPrice"
   on_demand_allocation_strategy = "lowestPrice"
   instances                     = toset([for idx in range(var.instance_count) : tostring(idx)])
   cluster_name                  = coalesce(var.cluster_name, random_string.random_cluster_name.result)
   name_prefix                   = "${var.project_name}-${local.cluster_name}-${random_string.unique_id.result}"
   fleet_tag                     = "${local.name_prefix}-spot-fleet-target"
   fleet_tags = {
-    Name                     = "${local.name_prefix}-target"
+    Name                     = "${local.name_prefix}-${var.cluster_tag_key}-target"
     "${var.cluster_tag_key}" = local.cluster_name
     Fleet                    = local.fleet_tag
   }
@@ -218,6 +226,20 @@ resource "aws_launch_template" "target" {
     name = aws_iam_instance_profile.target.name
   }

+  instance_requirements {
+    burstable_performance = "included"
+
+    memory_mib {
+      min = var.instance_mem_min
+      max = var.instance_mem_max
+    }
+
+    vcpu_count {
+      min = var.instance_cpu_min
+      max = var.instance_cpu_max
+    }
+  }
+
   network_interfaces {
     associate_public_ip_address = true
     delete_on_termination       = true
@@ -251,7 +273,9 @@ resource "aws_launch_template" "target" {
 # Unless we see capacity issues or instances being shut down then we ought to
 # stick with that strategy.
 resource "aws_ec2_fleet" "targets" {
-  terminate_instances = true // termiante instances when we "delete" the fleet
+  replace_unhealthy_instances         = false
+  terminate_instances                 = true // terminate instances when we "delete" the fleet
+  terminate_instances_with_expiration = false
   tags = merge(
     var.common_tags,
     local.fleet_tags,
@@ -264,22 +288,11 @@ resource "aws_ec2_fleet" "targets" {
       version = aws_launch_template.target.latest_version
     }

-    override {
-      max_price = var.max_price
-      subnet_id = data.aws_subnets.vpc.ids[0]
+    dynamic "override" {
+      for_each = random_shuffle.subnets.result

-      instance_requirements {
-        burstable_performance = "included"
-
-        memory_mib {
-          min = var.instance_mem_min
-          max = var.instance_mem_max
-        }
-
-        vcpu_count {
-          min = var.instance_cpu_min
-          max = var.instance_cpu_max
-        }
+      content {
+        subnet_id = override.value
       }
     }
   }
diff --git a/enos/modules/target_ec2_fleet/variables.tf b/enos/modules/target_ec2_fleet/variables.tf
index 377c59de75..678cd70ee6 100644
--- a/enos/modules/target_ec2_fleet/variables.tf
+++ b/enos/modules/target_ec2_fleet/variables.tf
@@ -25,7 +25,7 @@ variable "common_tags" {
   description = "Common tags for cloud resources"
   type        = map(string)
   default = {
-    Project = "Vault"
+    Project = "vault-ci"
   }
 }

diff --git a/enos/modules/target_ec2_instances/main.tf b/enos/modules/target_ec2_instances/main.tf
new file mode 100644
index 0000000000..7eb506a3bf
--- /dev/null
+++ b/enos/modules/target_ec2_instances/main.tf
@@ -0,0 +1,248 @@
+terraform {
+  required_providers {
+    # We need to specify the provider source in each module until we publish it
+    # to the public registry
+    enos = {
+      source  = "app.terraform.io/hashicorp-qti/enos"
+      version = ">= 0.3.24"
+    }
+  }
+}
+
+data "aws_vpc" "vpc" {
+  id = var.vpc_id
+}
+
+data "aws_ami" "ami" {
+  filter {
+    name   = "image-id"
+    values = [var.ami_id]
+  }
+}
+
+data "aws_ec2_instance_type_offerings" "instance" {
+  filter {
+    name   = "instance-type"
+    values = [local.instance_type]
+  }
+
+  location_type = "availability-zone"
+}
+
+data "aws_availability_zones" "available" {
+  state = "available"
+
+  filter {
+    name   = "zone-name"
+    values = data.aws_ec2_instance_type_offerings.instance.locations
+  }
+}
+
+data "aws_subnets" "vpc" {
+  filter {
+    name   = "availability-zone"
+    values = data.aws_availability_zones.available.names
+  }
+
+  filter {
+    name   = "vpc-id"
+    values = [var.vpc_id]
+  }
+}
+
+data "aws_kms_key" "kms_key" {
+  key_id = var.awskms_unseal_key_arn
+}
+
+data "aws_iam_policy_document" "target" {
+  statement {
+    resources = ["*"]
+
+    actions = [
+      "ec2:DescribeInstances",
+      "secretsmanager:*"
+    ]
+  }
+
+  statement {
+    resources = [var.awskms_unseal_key_arn]
+
+    actions = [
+      "kms:DescribeKey",
+      "kms:ListKeys",
+      "kms:Encrypt",
+      "kms:Decrypt",
+      "kms:GenerateDataKey"
+    ]
+  }
+}
+
+data "aws_iam_policy_document" "target_instance_role" {
+  statement {
+    actions = ["sts:AssumeRole"]
+
+    principals {
+      type        = "Service"
+      identifiers = ["ec2.amazonaws.com"]
+    }
+  }
+}
+
+data "enos_environment" "localhost" {}
+
+locals {
+  cluster_name  = coalesce(var.cluster_name, random_string.cluster_name.result)
+  instance_type = local.instance_types[data.aws_ami.ami.architecture]
+  instance_types = {
+    "arm64"  = var.instance_types["arm64"]
+    "x86_64" = var.instance_types["amd64"]
+  }
+  instances   = toset([for idx in range(var.instance_count) : tostring(idx)])
+  name_prefix = "${var.project_name}-${local.cluster_name}"
+}
+
+resource "random_string" "cluster_name" {
+  length  = 8
+  lower   = true
+  upper   = false
+  numeric = false
+  special = false
+}
+
+resource "aws_iam_role" "target_instance_role" {
+  name               = "target_instance_role-${random_string.cluster_name.result}"
+  assume_role_policy = data.aws_iam_policy_document.target_instance_role.json
+}
+
+resource "aws_iam_instance_profile" "target" {
+  name = "${local.name_prefix}-target"
+  role = aws_iam_role.target_instance_role.name
+}
+
+resource "aws_iam_role_policy" "target" {
+  name   = "${local.name_prefix}-target"
+  role   = aws_iam_role.target_instance_role.id
+  policy = data.aws_iam_policy_document.target.json
+}
+
+resource "aws_security_group" "target" {
+  name        = "${local.name_prefix}-target"
+  description = "Target instance security group"
+  vpc_id      = var.vpc_id
+
+  # SSH traffic
+  ingress {
+    from_port = 22
+    to_port   = 22
+    protocol  = "tcp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  # Vault traffic
+  ingress {
+    from_port = 8200
+    to_port   = 8201
+    protocol  = "tcp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+      formatlist("%s/32", var.ssh_allow_ips)
+    ])
+  }
+
+  # Consul traffic
+  ingress {
+    from_port = 8300
+    to_port   = 8302
+    protocol  = "tcp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  ingress {
+    from_port = 8301
+    to_port   = 8302
+    protocol  = "udp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  ingress {
+    from_port = 8500
+    to_port   = 8503
+    protocol  = "tcp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  ingress {
+    from_port = 8600
+    to_port   = 8600
+    protocol  = "tcp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  ingress {
+    from_port = 8600
+    to_port   = 8600
+    protocol  = "udp"
+    cidr_blocks = flatten([
+      formatlist("%s/32", data.enos_environment.localhost.public_ip_addresses),
+      join(",", data.aws_vpc.vpc.cidr_block_associations.*.cidr_block),
+    ])
+  }
+
+  # Internal traffic
+  ingress {
+    from_port = 0
+    to_port   = 0
+    protocol  = "-1"
+    self      = true
+  }
+
+  # External traffic
+  egress {
+    from_port   = 0
+    to_port     = 0
+    protocol    = "-1"
+    cidr_blocks = ["0.0.0.0/0"]
+  }
+
+  tags = merge(
+    var.common_tags,
+    {
+      Name = "${local.name_prefix}-sg"
+    },
+  )
+}
+
+resource "aws_instance" "targets" {
+  for_each = local.instances
+
+  ami                    = var.ami_id
+  iam_instance_profile   = aws_iam_instance_profile.target.name
+  instance_type          = local.instance_type
+  key_name               = var.ssh_keypair
+  subnet_id              = data.aws_subnets.vpc.ids[tonumber(each.key) % length(data.aws_subnets.vpc.ids)]
+  vpc_security_group_ids = [aws_security_group.target.id]
+
+  tags = merge(
+    var.common_tags,
+    {
+      Name                     = "${local.name_prefix}-${var.cluster_tag_key}-target"
+      "${var.cluster_tag_key}" = local.cluster_name
+    },
+  )
+}
diff --git a/enos/modules/target_ec2_instances/outputs.tf b/enos/modules/target_ec2_instances/outputs.tf
new file mode 100644
index 0000000000..9428bfdb99
--- /dev/null
+++ b/enos/modules/target_ec2_instances/outputs.tf
@@ -0,0 +1,11 @@
+output "cluster_name" {
+  value = local.cluster_name
+}
+
+output "hosts" {
+  description = "The ec2 instance target hosts"
+  value = { for idx in range(var.instance_count) : idx => {
+    public_ip  = aws_instance.targets[idx].public_ip
+    private_ip = aws_instance.targets[idx].private_ip
+  } }
+}
diff --git a/enos/modules/target_ec2_instances/variables.tf b/enos/modules/target_ec2_instances/variables.tf
new file mode 100644
index 0000000000..cad05de066
--- /dev/null
+++ b/enos/modules/target_ec2_instances/variables.tf
@@ -0,0 +1,67 @@
+variable "ami_id" {
+  description = "The machine image identifier"
+  type        = string
+}
+
+variable "awskms_unseal_key_arn" {
+  type        = string
+  description = "The AWSKMS key ARN if using the awskms unseal method. If specified the instances will be granted kms permissions to the key"
+  default     = null
+}
+
+variable "cluster_name" {
+  type        = string
+  description = "A unique cluster identifier"
+  default     = null
+}
+
+variable "cluster_tag_key" {
+  type        = string
+  description = "The key name for the cluster tag"
+  default     = "TargetCluster"
+}
+
+variable "common_tags" {
+  description = "Common tags for cloud resources"
+  type        = map(string)
+  default     = { "Project" : "vault-ci" }
+}
+
+variable "instance_count" {
+  description = "The number of target instances to create"
+  type        = number
+  default     = 3
+}
+
+variable "instance_types" {
+  description = "The instance types to use depending on architecture"
+  type = object({
+    amd64 = string
+    arm64 = string
+  })
+  default = {
+    amd64 = "t3a.medium"
+    arm64 = "t4g.medium"
+  }
+}
+
+variable "project_name" {
+  description = "A unique project name"
+  type        = string
+}
+
+variable "ssh_allow_ips" {
+  description = "Allowlisted IP addresses for SSH access to target nodes. The IP address of the machine running Enos will automatically be allowlisted"
+  type        = list(string)
+  default     = []
+}
+
+variable "ssh_keypair" {
+  description = "SSH keypair used to connect to EC2 instances"
+  type        = string
+}
+
+variable "vpc_id" {
+  description = "The identifier of the VPC where the target instances will be created"
+  type        = string
+}
diff --git a/enos/modules/target_ec2_shim/main.tf b/enos/modules/target_ec2_shim/main.tf
new file mode 100644
index 0000000000..90e044f251
--- /dev/null
+++ b/enos/modules/target_ec2_shim/main.tf
@@ -0,0 +1,46 @@
+terraform {
+  required_providers {
+    # We need to specify the provider source in each module until we publish it
+    # to the public registry
+    enos = {
+      source  = "app.terraform.io/hashicorp-qti/enos"
+      version = ">= 0.3.24"
+    }
+  }
+}
+
+variable "ami_id" { default = null }
+variable "awskms_unseal_key_arn" { default = null }
+variable "cluster_name" { default = null }
+variable "cluster_tag_key" { default = null }
+variable "common_tags" { default = null }
+variable "instance_count" { default = 3 }
+variable "instance_cpu_max" { default = null }
+variable "instance_cpu_min" { default = null }
+variable "instance_mem_max" { default = null }
+variable "instance_mem_min" { default = null }
+variable "instance_types" { default = null }
+variable "max_price" { default = null }
+variable "project_name" { default = null }
+variable "ssh_allow_ips" { default = null }
+variable "ssh_keypair" { default = null }
+variable "vpc_id" { default = null }
+
+resource "random_string" "cluster_name" {
+  length  = 8
+  lower   = true
+  upper   = false
+  numeric = false
+  special = false
+}
+
+output "cluster_name" {
+  value = coalesce(var.cluster_name, random_string.cluster_name.result)
+}
+
+output "hosts" {
+  value = { for idx in range(var.instance_count) : idx => {
+    public_ip  = "null-public-${idx}"
+    private_ip = "null-private-${idx}"
+  } }
+}
diff --git a/enos/modules/target_ec2_spot_fleet/main.tf b/enos/modules/target_ec2_spot_fleet/main.tf
index 522167c20c..2a51230645 100644
--- a/enos/modules/target_ec2_spot_fleet/main.tf
+++ b/enos/modules/target_ec2_spot_fleet/main.tf
@@ -160,6 +160,14 @@ resource "random_string" "unique_id" {
   special = false
 }

+// ec2:RequestSpotFleet only allows up to 4 InstanceRequirements overrides so we can only ever
+// request a fleet across 4 or fewer subnets if we want to bid with InstanceRequirements instead of
+// weighted instance types.
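+//
+// Note: random_shuffle keeps its result in Terraform state, so re-applying the
+// same scenario reuses the same four subnets instead of reshuffling each run.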
+resource "random_shuffle" "subnets" {
+  input        = data.aws_subnets.vpc.ids
+  result_count = 4
+}
+
 locals {
   allocation_strategy = "lowestPrice"
   instances           = toset([for idx in range(var.instance_count) : tostring(idx)])
@@ -167,7 +175,7 @@ locals {
   name_prefix  = "${var.project_name}-${local.cluster_name}-${random_string.unique_id.result}"
   fleet_tag    = "${local.name_prefix}-spot-fleet-target"
   fleet_tags = {
-    Name                     = "${local.name_prefix}-target"
+    Name                     = "${local.name_prefix}-${var.cluster_tag_key}-target"
     "${var.cluster_tag_key}" = local.cluster_name
     Fleet                    = local.fleet_tag
   }
@@ -304,14 +312,29 @@ resource "aws_security_group" "target" {
 }

 resource "aws_launch_template" "target" {
-  name     = "${local.name_prefix}-target"
-  image_id = var.ami_id
-  key_name = var.ssh_keypair
+  name          = "${local.name_prefix}-target"
+  image_id      = var.ami_id
+  instance_type = null
+  key_name      = var.ssh_keypair

   iam_instance_profile {
     name = aws_iam_instance_profile.target.name
   }

+  instance_requirements {
+    burstable_performance = "included"
+
+    memory_mib {
+      min = var.instance_mem_min
+      max = var.instance_mem_max
+    }
+
+    vcpu_count {
+      min = var.instance_cpu_min
+      max = var.instance_cpu_max
+    }
+  }
+
   network_interfaces {
     associate_public_ip_address = true
     delete_on_termination       = true
@@ -353,6 +376,7 @@ resource "aws_spot_fleet_request" "targets" {
   // to 1 to avoid rebuilding the fleet on a re-run. For any other strategy
   // set it to zero to avoid rebuilding the fleet on a re-run.
   instance_pools_to_use_count   = local.allocation_strategy == "lowestPrice" ? 1 : 0
+  spot_price                    = var.max_price
   target_capacity               = var.instance_count
   terminate_instances_on_delete = true
   wait_for_fulfillment          = true
@@ -363,24 +387,25 @@ resource "aws_spot_fleet_request" "targets" {
       version = aws_launch_template.target.latest_version
     }

-    overrides {
-      spot_price = var.max_price
-      subnet_id  = data.aws_subnets.vpc.ids[0]
+    // We cannot currently use more than one subnet[0]. Until the bug has been resolved
+    // we'll choose a random subnet. It would be ideal to bid across all subnets to get
+    // the absolute cheapest available at the time of bidding.
+    //
+    // [0] https://github.com/hashicorp/terraform-provider-aws/issues/30505

-      instance_requirements {
-        burstable_performance = "included"
+    /*
+    dynamic "overrides" {
+      for_each = random_shuffle.subnets.result

-        memory_mib {
-          min = var.instance_mem_min
-          max = var.instance_mem_max
-        }
-
-        vcpu_count {
-          min = var.instance_cpu_min
-          max = var.instance_cpu_max
-        }
+      content {
+        subnet_id = overrides.value
       }
     }
+    */
+
+    overrides {
+      subnet_id = random_shuffle.subnets.result[0]
+    }
   }

   tags = merge(