wheresmyspaceship
wheresmyspaceship

Reputation: 1090

Why does my AWS lb target group stay in a "draining" state?

I'm trying to deploy a Docker image via Terraform and AWS ECS using Fargate. Using Terraform, I've created a VPC, two private and two public subnets, an ECR repository to store the image, an ECS cluster, an ECS task, an ECS service, and a load balancer with a target group.

These resources are created successfully, but the target group is constantly:

  1. varying in the number of targets that are shown. For instance, refreshing will sometimes show 3 registered targets and sometimes 4.
  2. showing targets that usually have a status of "draining" and details that say "Target deregistration in progress". Sometimes one of them will have a status of "initial" and details that say "Target registration in progress".

Additionally, visiting the URL of the load balancer returns a "503 Service Temporarily Unavailable"

I came across this post, which led me to this article. It helped me better understand how Fargate works, but I'm having trouble translating it into the Terraform + AWS method I'm trying to implement.

I suspect the issue could be in how the security groups are allowing/disallowing traffic, but I'm still a novice with DevOps, so I appreciate in advance any help offered.

Here is the terraform main.tf that I've used to create the resources. Most of it is gathered from different tutorials and adjusted with updates whenever terraform screamed at me about a deprecation.

So, which parts of the following configuration are wrong and are causing the targets in the target group to constantly be in a draining state?

Again, thanks in advance for any help or insights provided!

# ..terraform/main.tf

# START CREATE VPC
# The VPC that every other network resource in this configuration attaches to.
resource "aws_vpc" "vpc" {
  # /16 address space; the subnets in this file use /24 slices of it.
  cidr_block       = "10.0.0.0/16"
  instance_tenancy = "default"

  # DNS resolution and DNS hostnames are both switched on inside the VPC.
  enable_dns_support   = true
  enable_dns_hostnames = true

  # `enable_classiclink = false` was dropped: the argument no longer exists in
  # AWS provider v5 (EC2-Classic is retired) and `false` was its default on
  # older provider versions, so omitting it does not change the VPC.

  tags = {
    Name = "vpc"
  }
}
# END CREATE VPC



# START CREATE PRIVATE AND PUBLIC SUBNETS
# First public subnet (us-east-1a). It is associated with the public route
# table further down in this file.
resource "aws_subnet" "public_subnet_1" {
  vpc_id            = aws_vpc.vpc.id
  availability_zone = "us-east-1a"
  cidr_block        = "10.0.1.0/24"

  # Public IPs are mapped on launch in this subnet.
  map_public_ip_on_launch = true

  tags = { Name = "public-subnet-1" }
}

# Second public subnet (us-east-1b), giving the public tier a second
# availability zone.
resource "aws_subnet" "public_subnet_2" {
  vpc_id            = aws_vpc.vpc.id
  availability_zone = "us-east-1b"
  cidr_block        = "10.0.2.0/24"

  # Public IPs are mapped on launch in this subnet.
  map_public_ip_on_launch = true

  tags = { Name = "public-subnet-2" }
}

# First private subnet (us-east-1a). No public IPs are mapped on launch.
# NOTE(review): no route table association for this subnet is visible in this
# file, and no other resource shown here references it.
resource "aws_subnet" "private_subnet_1" {
  vpc_id                  = aws_vpc.vpc.id
  availability_zone       = "us-east-1a"
  cidr_block              = "10.0.3.0/24"
  map_public_ip_on_launch = false

  tags = { Name = "private-subnet-1" }
}

# Second private subnet (us-east-1b). No public IPs are mapped on launch.
# NOTE(review): no route table association for this subnet is visible in this
# file, and no other resource shown here references it.
resource "aws_subnet" "private_subnet_2" {
  vpc_id                  = aws_vpc.vpc.id
  cidr_block              = "10.0.4.0/24"
  map_public_ip_on_launch = false
  availability_zone       = "us-east-1b"

  tags = {
    # Fixed copy-paste error: this was "private-subnet-1", which made the two
    # private subnets indistinguishable by Name tag.
    Name = "private-subnet-2"
  }
}
# END CREATE PRIVATE AND PUBLIC SUBNETS



# START CREATE GATEWAY
# Internet gateway attached to the VPC; the public route table's default
# route points at it.
resource "aws_internet_gateway" "vpc_gateway" {
  tags   = { Name = "vpc-gateway" }
  vpc_id = aws_vpc.vpc.id
}
# END CREATE GATEWAY



# START CREATE ROUTE TABLE AND ASSOCIATIONS
# Route table for the public subnets.
resource "aws_route_table" "public_route_table" {
  vpc_id = aws_vpc.vpc.id
  tags   = { Name = "public-route-table" }

  # Default route: all IPv4 traffic (0.0.0.0/0) goes to the internet gateway.
  route {
    gateway_id = aws_internet_gateway.vpc_gateway.id
    cidr_block = "0.0.0.0/0"
  }
}

# Associates public subnet 1 with the public route table.
resource "aws_route_table_association" "route_table_association_1" {
  route_table_id = aws_route_table.public_route_table.id
  subnet_id      = aws_subnet.public_subnet_1.id
}

# Associates public subnet 2 with the public route table.
resource "aws_route_table_association" "route_table_association_2" {
  route_table_id = aws_route_table.public_route_table.id
  subnet_id      = aws_subnet.public_subnet_2.id
}
# END CREATE ROUTE TABLE AND ASSOCIATIONS



# START CREATE ECR REPOSITORY
# ECR repository; its repository_url is used as the container image in the
# ECS task definition.
resource "aws_ecr_repository" "api_ecr_repository" {
  name = "api-ecr-repository"
}
# END CREATE ECR REPOSITORY



# START CREATE ECS CLUSTER
# ECS cluster that the API service is placed in.
resource "aws_ecs_cluster" "api_cluster" {
  name = "api-cluster"
}
# END CREATE ECS CLUSTER



# START CREATE ECS TASK AND DESIGNATE 'FARGATE'
# Fargate task definition with a single essential container listening on
# port 4000.
resource "aws_ecs_task_definition" "api_cluster_task" {
  family                   = "api-cluster-task"
  requires_compatibilities = ["FARGATE"]
  network_mode             = "awsvpc"

  # Task-level sizing; the single container below is given the same values.
  cpu    = 256
  memory = 512

  execution_role_arn = aws_iam_role.ecs_task_execution_role.arn

  # Built with jsonencode() instead of a hand-written JSON heredoc so the
  # document is checked by Terraform at plan time and cannot be broken by a
  # stray comma or quote. Keys and values are the same as before.
  container_definitions = jsonencode([
    {
      # The ECS service's load_balancer block must reference this exact name.
      name = "api-cluster-task"

      # No tag is appended to the repository URL.
      # NOTE(review): presumably this resolves to the ":latest" tag, so an
      # image with that tag must exist in the repository — confirm.
      image     = aws_ecr_repository.api_ecr_repository.repository_url
      essential = true

      portMappings = [
        {
          containerPort = 4000
          hostPort      = 4000
        }
      ]

      memory = 512
      cpu    = 256
    }
  ])
}
# END CREATE ECS TASK AND DESIGNATE 'FARGATE'



# START CREATE TASK POLICIES
# Trust policy: allows the ecs-tasks.amazonaws.com service principal to call
# sts:AssumeRole. Used as the assume-role policy of the task execution role.
data "aws_iam_policy_document" "assume_role_policy" {
  version = "2012-10-17"

  statement {
    sid     = ""
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      identifiers = ["ecs-tasks.amazonaws.com"]
      type        = "Service"
    }
  }
}

# Execution role referenced by the task definition's execution_role_arn.
# NOTE(review): "take" in the role name looks like a typo for "task". It is
# left unchanged here because renaming an IAM role forces its replacement.
resource "aws_iam_role" "ecs_task_execution_role" {
  assume_role_policy = data.aws_iam_policy_document.assume_role_policy.json
  name               = "ecs-take-execution-role"
}

# Attaches the AWS-managed AmazonECSTaskExecutionRolePolicy to the task
# execution role.
resource "aws_iam_role_policy_attachment" "ecs_task_execution_role_attachment" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonECSTaskExecutionRolePolicy"
  role       = aws_iam_role.ecs_task_execution_role.name
}
# END CREATE TASK POLICIES



# START CREATE ECS SERVICE
# Fargate service that keeps one copy of the API task running and registers
# it with the load balancer target group.
# NOTE(review): no health_check_grace_period_seconds is set. If the container
# needs time before it answers health checks, that may contribute to targets
# cycling between "initial" and "draining" — confirm against the service
# events and the stopped-task reasons.
resource "aws_ecs_service" "api_cluster_service" {
  name            = "api-cluster-service"
  launch_type     = "FARGATE"
  desired_count   = 1
  cluster         = aws_ecs_cluster.api_cluster.id
  task_definition = aws_ecs_task_definition.api_cluster_task.arn

  # Tasks run in the public subnets with a public IP, behind the ecs_tasks
  # security group.
  network_configuration {
    assign_public_ip = true
    security_groups  = [aws_security_group.ecs_tasks.id]
    subnets = [
      aws_subnet.public_subnet_1.id,
      aws_subnet.public_subnet_2.id,
    ]
  }

  load_balancer {
    target_group_arn = aws_lb_target_group.api_lb_target_group.arn

    # The container in the task definition is named the same as the task
    # family ("api-cluster-task"), so the family attribute is reused here.
    container_name = aws_ecs_task_definition.api_cluster_task.family
    container_port = 4000
  }

  # Create the service only after the listener and the execution-role policy
  # attachment exist.
  depends_on = [
    aws_lb_listener.api_lb_listener,
    aws_iam_role_policy_attachment.ecs_task_execution_role_attachment,
  ]
}


# Security group that accepts all traffic from the load balancer's security
# group and allows all outbound traffic.
# NOTE(review): no other resource in the visible configuration references
# this group; the ECS service uses aws_security_group.ecs_tasks instead.
resource "aws_security_group" "api_cluster_security_group" {
  vpc_id = aws_vpc.vpc.id

  # Any protocol, any port, but only from the load balancer's security group.
  ingress {
    protocol        = "-1"
    from_port       = 0
    to_port         = 0
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  # Unrestricted outbound.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END CREATE ECS SERVICE



# CREATE LOAD BALANCER
# Application load balancer placed in the two public subnets.
resource "aws_alb" "api_load_balancer" {
  name               = "api-load-balancer"
  load_balancer_type = "application"
  security_groups    = [aws_security_group.load_balancer_security_group.id]

  subnets = [
    aws_subnet.public_subnet_1.id,
    aws_subnet.public_subnet_2.id,
  ]
}

# Security group for the load balancer: HTTP (port 80) in from anywhere,
# everything out.
resource "aws_security_group" "load_balancer_security_group" {
  name   = "allow-load-balancer-traffic"
  vpc_id = aws_vpc.vpc.id

  # Inbound TCP port 80 from any IPv4 address.
  ingress {
    protocol    = "tcp"
    from_port   = 80
    to_port     = 80
    cidr_blocks = ["0.0.0.0/0"]
  }

  # Unrestricted outbound.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END CREATE LOAD BALANCER



# CREATE ECS TASK SECURITY GROUP
# Security group attached to the ECS tasks: accepts traffic on the container
# port (4000) from the load balancer only, and allows all outbound traffic.
resource "aws_security_group" "ecs_tasks" {
  name        = "ecs-tasks-sg"
  description = "allow inbound access from the ALB only"
  vpc_id      = aws_vpc.vpc.id

  # The original rule also listed cidr_blocks = ["0.0.0.0/0"], which opened
  # port 4000 to the whole internet and contradicted the description above.
  # Only the load balancer's security group is allowed now; traffic the load
  # balancer sends to the tasks is still admitted through this rule.
  ingress {
    protocol        = "tcp"
    from_port       = 4000
    to_port         = 4000
    security_groups = [aws_security_group.load_balancer_security_group.id]
  }

  # Unrestricted outbound.
  egress {
    protocol    = "-1"
    from_port   = 0
    to_port     = 0
    cidr_blocks = ["0.0.0.0/0"]
  }
}
# END ECS TASK SECURITY GROUP



# START CREATE LOAD BALANCER TARGET GROUP
# Target group (IP targets) that the listener forwards to and that the ECS
# service registers its tasks in.
# NOTE(review): the port here is 80 while the container listens on 4000.
# Presumably ECS registers each task with its container port, overriding this
# default — confirm in the console's registered-targets list.
resource "aws_lb_target_group" "api_lb_target_group" {
  name        = "api-lb-target-group"
  target_type = "ip"
  protocol    = "HTTP"
  port        = 80
  vpc_id      = aws_vpc.vpc.id

  # HTTP health check on "/": a 2xx response counts as a pass.
  health_check {
    protocol            = "HTTP"
    path                = "/"
    matcher             = "200-299"
    interval            = 90
    timeout             = 20
    healthy_threshold   = 3
    unhealthy_threshold = 2
  }
}
# END CREATE LOAD BALANCER TARGET GROUP



# START CREATE LOAD BALANCER LISTENER
# HTTP listener on port 80 of the load balancer; forwards every request to
# the API target group.
resource "aws_lb_listener" "api_lb_listener" {
  load_balancer_arn = aws_alb.api_load_balancer.arn
  protocol          = "HTTP"
  port              = 80

  default_action {
    target_group_arn = aws_lb_target_group.api_lb_target_group.arn
    type             = "forward"
  }
}
# END CREATE LOAD BALANCER LISTENER

Upvotes: 4

Views: 11613

Answers (1)

Marcin
Marcin

Reputation: 238707

You are not using api_cluster_security_group at all in your setup, so it's not clear what its purpose is. Also, in your aws_security_group.ecs_tasks you are allowing only port 4000. However, due to dynamic port mapping between ALB and ECS services, you should allow all ports, not only 4000.

There could be other issues, which are not apparent yet.

Upvotes: 1

Related Questions