Reputation: 1
Answer did not have pointers for this problem, because rollback deletes the stack.
Below is the CloudFormation template, written to launch a Jenkins Docker container on an ECS container instance (DesiredCount: 1) in the default public subnet.
The Jenkins Docker image, which is launched on the ECS container instance, is publicly available on Docker Hub.
We used the ECS-optimised AMI (ami-05958d7635caa4d04) in the ca-central-1 region, which runs Docker version 18.06.1.
{
  "AWSTemplateFormatVersion": "2010-09-09",
  "Description": "Jenkins Stack",
  "Parameters": {
    "VpcId": {
      "Type": "AWS::EC2::VPC::Id",
      "Description": "The target VPC Id"
    },
    "SubnetId": {
      "Type": "AWS::EC2::Subnet::Id",
      "Description": "The target subnet Id"
    },
    "KeyName": {
      "Type": "String",
      "Description": "The key pair that is allowed SSH access"
    }
  },
  "Resources": {
    "EC2Instance": {
      "Type": "AWS::EC2::Instance",
      "Properties": {
        "ImageId": "ami-05958d7635caa4d04",
        "InstanceType": "t2.micro",
        "SubnetId": {
          "Ref": "SubnetId"
        },
        "KeyName": {
          "Ref": "KeyName"
        },
        "SecurityGroupIds": [
          {
            "Ref": "EC2InstanceSecurityGroup"
          }
        ],
        "IamInstanceProfile": {
          "Ref": "EC2InstanceProfile"
        },
        "UserData": {
          "Fn::Base64": {
            "Fn::Join": [
              "",
              [
                "#!/bin/bash\n",
                "echo ECS_CLUSTER=",
                {
                  "Ref": "EcsCluster"
                },
                " >> /etc/ecs/ecs.config\n",
                "groupadd -g 1000 jenkins\n",
                "useradd -u 1000 -g jenkins jenkins\n",
                "mkdir -p /ecs/jenkins_home\n",
                "chown -R jenkins:jenkins /ecs/jenkins_home\n"
              ]
            ]
          }
        },
        "Tags": [
          {
            "Key": "Name",
            "Value": {
              "Fn::Join": [
                "",
                [
                  {
                    "Ref": "AWS::StackName"
                  },
                  "-instance"
                ]
              ]
            }
          }
        ]
      }
    },
    "EC2InstanceSecurityGroup": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupDescription": {
          "Fn::Join": [
            "",
            [
              {
                "Ref": "AWS::StackName"
              },
              " ingress security group"
            ]
          ]
        },
        "VpcId": {
          "Ref": "VpcId"
        },
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": "8080",
            "ToPort": "8080",
            "SourceSecurityGroupId": {
              "Ref": "ElbSecurityGroup"
            }
          },
          {
            "IpProtocol": "tcp",
            "FromPort": "22",
            "ToPort": "22",
            "CidrIp": "0.0.0.0/0"
          }
        ]
      }
    },
    "EC2InstanceProfile": {
      "Type": "AWS::IAM::InstanceProfile",
      "Properties": {
        "Path": "/",
        "Roles": [
          {
            "Ref": "EC2InstanceRole"
          }
        ]
      }
    },
    "EC2InstanceRole": {
      "Type": "AWS::IAM::Role",
      "Properties": {
        "AssumeRolePolicyDocument": {
          "Version": "2012-10-17",
          "Statement": [
            {
              "Effect": "Allow",
              "Principal": {
                "Service": [
                  "ec2.amazonaws.com"
                ]
              },
              "Action": [
                "sts:AssumeRole"
              ]
            }
          ]
        },
        "Path": "/",
        "ManagedPolicyArns": [
          "arn:aws:iam::aws:policy/service-role/AmazonEC2ContainerServiceforEC2Role"
        ]
      }
    },
    "ElbSecurityGroup": {
      "Type": "AWS::EC2::SecurityGroup",
      "Properties": {
        "GroupDescription": {
          "Fn::Join": [
            "",
            [
              {
                "Ref": "AWS::StackName"
              },
              " ELB ingress security group"
            ]
          ]
        },
        "VpcId": {
          "Ref": "VpcId"
        },
        "SecurityGroupIngress": [
          {
            "IpProtocol": "tcp",
            "FromPort": "80",
            "ToPort": "80",
            "CidrIp": "0.0.0.0/0"
          }
        ],
        "Tags": [
          {
            "Key": "Name",
            "Value": {
              "Fn::Join": [
                "",
                [
                  {
                    "Ref": "AWS::StackName"
                  },
                  "-elb-sg"
                ]
              ]
            }
          }
        ]
      }
    },
    "ElasticLoadBalancer": {
      "Type": "AWS::ElasticLoadBalancing::LoadBalancer",
      "Properties": {
        "CrossZone": "false",
        "SecurityGroups": [
          {
            "Ref": "ElbSecurityGroup"
          }
        ],
        "Listeners": [
          {
            "LoadBalancerPort": "80",
            "InstancePort": "8080",
            "Protocol": "http"
          }
        ],
        "Instances": [
          {
            "Ref": "EC2Instance"
          }
        ],
        "Subnets": [
          {
            "Ref": "SubnetId"
          }
        ]
      }
    },
    "EcsCluster": {
      "Type": "AWS::ECS::Cluster"
    },
    "EcsTaskDefinition": {
      "Type": "AWS::ECS::TaskDefinition",
      "Properties": {
        "ContainerDefinitions": [
          {
            "Name": "jenkins",
            "Image": "someaccount/jenkins:ecs",
            "Memory": 995,
            "PortMappings": [
              {
                "ContainerPort": 8080,
                "HostPort": 8080
              }
            ],
            "MountPoints": [
              {
                "SourceVolume": "docker",
                "ContainerPath": "/var/run/docker.sock"
              },
              {
                "SourceVolume": "jenkins_home",
                "ContainerPath": "/var/jenkins_home"
              }
            ]
          }
        ],
        "Volumes": [
          {
            "Name": "jenkins_home",
            "Host": {
              "SourcePath": "/ecs/jenkins_home"
            }
          },
          {
            "Name": "docker",
            "Host": {
              "SourcePath": "/var/run/docker.sock"
            }
          }
        ]
      }
    },
    "EcsService": {
      "Type": "AWS::ECS::Service",
      "Properties": {
        "Cluster": {
          "Ref": "EcsCluster"
        },
        "TaskDefinition": {
          "Ref": "EcsTaskDefinition"
        },
        "DesiredCount": 1
      }
    }
  },
  "Outputs": {
    "ElbDomainName": {
      "Description": "Public DNS name of Elastic Load Balancer",
      "Value": {
        "Fn::GetAtt": [
          "ElasticLoadBalancer",
          "DNSName"
        ]
      }
    },
    "EC2InstanceDomainName": {
      "Description": "Public DNS name of EC2 instance",
      "Value": {
        "Fn::GetAtt": [
          "EC2Instance",
          "PublicDnsName"
        ]
      }
    }
  }
}
Below are the events of failure:
This error occurs after hours.
The ECS instance goes active, so the AMI (ami-05958d7635caa4d04) looks fine, but there is no task in the Running or Stopped state.
To troubleshoot the ECS task, I tried the docker-compose approach:
# Local reproduction of the ECS task: runs the same Jenkins image with the
# same port mapping and mounts as the task definition in the template.
version: '2'

volumes:
  # Marked external, so Compose expects this named volume to exist already.
  jenkins_home:
    external: true

services:
  jenkins:
    image: someaccount/jenkins:ecs
    volumes:
      - jenkins_home:/var/jenkins_home
      # Bind-mounts the host's Docker socket into the container.
      - /var/run/docker.sock:/var/run/docker.sock
    ports:
      - "8080:8080"
and I see that `docker-compose up -d` launches the container on my laptop.
After the ECS instance (t2.micro) goes active, I tried manually running a new task and got the error below:
In the CloudFormation template, EcsTaskDefinition has "Memory": 995.
When the computing platform is a Linux server (VM), we analyse logs to troubleshoot.
How do I troubleshoot this problem in the ECS environment of a public cloud?
Upvotes: 4
Views: 5005
Reputation: 14462
The t2.micro instance that you are trying to launch has a total memory capacity of 1 GiB.
ECS will try to launch the task into your cluster, but it cannot meet the requirements: your container instance doesn't have enough available memory (OS processes consume more than 5 MiB of memory) and your task requires "Memory": 995, which is exactly the issue.
Debugging the container itself will not help in this case, as there is (probably) nothing wrong with it. The ECS scheduler is simply unable to meet the specified requirements to launch the task, so it cannot do anything other than fail and give a "helpful" message.
There are various things that can cause the scheduler to fail to launch a task, and it should always give a hint about the problem. In your case, it "clearly" states that you don't have enough memory. Another thing to look out for is daemon tasks, which can be launched only once per container instance (this accidentally happened to me).
Upvotes: 2