Reputation: 4146
I have a task definition that I want to launch using Fargate on AWS. For now I don't need load balancing or anything else; I just want to run the task. The definition is as follows:
{
"ipcMode": null,
"executionRoleArn": "arn:aws:iam::941606308749:role/ecsTaskExecutionRole",
"containerDefinitions": [
{
"dnsSearchDomains": null,
"environmentFiles": null,
"logConfiguration": {
"logDriver": "awslogs",
"secretOptions": null,
"options": {
"awslogs-group": "/ecs/web",
"awslogs-region": "eu-central-1",
"awslogs-stream-prefix": "ecs"
}
},
"entryPoint": null,
"portMappings": [
{
"hostPort": 8000,
"protocol": "tcp",
"containerPort": 8000
}
],
"command": null,
"linuxParameters": null,
"cpu": 512,
"environment": [
{
"name": "AWS_STORAGE_BUCKET_NAME",
"value": "blacksheep-dev2"
},
{
"name": "CELERY_BROKER_HOST",
"value": "https://sqs.eu-central-1.amazonaws.com/941606308749/BlackSheepLearnsBroker"
},
{
"name": "POSTGRES_DB",
"value": "postgres"
},
{
"name": "POSTGRES_HOST",
"value": "blacksheeplearnsdb.c9a9ehc0s9ms.eu-central-1.rds.amazonaws.com"
},
{
"name": "POSTGRES_USER",
"value": "postgres"
},
{
"name": "ROLLBAR_ENABLED",
"value": "True"
}
],
"resourceRequirements": null,
"ulimits": null,
"dnsServers": null,
"mountPoints": [],
"workingDirectory": null,
"secrets": null,
"dockerSecurityOptions": null,
"memory": null,
"memoryReservation": 1024,
"volumesFrom": [],
"stopTimeout": null,
"image": "941606308749.dkr.ecr.eu-central-1.amazonaws.com/blacksheeplearns:latest",
"startTimeout": null,
"firelensConfiguration": null,
"dependsOn": null,
"disableNetworking": null,
"interactive": null,
"healthCheck": {
"retries": 3,
"command": [
"CMD-SHELL",
"curl -f http://localhost:8000/health/ || exit 1"
],
"timeout": 5,
"interval": 30,
"startPeriod": 30
},
"essential": true,
"links": null,
"hostname": null,
"extraHosts": null,
"pseudoTerminal": null,
"user": null,
"readonlyRootFilesystem": null,
"dockerLabels": {
"project": "BlackSheepLearns"
},
"systemControls": null,
"privileged": null,
"name": "web"
}
],
"placementConstraints": [],
"memory": "1024",
"taskRoleArn": "arn:aws:iam::941606308749:role/ecsTaskRole",
"compatibilities": [
"EC2",
"FARGATE"
],
"taskDefinitionArn": "arn:aws:ecs:eu-central-1:941606308749:task-definition/web:14",
"family": "web",
"requiresAttributes": [
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.logging-driver.awslogs"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.execution-role-awslogs"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.ecr-auth"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.19"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.21"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.task-iam-role"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.container-health-check"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.execution-role-ecr-pull"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.18"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "ecs.capability.task-eni"
},
{
"targetId": null,
"targetType": null,
"value": null,
"name": "com.amazonaws.ecs.capability.docker-remote-api.1.29"
}
],
"pidMode": null,
"requiresCompatibilities": [
"FARGATE"
],
"networkMode": "awsvpc",
"cpu": "512",
"revision": 14,
"status": "ACTIVE",
"inferenceAccelerators": null,
"proxyConfiguration": null,
"volumes": []
}
However, when I start it, it stays up for about 1.5 minutes and then gets killed. I suspect this has something to do with the health checks.
At a certain point it just receives a kill signal and stops. Here are the logs (newest first) from a run configured without a target group or load balancer:
2021-07-10 10:48:40
[2021-07-10 08:48:40 +0000] [1] [INFO] Shutting down: Master
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:48:39
[2021-07-10 08:48:39 +0000] [13] [INFO] Worker exiting (pid: 13)
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:48:39
[2021-07-10 08:48:39 +0000] [14] [INFO] Worker exiting (pid: 14)
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:48:39
[2021-07-10 08:48:39 +0000] [1] [INFO] Handling signal: term
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:33
WARNING:rollbar:Rollbar already initialized. Ignoring re-init.
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:33
WARNING:rollbar:Rollbar already initialized. Ignoring re-init.
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: ROLLBAR_KEY
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: ROLLBAR_KEY
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: POSTGRES_PASSWORD
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: POSTGRES_PASSWORD
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: SECRET_KEY
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:32
INFO:root:Retrieving secret: SECRET_KEY
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:29
[2021-07-10 08:46:29 +0000] [14] [INFO] Booting worker with pid: 14
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:29
[2021-07-10 08:46:29 +0000] [13] [INFO] Booting worker with pid: 13
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:29
[2021-07-10 08:46:29 +0000] [1] [INFO] Listening at: http://0.0.0.0:8000 (1)
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:29
[2021-07-10 08:46:29 +0000] [1] [INFO] Using worker: sync
ccb94e999e294bdbaadc3f941b786603
2021-07-10 10:46:29
[2021-07-10 08:46:29 +0000] [1] [INFO] Starting gunicorn 20.0.4
ccb94e999e294bdbaadc3f941b786603
So from what I can see, the service comes up: gunicorn (a server for Python web apps) starts and listens on port 8000, which I've mapped. I have also exposed a /health/
endpoint in my application to allow easy, lightweight health checks (it only returns 200s). And yet in the service console I keep getting:
3a52c067-63bd-4d58-a092-a69d29380962 2021-07-10 11:46:08 +0200 service web task 7982151a4a904a82b077fc48410dd672 failed container health checks.
What am I doing wrong?
Upvotes: 1
Views: 1019
Reputation: 401
Can you do 2 checks related to healthcheck and write what you find?
Upvotes: 1