Markus Fisher
Markus Fisher

Reputation: 87

AWS ECS (Fargate) is failing to mount EFS file system "Failed to resolve"

In my testbed project I'm trying to mount an EFS to my Fargate container using the AWS Javascript SDK. I've been using this blog post as a reference for what I'm implementing: https://aws.amazon.com/blogs/containers/developers-guide-to-using-amazon-efs-with-amazon-ecs-and-aws-fargate-part-3/

I believe I have followed the same steps as the EFS part of the AWS blog post, but I'm running into the following error, which is displayed in the ECS task's status:

"ResourceInitializationError: failed to invoke EFS utils commands to set up EFS volumes: stderr: Failed to resolve "fs-XXXXX.efs.us-east-1.amazonaws.com" - check that your file system ID is correct. See https://docs.aws.amazon.com/console/efs/mount-d...',"

DNS Resolution and DNS hostnames are both enabled in my VPC.

Here are the most important bits of code:

import * as SDK from 'aws-sdk';
import Config from '../config';
import Networking from '../cluster/networking';
import Logging from '../cluster/logging';
import BuildJobParams from './buildJobParams';

import ExecutionRole from '../executionRole/executionRole';

/**
 * Registers the build task definition, starts it on Fargate and follows the
 * task until it stops.
 */
class BuildJob {
    /**
     * Registers the task definition, runs one task in the configured cluster
     * and watches it until it has stopped.
     *
     * @returns {Promise<object>} The raw `runTask` response.
     * @throws {Error} When ECS accepted the request but started no task; the
     *   message carries the reasons from the response's `failures` array.
     */
    static async run() {
        const taskDefinitionArn = await this.registerJob();
        const params = {
            cluster: Config.default.clusterName,
            taskDefinition: taskDefinitionArn,
            // NOTE(review): EFS volumes on Fargate are documented to need
            // platform version 1.4.0 or later - confirm before changing this.
            platformVersion: "1.4.0",
            launchType: "FARGATE",
            networkConfiguration: {
                awsvpcConfiguration: {
                    subnets: [await Networking.get()],
                    assignPublicIp: "ENABLED",
                    securityGroups: [await Networking.getSecurityGroup()]
                }
            },
        };
        const ECS = new SDK.ECS();
        console.log("Build job starting");
        const task = await ECS.runTask(params).promise();

        // runTask can succeed at the HTTP level while placing no task at all;
        // in that case the explanation is in `failures`, not in an exception.
        const started = task.tasks || [];
        if (started.length === 0) {
            const reasons = (task.failures || [])
                .map((failure) => `${failure.arn || "unknown"}: ${failure.reason || "unknown reason"}`)
                .join("; ");
            throw new Error(`Build job failed to start${reasons ? ` (${reasons})` : ""}`);
        }

        await this.watch(started[0].taskArn);

        return task;
    }

    /**
     * Waits for the task to run, streams its logs, then waits for it to stop.
     *
     * Failures are reported on the console and deliberately not rethrown, so
     * this method always resolves.
     *
     * @param {string} task ARN of the task to follow.
     * @returns {Promise<void>}
     */
    static async watch(task) {
        const ECS = new SDK.ECS();
        const query = {
            cluster: Config.default.clusterName,
            tasks: [task]
        };

        try {
            await ECS.waitFor("tasksRunning", query).promise();

            console.log("Build job is running, watching logs");
            // Logging.watch receives a callback that reports the task's current status.
            await Logging.watch(async () => {
                const described = await ECS.describeTasks(query).promise();
                return described.tasks[0].lastStatus;
            });

            await ECS.waitFor("tasksStopped", query).promise();

            console.log("Build job has ended");
        } catch (err) {
            // Dump the task description for diagnosis. This lookup is itself
            // best-effort: if it fails, the original error must still be logged.
            try {
                console.log(await ECS.describeTasks(query).promise());
            } catch (describeErr) {
                console.error(describeErr);
            }
            console.error(err);
        }
    }

    /**
     * Registers the task definition produced by `BuildJobParams.get()`.
     *
     * @returns {Promise<string>} ARN of the registered task definition.
     */
    static async registerJob() {
        const ECS = new SDK.ECS();
        const definition = await BuildJobParams.get();
        const registered = await ECS.registerTaskDefinition(definition).promise();
        return registered.taskDefinition.taskDefinitionArn;
    }
}

export default BuildJob
import Config from '../config'
import ElasticFileSystem from '../cluster/fileSystem';
import ExecutionRole from '../executionRole/executionRole';

/**
 * Builds the `registerTaskDefinition` request for the build worker task.
 */
class BuildJobParams{
    /**
     * Assembles the task definition: one "BuildWorker" container with the EFS
     * volume "test" mounted at /efs/.
     *
     * The file system and access point IDs are read from `ElasticFileSystem`,
     * so the file system has to be created before this is called.
     *
     * @returns {Promise<object>} Parameters for `ECS.registerTaskDefinition`.
     * @throws {Error} When no file system ID is available yet.
     */
    static async get(){
        const volumeName = "test";
        // The NFS port; the API declares both port fields as integers.
        const nfsPort = 2049;

        const fileSystemId = ElasticFileSystem.getFileSystemId();
        if (!fileSystemId) {
            // Fail here with a clear message rather than registering a task
            // definition whose volume points at nothing.
            throw new Error("EFS file system ID is not set; create the file system before building the job parameters");
        }
        const accessPointId = ElasticFileSystem.getAccessPointId();

        // The two role lookups are independent, so run them concurrently.
        const [taskRoleArn, executionRoleArn] = await Promise.all([
            ExecutionRole.getRunArn(),
            ExecutionRole.getLoggingArn()
        ]);

        const buildWorker = {
            name: "BuildWorker",
            command: Config.default.buildJobCommand,
            environment: [
                { name: "ws", value: "" }
            ],
            image: "ubuntu",
            logConfiguration: {
                logDriver: "awslogs",
                options: {
                    "awslogs-group": Config.default.logGroupName,
                    "awslogs-stream-prefix": "ecs",
                    "awslogs-region": Config.default.region
                }
            },
            mountPoints: [
                {
                    containerPath: "/efs/",
                    sourceVolume: volumeName
                }
            ],
            portMappings: [
                {
                    containerPort: nfsPort,
                    hostPort: nfsPort,
                    protocol: "tcp"
                }
            ]
        };

        return {
            containerDefinitions: [buildWorker],
            memory: "512",
            cpu: "256",
            family: "GameCI",
            volumes: [
                {
                    name: volumeName,
                    efsVolumeConfiguration: {
                        fileSystemId: fileSystemId,
                        transitEncryption: "ENABLED",
                        authorizationConfig: {
                            accessPointId: accessPointId,
                            iam: "DISABLED"
                        }
                    }
                }
            ],
            requiresCompatibilities: ["FARGATE"],
            networkMode: "awsvpc",
            taskRoleArn: taskRoleArn,
            executionRoleArn: executionRoleArn
        };
    }
}
export default BuildJobParams
import * as SDK from 'aws-sdk';
import Config from '../config';
import Networking from './networking';

/**
 * Creates the EFS file system, access point and mount target used by the
 * build job, and remembers their IDs as static properties of this class.
 */
class ElasticFileSystem {
    /**
     * Creates an encrypted file system, an access point and one mount target,
     * waiting for both the file system and the mount target to become
     * "available" before returning.
     *
     * Waiting for the mount target matters: a task that mounts the file
     * system before a mount target is available cannot resolve the file
     * system's DNS name.
     *
     * @param {number} [pollIntervalMs=10000] Delay between lifecycle polls.
     * @returns {Promise<object>} The raw `createFileSystem` response.
     * @throws {Error} When the file system or mount target leaves the
     *   creating/updating states without becoming available.
     */
    static async get(pollIntervalMs = 10000) {
        const EFS = new SDK.EFS();

        // Polls `readState` until it reports "available". Any state other
        // than the transitional ones is treated as a failed creation.
        const waitForAvailable = async (readState, what) => {
            for (;;) {
                const state = await readState();
                if (state === "available") {
                    return;
                }
                if (state !== "creating" && state !== "updating") {
                    throw new Error(`Failed to create ${what} (${state})`);
                }
                await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
            }
        };

        const result = await EFS.createFileSystem({
            CreationToken: Config.default.clusterName,
            PerformanceMode: "generalPurpose",
            Encrypted: true,
            Tags: [
                {
                    Key: "Name",
                    Value: "MyFileSystem"
                }
            ]
        }).promise();
        const fileSystemId = result.FileSystemId;

        // Ask for exactly this file system instead of listing every file
        // system in the account and searching the list.
        await waitForAvailable(async () => {
            const described = await EFS.describeFileSystems({ FileSystemId: fileSystemId }).promise();
            const match = described.FileSystems[0];
            return match ? match.LifeCycleState : undefined;
        }, "file system");

        const accessPoint = await EFS.createAccessPoint({
            FileSystemId: fileSystemId,
            ClientToken: Config.default.clusterName
        }).promise();

        const mt = await EFS.createMountTarget({
            FileSystemId: fileSystemId,
            SubnetId: `${await Networking.get()}`,
            SecurityGroups: [
                `${await Networking.getSecurityGroup()}`
            ],
        }).promise();

        await waitForAvailable(async () => {
            const described = await EFS.describeMountTargets({ MountTargetId: mt.MountTargetId }).promise();
            const match = described.MountTargets[0];
            return match ? match.LifeCycleState : undefined;
        }, "mount target");

        console.log("File System created");

        Object.assign(ElasticFileSystem, {
            fileSystemId: fileSystemId,
            accessPointId: accessPoint.AccessPointId,
            mountTargetId: mt.MountTargetId
        });
        return result;
    }

    /** @returns {string|undefined} File system ID stored by `get()`. */
    static getFileSystemId(){
        console.log(this.fileSystemId);
        return this.fileSystemId;
    }

    /** @returns {string|undefined} Access point ID stored by `get()`. */
    static getAccessPointId(){
        console.log(this.accessPointId);
        return this.accessPointId;
    }

    /** @returns {string|undefined} Mount target ID stored by `get()`. */
    static getMountTargetId(){
        console.log(this.mountTargetId);
        return this.mountTargetId;
    }

    /**
     * Deletes EVERY file system returned by `describeFileSystems`, not only
     * the one created by `get()`. Do not run this in an account that holds
     * file systems you want to keep.
     *
     * NOTE(review): EFS presumably refuses to delete a file system that still
     * has mount targets - confirm whether those need deleting first.
     *
     * @returns {Promise<void>}
     */
    static async delete() {
        const EFS = new SDK.EFS();
        const listed = await EFS.describeFileSystems().promise();
        for (const fileSystem of listed.FileSystems) {
            await EFS.deleteFileSystem({ FileSystemId: fileSystem.FileSystemId }).promise();
        }
    }
}

export default ElasticFileSystem
import * as SDK from 'aws-sdk';

import Logging from './logging';
import ElasticFileSystem from './fileSystem';
import Config from '../config';

/**
 * Creates and tears down the ECS cluster together with the log group and
 * file system it depends on.
 */
class ElasticCluster {
    /**
     * Returns the shared ECS client, creating it on first use so that
     * `exists()`, `getCluster()` and `create()` also work before `get()`.
     *
     * @returns {object} The ECS client stored on `ElasticCluster.ECS`.
     */
    static client() {
        if (ElasticCluster.ECS == null) {
            Object.assign(ElasticCluster, { ECS: new SDK.ECS() });
        }
        return ElasticCluster.ECS;
    }

    /**
     * Sets up logging and the file system, creates the cluster and waits
     * until it is ready.
     *
     * @returns {Promise<object>} The raw `createCluster` response.
     */
    static async get() {
        console.log("Creating cluster");

        await Logging.get(Config.default.logGroupName);
        await ElasticFileSystem.get();
        Object.assign(ElasticCluster, {
            ECS: new SDK.ECS()
        });

        const data = await this.create();
        await ElasticCluster.waitForClusterReady();
        console.log("Created cluster");
        return data;
    }

    /**
     * Creates the cluster with FARGATE as its only capacity provider.
     *
     * @returns {Promise<object>} The raw `createCluster` response.
     */
    static async create() {
        const params = {
            capacityProviders: [
                'FARGATE'
            ],
            clusterName: Config.default.clusterName,
            defaultCapacityProviderStrategy: [
                {
                    capacityProvider: 'FARGATE',
                    // The API declares base and weight as integers.
                    base: 0,
                    weight: 1
                }
            ],
            settings: [],
            // NOTE(review): these look like placeholder values copied from
            // the SDK documentation - confirm whether a real tag is intended.
            tags: [
                {
                    key: 'STRING_VALUE',
                    value: 'STRING_VALUE'
                }
            ]
        };
        return await ElasticCluster.client().createCluster(params).promise();
    }

    /**
     * @returns {Promise<boolean>} True when `describeClusters` returns at
     *   least one cluster for the configured name.
     */
    static async exists(){
        return (await ElasticCluster.describe()).length > 0;
    }

    /**
     * @returns {Promise<object|undefined>} The first cluster description for
     *   the configured name, or undefined when none is returned.
     */
    static async getCluster(){
        return (await ElasticCluster.describe())[0];
    }

    /**
     * Describes the configured cluster.
     *
     * @returns {Promise<object[]>} The `clusters` array of the response.
     */
    static async describe() {
        const params = {
            clusters: [
                Config.default.clusterName,
            ]
        };
        return (await ElasticCluster.client().describeClusters(params).promise()).clusters;
    }

    /**
     * Polls until the cluster status is "ACTIVE" or "INACTIVE".
     *
     * @param {number} [pollIntervalMs=10000] Delay between polls.
     * @returns {Promise<void>}
     * @throws {Error} When the cluster is not returned by `describeClusters`.
     */
    static async waitForClusterReady(pollIntervalMs = 10000) {
        for (;;) {
            const cluster = await this.getCluster();
            if (cluster == null) {
                throw new Error(`Cluster ${Config.default.clusterName} not found`);
            }
            if (cluster.status == "ACTIVE" || cluster.status == "INACTIVE") {
                return;
            }
            await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
        }
    }

    /**
     * Deletes EVERY cluster returned by `listClusters`, not only the
     * configured one, then deletes the file systems and the log group.
     * Do not run this in an account that holds clusters you want to keep.
     *
     * Cluster deletion is best-effort: a failure is logged and the remaining
     * cleanup still runs.
     *
     * @returns {Promise<void>}
     */
    static async delete() {
        const ecs = new SDK.ECS();

        try {
            const clusterArns = (await ecs.listClusters().promise()).clusterArns;
            for (const clusterArn of clusterArns) {
                await ecs.deleteCluster({
                    cluster: clusterArn
                }).promise();
            }
        } catch (err) {
            // Keep going, but make the failure visible instead of hiding it.
            console.error(err);
        }
        await ElasticFileSystem.delete();
        await Logging.delete();
        console.log("garbage collected");
    }
}

export default ElasticCluster
import * as SDK from 'aws-sdk';
import Config from '../config';


/**
 * Looks up the subnets of the VPC and creates the security group shared by
 * the build task and the EFS mount target.
 */
class Networking {
    /**
     * @returns {Promise<string>} ID of the first subnet of the VPC.
     * @throws {Error} When the VPC has no subnets.
     */
    static async get() {
        const subnets = await this.getAll();
        if (subnets.length === 0) {
            throw new Error(`No subnets found in VPC ${await this.getVpcId()}`);
        }
        return subnets[0].SubnetId;
    }

    /**
     * @returns {Promise<object[]>} All subnets whose `vpc-id` matches the VPC.
     */
    static async getAll() {
        const EC2 = new SDK.EC2();
        const params = {
            Filters: [
                {
                    Name: "vpc-id",
                    Values: [
                        await this.getVpcId()
                    ]
                }
            ]
        };
        const value = await EC2.describeSubnets(params).promise();
        return value.Subnets;
    }

    /**
     * Returns the ID of the security group, creating it on the first call and
     * serving the cached ID afterwards.
     *
     * The group gets one ingress rule: TCP 2049 (NFS) from members of the
     * group itself. A permission that names no source grants no access, and
     * both the task and the mount target are placed in this group, so the
     * self-reference is what lets the task reach the mount target.
     *
     * @returns {Promise<string>} The security group ID.
     */
    static async getSecurityGroup() {
        if (this.securityGroup != null){
            return this.securityGroup;
        }

        const EC2 = new SDK.EC2();
        const nfsPort = 2049;

        console.log("creating security group");
        const sg = await EC2.createSecurityGroup({
            GroupName: Config.default.clusterName,
            Description: "test",
            VpcId: await this.getVpcId(),
        }).promise();

        await EC2.authorizeSecurityGroupIngress({
            GroupId: sg.GroupId,
            IpPermissions: [{
                FromPort: nfsPort,
                ToPort: nfsPort,
                IpProtocol: "tcp",
                UserIdGroupPairs: [
                    { GroupId: sg.GroupId }
                ]
            }]
        }).promise();

        Object.assign(this, {securityGroup: sg.GroupId});
        return sg.GroupId;
    }

    /**
     * @returns {Promise<string>} The hard-coded VPC ID.
     */
    static async getVpcId() {
        return "vpc-XXXXXXX";
    }
}

export default Networking

Upvotes: 2

Views: 7585

Answers (1)

w0otness
w0otness

Reputation: 41

TLDR: Check your DHCP Option Set has AmazonProvidedDNS

I am having the same issue with my CDK stack. My task is running in a private subnet of my VPC, and I have confirmed that EFS is set up correctly, as I have a Lambda function set to run in the same private subnet and it connects to EFS fine.

I tried VPC interface endpoints for EFS, no cigar.

I haven't been able to test this yet, but your solution might be based on information I found on acloud.guru. Curiously, that post says it is from 2 years ago; wasn't support for EFS on Fargate only added in 2020?

My VPC has both DNS hostnames and resolution enabled, but my DHCP Option Set doesn't have AmazonProvidedDNS as a name server, which would explain why we get Failed to resolve "fs-XXXXX.efs.us-east-1.amazonaws.com"

Upvotes: 4

Related Questions