Reputation: 11
I have created 8 Docker containers on an EC2 Ubuntu 20.04 instance as shown in the picture above. While all the other containers are running fine, the Namenode container keeps producing the following error infinitely and does not work properly: Re-format filesystem in Storage Directory root= /opt/hadoop/dfs/name; location= null ? (Y or N) Invalid input:
Dockerfile for Namenode
FROM hadoop-spark-base
HEALTHCHECK CMD curl -f http://namenode:9870/ || exit 1
ADD hdfs-site.xml $HADOOP_CONF_DIR
RUN mkdir $HADOOP_HOME/dfs/name
ADD start.sh /start.sh
RUN chmod a+x /start.sh
EXPOSE 9000 9870
CMD ["/start.sh", "/opt/hadoop/dfs/name"]
start.sh
NAME_DIR=$1
echo "Namenode DIR : $NAME_DIR"
$HADOOP_HOME/bin/hdfs namenode -format
$HADOOP_HOME/bin/hdfs --config $HADOOP_CONF_DIR namenode
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///opt/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>10485760</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.rpc-bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>dfs.namenode.servicerpc-bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>dfs.namenode.http-bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>dfs.namenode.https-bind-host</name>
<value>0.0.0.0</value>
</property>
</configuration>
core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://namenode:9000/</value>
<description>NameNode URI</description>
</property>
</configuration>
yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>resourcemanager</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/opt/hadoop/yarn/data</value>
</property>
<property>
<name>yarn.nodemanager.logs-dirs</name>
<value>/opt/hadoop/yarn/data</value>
</property>
<property>
<name>yarn.timeline-service.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.generic-application-history.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.timeline-service.leveldb-timeline-store.path</name>
<value>/opt/hadoop/yarn/timeline</value>
</property>
<property>
<name>yarn.timeline-service.hostname</name>
<value>yarntimelineserver</value>
</property>
</configuration>
docker-compose.yml
version: "3.7"
x-datanode_base: &datanode_base
image: hadoop-datanode
networks:
- bridge
services:
namenode:
image: hadoop-namenode
container_name: namenode
hostname: namenode
ports:
- "9098:9870" # namenode web UI
volumes:
- namenode:/opt/hadoop/dfs/name # namenode data mount
- namenode:/opt/spark/eventLog # spark history log data mount
- namenode:/opt/hadoop/yarn/timeline # yarn timeline data mount
networks:
- bridge
datanode01:
<<: *datanode_base
container_name: datanode01
hostname: datanode01
volumes:
- datanode01:/opt/hadoop/dfs/data
- datanode01:/opt/hadoop/yarn/data
- namenode:/opt/spark/eventLog
datanode02:
<<: *datanode_base
container_name: datanode02
hostname: datanode02
volumes:
- datanode02:/opt/hadoop/dfs/data
- datanode02:/opt/hadoop/yarn/data
- namenode:/opt/spark/eventLog
datanode03:
<<: *datanode_base
container_name: datanode03
hostname: datanode03
volumes:
- datanode03:/opt/hadoop/dfs/data
- datanode03:/opt/hadoop/yarn/data
- namenode:/opt/spark/eventLog
resourcemanager:
image: resourcemanager
container_name: resourcemanager
hostname: resourcemanager
ports:
- "9099:8088"
networks:
- bridge
yarntimelineserver:
image: yarn-timelineserver
container_name: yarntimelineserver
hostname: yarntimelineserver
ports:
- "9096:8188"
networks:
- bridge
volumes:
- namenode:/opt/hadoop/yarn/timeline
sparkhistoryserver:
image: spark-historyserver
container_name: sparkhistoryserver
hostname: sparkhistoryserver
ports:
- "9093:18080"
depends_on:
- namenode
- resourcemanager
volumes:
- namenode:/opt/spark/eventLog
networks:
- bridge
zeppelin:
image: zeppelin
container_name: zeppelin
hostname: zeppelin
ports:
- "9097:8080"
networks:
- bridge
volumes:
- namenode:/opt/spark/eventLog
- /env/hadoop-eco/hadoop/zeppelin/notebook:/zeppelin-0.10.1-bin-all/notebook
- /home/ec2-user/testdata:/testdata
volumes:
namenode:
datanode01:
datanode02:
datanode03:
networks:
bridge:
Upvotes: 1
Views: 82