Reputation: 3191
I am trying to compose a lightweight minimal hadoop stack with the images provided by bde2020 (learning purpose). Right now, the stack includes (among others)
Basically, I started from the official Big Data Europe Docker Compose file and added a Hue image based on their documentation.
Hue's file browser can't access HDFS:
Cannot access: /user/dav. The HDFS REST service is not available. Note: you are a Hue admin but not a HDFS superuser, "hdfs" or part of HDFS supergroup, "supergroup".
HTTPConnectionPool(host='namenode', port=50070): Max retries exceeded with url: /webhdfs/v1/user/dav?op=GETFILESTATUS&user.name=hue&doas=dav (Caused by NewConnectionError('<requests.packages.urllib3.connection.HTTPConnection object at 0x7f8119a3cf10>: Failed to establish a new connection: [Errno 111] Connection refused',))
localhost:9870/webhdfs/v1
in the namenode env file (source) and edit hue.ini
in Hue's container accordingly (by adding `webhdfs_url=http://namenode:9870/webhdfs/v1`).
When I log into Hue's container, I can see that the namenode's port 9870 is open (`nmap -p 9870 namenode`); port 50070 is not. I don't think that my issue is network-related. Despite editing `hue.ini`, Hue still goes for port 50070. So, how can I force Hue to go for port 9870 in my current setup (if this is the reason)?
# Hadoop 3.1.1 stack built from the bde2020 images, plus a Hue file browser.
# This is the setup from the question: Hue still tries namenode:50070.
version: '3.7'

services:
  # HDFS NameNode; port 9870 (web UI / WebHDFS) is published on the host.
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
    container_name: namenode
    hostname: namenode
    domainname: hadoop
    ports:
      - "9870:9870"
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
      # Bind-mounts a local script over /entrypoint.sh in the container.
      - ./entrypoints/namenode/entrypoint.sh:/entrypoint.sh
    env_file:
      - ./hadoop.env
      - .env
    networks:
      - hadoop_net
  # TODO adduser --ingroup hadoop dav

  # Note: the container is named `datanode` while the service and hostname are
  # `datanode1`; the preconditions of the other services use `datanode:9864`.
  datanode1:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
    container_name: datanode
    hostname: datanode1
    domainname: hadoop
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      # Presumably the host:port endpoints the image waits for before
      # starting -- confirm against the bde2020 image documentation.
      SERVICE_PRECONDITION: "namenode:9870"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop_net

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode:9864"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop_net

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop_net

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
    container_name: historyserver
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode:9864 resourcemanager:8088"
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop.env
    networks:
      - hadoop_net

  # Hue file browser (container name `hue`).
  filebrowser:
    container_name: hue
    image: bde2020/hdfs-filebrowser:3.11
    ports:
      - "8088:8088"
    env_file:
      - ./hadoop.env
    # Mounts a local hue.ini in an attempt to bypass the default WebHDFS URL.
    volumes:
      - ./overrides/hue/hue.ini:/opt/hue/desktop/conf.dist/hue.ini
    environment:
      - NAMENODE_HOST=namenode
    networks:
      - hadoop_net

networks:
  hadoop_net: {}

volumes:
  hadoop_namenode: {}
  hadoop_datanode: {}
  hadoop_historyserver: {}
Upvotes: 0
Views: 1721
Reputation: 3191
Yeah, found it. A few key elements:
Overriding `hue.ini` involves mounting a file named `hue-overrides.ini`.
# Working Hadoop 3.1.1 stack: bde2020 Hadoop images plus gethue/hue 4.4.0.
version: '3.7'

services:
  # HDFS NameNode; publishes 9870 (web UI / WebHDFS) and 8020 (fs.defaultFS).
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop3.1.1-java8
    container_name: namenode
    ports:
      - "9870:9870"
      - "8020:8020"
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
      # Bind-mounts a local script over /entrypoint.sh in the container.
      - ./overrides/namenode/entrypoint.sh:/entrypoint.sh
    env_file:
      - ./hadoop.env
      - .env
    networks:
      - hadoop

  # Hue; its port 8888 is reachable on the host as port 8000.
  filebrowser:
    container_name: hue
    image: gethue/hue:4.4.0
    ports:
      - "8000:8888"
    env_file:
      - ./hadoop.env
    # Key point: the overrides are mounted as `hue-overrides.ini` inside
    # Hue's conf directory, not as a replacement hue.ini.
    volumes:
      - ./overrides/hue/hue-overrides.ini:/usr/share/hue/desktop/conf/hue-overrides.ini
    depends_on:
      - namenode
    networks:
      - hadoop
      - frontend

  datanode1:
    image: bde2020/hadoop-datanode:2.0.0-hadoop3.1.1-java8
    container_name: datanode1
    volumes:
      - hadoop_datanode:/hadoop/dfs/data
    environment:
      # Presumably the host:port endpoints the image waits for before
      # starting -- confirm against the bde2020 image documentation.
      SERVICE_PRECONDITION: "namenode:9870"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

  resourcemanager:
    image: bde2020/hadoop-resourcemanager:2.0.0-hadoop3.1.1-java8
    container_name: resourcemanager
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

  nodemanager1:
    image: bde2020/hadoop-nodemanager:2.0.0-hadoop3.1.1-java8
    container_name: nodemanager
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 resourcemanager:8088"
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

  historyserver:
    image: bde2020/hadoop-historyserver:2.0.0-hadoop3.1.1-java8
    container_name: historyserver
    environment:
      SERVICE_PRECONDITION: "namenode:9870 datanode1:9864 resourcemanager:8088"
    volumes:
      - hadoop_historyserver:/hadoop/yarn/timeline
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

networks:
  hadoop: {}
  frontend: {}

volumes:
  hadoop_namenode: {}
  hadoop_datanode: {}
  hadoop_historyserver: {}
# hadoop.env -- loaded by the compose services through `env_file`.
# NOTE(review): the CORE_CONF_* / HDFS_CONF_* prefixes presumably map to
# core-site.xml / hdfs-site.xml properties in the bde2020 images -- confirm.
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
# Register `hue` as a proxy user, for any host and any group.
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
CORE_CONF_io_compression_codecs=org.apache.hadoop.io.compress.SnappyCodec
HDFS_CONF_dfs_replication=1
# WebHDFS is the REST interface Hue's file browser calls.
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
# hue-overrides.ini -- Hue settings for the Hadoop 3 stack.
[desktop]
http_host=0.0.0.0
http_port=8888
# "France" is not an IANA tz database name; Europe/Paris is the zone for France.
time_zone=Europe/Paris
dev=true
# Hue apps that are switched off.
app_blacklist=impala,zookeeper,oozie,hbase,security,search

[hadoop]
  [[hdfs_clusters]]
    [[[default]]]
      fs_defaultfs=hdfs://namenode:8020
      # Point WebHDFS at the NameNode's port 9870 instead of 50070.
      webhdfs_url=http://namenode:9870/webhdfs/v1
      security_enabled=false
Thanks @cricket_007
Upvotes: 0
Reputation: 191993
I was able to get the Filebrowser working with this INI
# hue.ini used with the Hadoop 2.7.1 compose file in this answer.
[desktop]
  http_host=0.0.0.0
  http_port=8888
  time_zone=America/Chicago
  dev=true
  # Hue apps that are switched off.
  app_blacklist=impala,zookeeper,oozie,hbase,security,search

[hadoop]
  [[hdfs_clusters]]
    [[[default]]]
      fs_defaultfs=hdfs://namenode:8020
      # WebHDFS on port 50070, which this answer's compose file publishes.
      webhdfs_url=http://namenode:50070/webhdfs/v1
      security_enabled=false
And this compose
# Hadoop 2.7.1 stack (bde2020 images) with the gethue/hue image.
version: "2"

services:
  # HDFS NameNode; publishes 8020 (fs.defaultFS) and 50070 (WebHDFS).
  namenode:
    image: bde2020/hadoop-namenode:1.1.0-hadoop2.7.1-java8
    container_name: namenode
    ports:
      - "8020:8020"
      - "50070:50070"
      # - 59050:59050
    volumes:
      - hadoop_namenode:/hadoop/dfs/name
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

  datanode1:
    image: bde2020/hadoop-datanode:1.1.0-hadoop2.7.1-java8
    container_name: datanode1
    ports:
      - "50075:50075"
      # - 50010:50010
      # - 50020:50020
    depends_on:
      - namenode
    volumes:
      - hadoop_datanode1:/hadoop/dfs/data
    env_file:
      - ./hadoop.env
    networks:
      - hadoop

  # Hue; its port 8888 is reachable on the host as port 8000.
  hue:
    image: gethue/hue
    container_name: hue
    ports:
      - "8000:8888"
    depends_on:
      - namenode
    # The local hue.ini is mounted as pseudo-distributed.ini in Hue's conf dir.
    volumes:
      - ./conf/hue.ini:/hue/desktop/conf/pseudo-distributed.ini
    networks:
      - hadoop
      - frontend

volumes:
  hadoop_namenode: {}
  hadoop_datanode1: {}

networks:
  hadoop: {}
  frontend: {}
`hadoop.env` has to add hue as a proxy user as well:
# hadoop.env -- loaded by the namenode and datanode services through `env_file`.
# NOTE(review): the CORE_CONF_* / HDFS_CONF_* prefixes presumably map to
# core-site.xml / hdfs-site.xml properties in the bde2020 images -- confirm.
CORE_CONF_fs_defaultFS=hdfs://namenode:8020
CORE_CONF_hadoop_http_staticuser_user=root
# Register `hue` as a proxy user, for any host and any group.
CORE_CONF_hadoop_proxyuser_hue_hosts=*
CORE_CONF_hadoop_proxyuser_hue_groups=*
HDFS_CONF_dfs_replication=1
# WebHDFS is the REST interface Hue's file browser calls.
HDFS_CONF_dfs_webhdfs_enabled=true
HDFS_CONF_dfs_permissions_enabled=false
Upvotes: 4