Desanth pv

Reputation: 351

How to connect to HDFS via Kerberos from OSGi bundles

We are trying to connect to HDFS using Kerberos, from a Karaf container, via an OSGi bundle. We have already installed the Hadoop client in Karaf using the Apache ServiceMix bundle:

<groupId>org.apache.servicemix.bundles</groupId>
<artifactId>org.apache.servicemix.bundles.hadoop-client</artifactId>
<version>2.4.1_1</version>

The POM file is shown below:

<build>
        <plugins>
            <plugin>
                <groupId>org.apache.felix</groupId>
                <artifactId>maven-bundle-plugin</artifactId>
                <version>2.3.7</version>
                <extensions>true</extensions>
                <configuration>
                    <instructions>
                        <Bundle-Activator>com.bdbizviz.hadoop.activator.PaHdfsActivator</Bundle-Activator>
                        <Bundle-SymbolicName>${project.artifactId}</Bundle-SymbolicName>
                        <Bundle-Version>${project.version}</Bundle-Version>
                        <Export-Package>
                            <!-- com.google.*, !org.apache.camel.model.dataformat, !org.apache.poi.ddf, 
                                !org.apache.xmlbeans, org.apache.commons.collections.*, org.apache.commons.configuration.*, 
                                org.apache.hadoop.hdfs*, org.apache.hadoop.hdfs.client*, org.apache.hadoop.hdfs.net*, 
                                org.apache.hadoop.hdfs.protocol.datatransfer*, org.apache.hadoop.hdfs.protocol.proto*, 
                                org.apache.hadoop.hdfs.protocolPB*, org.apache.hadoop.conf.*, org.apache.hadoop.io.*, 
                                org.apache.hadoop.fs.*, org.apache.hadoop.security.*, org.apache.hadoop.metrics2.*, 
                                org.apache.hadoop.util.*, org.apache.hadoop*; -->
                            <!-- org.apache.*; -->
                        </Export-Package>
                        <Import-Package>
                            org.apache.hadoop*,org.osgi.framework,*;resolution:=optional
                        </Import-Package>
                        <Include-Resource>
                            {maven-resources},
                            @org.apache.servicemix.bundles.hadoop-client-2.4.1_1.jar!/core-default.xml,
                            @org.apache.servicemix.bundles.hadoop-client-2.4.1_1.jar!/hdfs-default.xml,
                            @org.apache.servicemix.bundles.hadoop-client-2.4.1_1.jar!/mapred-default.xml,
                            @org.apache.servicemix.bundles.hadoop-client-2.4.1_1.jar!/hadoop-metrics.properties
                        </Include-Resource>
                        <DynamicImport-Package>*</DynamicImport-Package>
                    </instructions>
                </configuration>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>org.apache.servicemix.bundles</groupId>
            <artifactId>org.apache.servicemix.bundles.hadoop-client</artifactId>
            <version>2.4.1_1</version>
            <exclusions>
                <exclusion>
                    <groupId>jdk.tools</groupId>
                    <artifactId>jdk.tools</artifactId>
                    <!-- <version>1.7</version> -->
                </exclusion>
            </exclusions>
        </dependency>

    </dependencies>

Code Snippet:

import java.io.IOException;
import java.security.PrivilegedExceptionAction;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.LocalFileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;
import org.apache.hadoop.security.UserGroupInformation;

public class TestHdfs implements ITestHdfs {

    public void printName() throws IOException{

        /*

        Configuration config = new Configuration();
        config.set("fs.default.name", "hdfs://192.168.1.17:8020");
        config.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
        config.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
        try {
            fs = FileSystem.get(config);
            getHostnames(fs);
        } catch (IOException e) {
            e.printStackTrace();
        }*/
        Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

        final Configuration config = new Configuration();
        config.set("fs.default.name", "hdfs://192.168.1.124:8020");
        config.set("fs.file.impl", LocalFileSystem.class.getName());
        config.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        config.set("hadoop.security.authentication", "KERBEROS");
        config.set("dfs.namenode.kerberos.principal.pattern",
                "hdfs/*@********.COM");

        System.setProperty("HADOOP_JAAS_DEBUG", "true");
        System.setProperty("sun.security.krb5.debug", "true");
        System.setProperty("java.net.preferIPv4Stack", "true");

        System.out.println("--------------status---:"
                + UserGroupInformation.isSecurityEnabled());
        UserGroupInformation.setConfiguration(config);
        // UserGroupInformation.loginUserFromKeytab(
        // "hdfs/hadoop1.********.com@********.COM",
        // "file:/home/kaushal/hdfs-hadoop1.keytab");

        UserGroupInformation app_ugi = UserGroupInformation
                .loginUserFromKeytabAndReturnUGI("hdfs/hadoop1.********.com@********.COM",
                        "C:\\Users\\desanth.pv\\Desktop\\hdfs-hadoop1.keytab");
        UserGroupInformation proxy_ugi = UserGroupInformation.createProxyUser(
                "ssdfsdfsdfsdfag", app_ugi);
        System.out.println("--------------status---:"
                + UserGroupInformation.isSecurityEnabled());
        /*ClassLoader tccl = Thread.currentThread()
                .getContextClassLoader();*/
        try {
            /*Thread.currentThread().setContextClassLoader(
                    getClass().getClassLoader());*/
            proxy_ugi.doAs(new PrivilegedExceptionAction<Object>() {

                @Override
                public Object run() throws Exception {
                    /*ClassLoader tccl = Thread.currentThread()
                            .getContextClassLoader();*/
                    try {
                        /*Thread.currentThread().setContextClassLoader(
                                getClass().getClassLoader());*/
                        System.out.println("desanth");
                        FileSystem fs = FileSystem.get(config);
                        DistributedFileSystem hdfs = (DistributedFileSystem) fs;
                        DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();

                        String[] names = new String[dataNodeStats.length];
                        for (int i = 0; i < dataNodeStats.length; i++) {
                            names[i] = dataNodeStats[i].getHostName();
                            System.out.println((dataNodeStats[i].getHostName()));
                        }
                    } catch (IOException e) {
                        e.printStackTrace();
                    } finally {
                        //Thread.currentThread().setContextClassLoader(tccl);
                    }

                    return null;
                }
            });
        } catch (InterruptedException e) {
            e.printStackTrace();
        } finally {
            /*Thread.currentThread().setContextClassLoader(tccl);*/

        }
    }



    public void getHostnames(FileSystem fs) throws IOException {
        DistributedFileSystem hdfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();

        String[] names = new String[dataNodeStats.length];
        for (int i = 0; i < dataNodeStats.length; i++) {
            names[i] = dataNodeStats[i].getHostName();
            System.out.println((dataNodeStats[i].getHostName()));
        }
    }
}

Error :

Caused by: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]
java.io.IOException: Failed on local exception: java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]; Host Details : local host is: "jayendra-dynabook-T451-34EW/127.0.1.1"; destination host is: "hadoop2.********.com":8020;

Upvotes: 4

Views: 1611

Answers (2)

Panz0r

Reputation: 105

Following the background section of Vladimir's answer I tried many things, but the simplest fix was adding

SecurityUtil.setSecurityInfoProviders(new AnnotatedSecurityInfo());

before UserGroupInformation.loginUserFromKeytab; that solved the issue for me.
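For context, here is a minimal sketch of where that call can sit relative to the login. The class name, principal and keytab path are placeholders, not values from the question:

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.security.AnnotatedSecurityInfo;
import org.apache.hadoop.security.SecurityUtil;
import org.apache.hadoop.security.UserGroupInformation;

public class KerberosLoginSketch {

    public static void login() throws IOException {
        Configuration config = new Configuration();
        config.set("hadoop.security.authentication", "kerberos");
        UserGroupInformation.setConfiguration(config);

        // Register the annotation-based SecurityInfo provider explicitly, so the
        // Kerberos info for the HDFS protocols can be resolved even if the
        // ServiceLoader lookup finds nothing on this classloader.
        SecurityUtil.setSecurityInfoProviders(new AnnotatedSecurityInfo());

        // Placeholder principal and keytab path.
        UserGroupInformation.loginUserFromKeytab(
                "hdfs/host.example.com@EXAMPLE.COM",
                "/path/to/hdfs.keytab");
    }
}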

Upvotes: 2

Vladimir L

Reputation: 1

I have not tried to reproduce this issue in an OSGi environment, but I think you may be facing an issue similar to the one you hit when running in a Kerberised environment with a fat JAR that bundles the Hadoop/HDFS dependencies.

Namely the org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS] error.

Background

After turning on DEBUG logging there was a funny line after SASL negotiation:

Get kerberos info proto:interface org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB info:null

Notice the null - successful executions have a class reference here instead.

Tracking this down, SaslRpcClient calls SecurityUtil.getTokenInfo and SecurityUtil.getKerberosInfo. These initiate a search over all the registered org.apache.hadoop.security.SecurityInfo providers.

org.apache.hadoop.security.SecurityUtil uses java.util.ServiceLoader to look up SecurityInfo instances. ServiceLoader by default uses the current thread's ContextClassLoader to look for files in the META-INF/services/ directory on the classpath. The files are named after the service, so it looks for META-INF/services/org.apache.hadoop.security.SecurityInfo.
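Roughly, the lookup amounts to the following (an illustrative sketch, not the exact Hadoop source):

import java.util.ServiceLoader;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB;
import org.apache.hadoop.security.KerberosInfo;
import org.apache.hadoop.security.SecurityInfo;

public class SecurityInfoLookupSketch {

    // ServiceLoader reads META-INF/services/org.apache.hadoop.security.SecurityInfo
    // from the context classloader and instantiates each listed provider; each
    // provider is then asked for the Kerberos info of the protocol interface.
    static KerberosInfo lookup(Configuration conf) {
        for (SecurityInfo provider : ServiceLoader.load(SecurityInfo.class)) {
            KerberosInfo info = provider.getKerberosInfo(ClientNamenodeProtocolPB.class, conf);
            if (info != null) {
                return info;
            }
        }
        return null; // corresponds to the "info:null" seen in the DEBUG log
    }
}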

When a JAR is an uber JAR (or, I suspect, when you load things as an OSGi bundle) only one such file ends up on the classpath, so you have to ensure all the entries are merged into it. In Maven, for example, you can use the ServicesResourceTransformer of the shade plugin to append the entries; sbt-assembly has a similar merge option that is more configurable.

Solution

As described in the background, make sure the classloader that java.util.ServiceLoader uses can find META-INF/services/org.apache.hadoop.security.SecurityInfo with all the entries from the Hadoop JARs.

In the OSGi case you still have to merge the entries somehow. Try including the merged file via the <Include-Resource> instruction of your bundle configuration?
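One way to check what the bundle actually sees is a diagnostic like the one below (a sketch, assuming it is run with the same context classloader as the HDFS calls):

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.util.Enumeration;

public class SecurityInfoResourceCheck {

    // Prints every copy of the SecurityInfo provider file visible to the given
    // classloader, together with its entries. If the file is missing, or the
    // Hadoop-provided entries are not in it, the provider search described
    // above finds nothing and the SASL negotiation fails as shown below.
    public static void dump(ClassLoader cl) throws Exception {
        String resource = "META-INF/services/org.apache.hadoop.security.SecurityInfo";
        Enumeration<URL> urls = cl.getResources(resource);
        while (urls.hasMoreElements()) {
            URL url = urls.nextElement();
            System.out.println("found: " + url);
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(url.openStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = reader.readLine()) != null) {
                    System.out.println("  entry: " + line);
                }
            }
        }
    }
}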

Log output

This is the output I get when it does not work:

2018-05-03 12:01:56,739 DEBUG PrivilegedAction as:user@DOMAIN (auth:KERBEROS) from:org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:757) [ForkJoinPool-1-worker-5] org.apache.hadoop.security.UserGroupInformation (UserGroupInformation.java:1893) 
2018-05-03 12:01:56,740 DEBUG Sending sasl message state: NEGOTIATE
                                                                                   [ForkJoinPool-1-worker-5] org.apache.hadoop.security.SaslRpcClient (SaslRpcClient.java:457) 
2018-05-03 12:01:56,741 DEBUG Received SASL message state: NEGOTIATE
auths {
  method: "TOKEN"
  mechanism: "DIGEST-MD5"
  protocol: ""
  serverId: "default"
  challenge: "XXX"
}
auths {
  method: "KERBEROS"
  mechanism: "GSSAPI"
  protocol: "XXX"
  serverId: "XXX"
}
 [ForkJoinPool-1-worker-5] org.apache.hadoop.security.SaslRpcClient (SaslRpcClient.java:389) 
2018-05-03 12:01:56,741 DEBUG Get token info proto:interface org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB info:null                      [ForkJoinPool-1-worker-5] org.apache.hadoop.security.SaslRpcClient (SaslRpcClient.java:264) 
2018-05-03 12:01:56,741 DEBUG Get kerberos info proto:interface org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolPB info:null                   [ForkJoinPool-1-worker-5] org.apache.hadoop.security.SaslRpcClient (SaslRpcClient.java:291) 
2018-05-03 12:01:56,742 DEBUG PrivilegedActionException as:user@DOMAIN (auth:KERBEROS) cause:org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS] [ForkJoinPool-1-worker-5] org.apache.hadoop.security.UserGroupInformation (UserGroupInformation.java:1870) 
2018-05-03 12:01:56,742 DEBUG PrivilegedAction as:user@DOMAIN (auth:KERBEROS) from:org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:683) [ForkJoinPool-1-worker-5] org.apache.hadoop.security.UserGroupInformation (UserGroupInformation.java:1893) 
2018-05-03 12:01:56,743  WARN Exception encountered while connecting to the server : org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS] [ForkJoinPool-1-worker-5] org.apache.hadoop.ipc.Client (Client.java:715) 
2018-05-03 12:01:56,743 DEBUG PrivilegedActionException as:user@DOMAIN (auth:KERBEROS) cause:java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS] [ForkJoinPool-1-worker-5] org.apache.hadoop.security.UserGroupInformation (UserGroupInformation.java:1870) 
2018-05-03 12:01:56,743 DEBUG closing ipc connection to XXX/nnn.nnn.nnn.nnn:8020: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS] [ForkJoinPool-1-worker-5] org.apache.hadoop.ipc.Client (Client.java:1217) 
java.io.IOException: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]
    at org.apache.hadoop.ipc.Client$Connection$1.run(Client.java:720)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
    at org.apache.hadoop.ipc.Client$Connection.handleSaslConnectionFailure(Client.java:683)
    at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:770)
    at org.apache.hadoop.ipc.Client$Connection.access$3200(Client.java:397)
    at org.apache.hadoop.ipc.Client.getConnection(Client.java:1620)
    at org.apache.hadoop.ipc.Client.call(Client.java:1451)
    at org.apache.hadoop.ipc.Client.call(Client.java:1398)
    at org.apache.hadoop.ipc.ProtobufRpcEngine$Invoker.invoke(ProtobufRpcEngine.java:233)
    at com.sun.proxy.$Proxy10.create(Unknown Source)
    at org.apache.hadoop.hdfs.protocolPB.ClientNamenodeProtocolTranslatorPB.create(ClientNamenodeProtocolTranslatorPB.java:313)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:291)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:203)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:185)
    at com.sun.proxy.$Proxy11.create(Unknown Source)
    at org.apache.hadoop.hdfs.DFSOutputStream.newStreamForCreate(DFSOutputStream.java:1822)
    at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1701)
    at org.apache.hadoop.hdfs.DFSClient.create(DFSClient.java:1636)
    at org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:480)
    at org.apache.hadoop.hdfs.DistributedFileSystem$8.doCall(DistributedFileSystem.java:476)
    at org.apache.hadoop.fs.FileSystemLinkResolver.resolve(FileSystemLinkResolver.java:81)
    at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:476)
    at org.apache.hadoop.hdfs.DistributedFileSystem.create(DistributedFileSystem.java:417)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:930)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:911)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:807)
    at org.apache.hadoop.fs.FileSystem.create(FileSystem.java:796)
    ...
Caused by: org.apache.hadoop.security.AccessControlException: Client cannot authenticate via:[TOKEN, KERBEROS]
    at org.apache.hadoop.security.SaslRpcClient.selectSaslClient(SaslRpcClient.java:172)
    at org.apache.hadoop.security.SaslRpcClient.saslConnect(SaslRpcClient.java:396)
    at org.apache.hadoop.ipc.Client$Connection.setupSaslConnection(Client.java:595)
    at org.apache.hadoop.ipc.Client$Connection.access$2000(Client.java:397)
    at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:762)
    at org.apache.hadoop.ipc.Client$Connection$2.run(Client.java:758)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:422)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1866)
    at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:757)
    ... 50 more

Upvotes: 0
