Reputation: 554
I have below a Minimal, Complete, and Verifiable example of what I want to do.
Basically I want to implement an OpenSSL RSA engine that integrates some CUDA code. The CUDA part should do the modular exponentiation but it is not important in this example, so I just used the BN_mod_exp
(see the engine.c file, modexp
function) in order to simplify the code. The code that I present is a simplified version of my project and it can be built/compiled very easy with these commands:
gcc -fPIC -I/usr/local/cuda/include -c engine.c
nvcc --compiler-options '-fPIC' -c my_cuda.cu -lcrypto
g++ -L/usr/local/cuda/lib64 -shared -o gpu.so engine.o my_cuda.o -lcuda -lcudart
openssl engine -t -c `pwd`/gpu.so
... the output of the last command says that the RSA engine is available
/*engine.c*/
#include <openssl/opensslconf.h>
#include <stdio.h>
#include <string.h>
#include <openssl/crypto.h>
#include <openssl/buffer.h>
#include <openssl/engine.h>
#include <openssl/rsa.h>
#include <openssl/bn.h>
#include <openssl/err.h>
#include "my_cuda.h"
/* Constants used when creating the ENGINE */
static const char *engine_e_rsax_id = "rsa_gpu";
static const char *engine_e_rsax_name = "RSAX engine support";
static int modexp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx);// r = r ^ I mod rsa->n
static int e_rsax_rsa_finish(RSA *r);
static int e_rsax_destroy(ENGINE *e);
static int e_rsax_init(ENGINE *e);
static int e_rsax_finish(ENGINE *e);
static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void));
static const ENGINE_CMD_DEFN e_rsax_cmd_defns[] = {
{0, NULL, NULL, 0}
};
static ENGINE *ENGINE_rsax (void);
void ENGINE_load_rsax (void)
{
/* On non-x86 CPUs it just returns. */
ENGINE *toadd = ENGINE_rsax();
if(!toadd) return;
ENGINE_add(toadd);
ENGINE_free(toadd);
ERR_clear_error();
}
static RSA_METHOD e_rsax_rsa =
{
"Intel RSA-X method",
NULL,
NULL,
NULL,
NULL,
modexp,
NULL,
NULL,
NULL,
RSA_FLAG_CACHE_PUBLIC|RSA_FLAG_CACHE_PRIVATE,
NULL,
NULL,
NULL
};
/* This internal function is used by ENGINE_rsax() */
static int bind_helper(ENGINE *e, const char *id)
{
printf("%s\n", id);
const RSA_METHOD *meth1;
if(!ENGINE_set_id(e, engine_e_rsax_id) ||
!ENGINE_set_name(e, engine_e_rsax_name) ||
!ENGINE_set_RSA(e, &e_rsax_rsa) ||
!ENGINE_set_destroy_function(e, e_rsax_destroy) ||
!ENGINE_set_init_function(e, e_rsax_init) ||
!ENGINE_set_finish_function(e, e_rsax_finish) ||
!ENGINE_set_ctrl_function(e, e_rsax_ctrl) ||
!ENGINE_set_cmd_defns(e, e_rsax_cmd_defns))
return 0;
meth1 = RSA_PKCS1_SSLeay();
e_rsax_rsa.rsa_pub_enc = meth1->rsa_pub_enc;
e_rsax_rsa.rsa_pub_dec = meth1->rsa_pub_dec;
e_rsax_rsa.rsa_priv_enc = meth1->rsa_priv_enc;
e_rsax_rsa.rsa_priv_dec = meth1->rsa_priv_dec;
e_rsax_rsa.bn_mod_exp = meth1->bn_mod_exp;
e_rsax_rsa.finish = meth1->finish;
return 1;
}
/* Used to attach our own key-data to an RSA structure */
static int rsax_ex_data_idx = -1;
static int e_rsax_destroy(ENGINE *e)
{
return 1;
}
/* (de)initialisation functions. */
static int e_rsax_init(ENGINE *e)
{
if (rsax_ex_data_idx == -1)
rsax_ex_data_idx = RSA_get_ex_new_index(0,
NULL,
NULL, NULL, NULL);
if (rsax_ex_data_idx == -1)
return 0;
return 1;
}
static int e_rsax_finish(ENGINE *e)
{
return 1;
}
static int e_rsax_ctrl(ENGINE *e, int cmd, long i, void *p, void (*f)(void))
{
int to_return = 1;
switch(cmd)
{
/* The command isn't understood by this engine */
default:
to_return = 0;
break;
}
return to_return;
}
IMPLEMENT_DYNAMIC_BIND_FN(bind_helper)
IMPLEMENT_DYNAMIC_CHECK_FN()
static ENGINE *ENGINE_rsax(void)
{
ENGINE *ret = ENGINE_new();
if(!ret)
return NULL;
if(!bind_helper(ret, engine_e_rsax_id))
{
ENGINE_free(ret);
return NULL;
}
return ret;
}
static int modexp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)// r = r ^ I mod rsa->n
{
modexp512();
return BN_mod_exp(r, r, I, rsa->n, ctx);
}
... the next two files are for demonstration purposes and are reproducing my project
/*my_cuda.cu*/
#include <cuda_runtime.h>
#include <stdio.h>
extern "C" {
#include "my_cuda.h"
}
__global__ void dummy_gpu_kernel(){
// stuff here
}
extern "C"
void modexp512(){
dummy_gpu_kernel<<<1,1>>>();
}
...
/*my_cuda.h*/
#ifndef MY_DUMMY
#define MY_DUMMY
void modexp512();
#endif
Now I want to make some speed tests using
$ openssl speed rsa512 -engine `pwd`/gpu.so
and I receive the next error
/full/path/gpu.so
engine "rsa_gpu" set.
Doing 512 bit private rsa's for 10s: 575412 512 bit private RSA's in 10.02s
RSA verify failure. No RSA verify will be done.
140592781633184:error:0407006A:rsa routines:RSA_padding_check_PKCS1_type_1:block type is not 01:rsa_pk1.c:100:
140592781633184:error:04067072:rsa routines:RSA_EAY_PUBLIC_DECRYPT:padding check failed:rsa_eay.c:721:
OpenSSL 1.0.1f 6 Jan 2014
built on: Mon Feb 29 18:11:15 UTC 2016
options:bn(64,64) rc4(16x,int) des(idx,cisc,16,int) aes(partial) blowfish(idx)
compiler: cc -fPIC -DOPENSSL_PIC -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -m64 -DL_ENDIAN -DTERMIO -g -O2 -fstack-protector --param=ssp-buffer-size=4 -Wformat -Werror=format-security -D_FORTIFY_SOURCE=2 -Wl,-Bsymbolic-functions -Wl,-z,relro -Wa,--noexecstack -Wall -DMD32_REG_T=int -DOPENSSL_IA32_SSE2 -DOPENSSL_BN_ASM_MONT -DOPENSSL_BN_ASM_MONT5 -DOPENSSL_BN_ASM_GF2m -DSHA1_ASM -DSHA256_ASM -DSHA512_ASM -DMD5_ASM -DAES_ASM -DVPAES_ASM -DBSAES_ASM -DWHIRLPOOL_ASM -DGHASH_ASM
I have searched for these errors on the internet, but without success. My question is: how can I get rid of this "RSA verify failure. No RSA verify will be done." error? It seems like OpenSSL does not trust my implementation or something.
I have deleted my previous post because it couldn't be compiled and verified. This example can be verified by anyone who has OpenSSL and CUDA[optional] installed. But since the CUDA part is not important in this example and someone wants to try it, he/she has to comment out the #include "my_cuda.h"
and modexp512();
from engine.c
file and change the build process accordingly, that is:
gcc -fPIC -c engine.c
g++ -shared -o gpu.so -lcrypto engine.o
and try the same command
$ openssl speed rsa512 -engine `pwd`/gpu.so
Upvotes: 2
Views: 1464
Reputation: 8457
I tinkered with this a bit because I found it interesting. I am not convinced that you are doing anything wrong. I am not certain how rsa->_method_mod_n
should be used, but I replaced your modexp with this:
static int modexp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
{
const RSA_METHOD *meth = RSA_PKCS1_SSLeay();
return meth->rsa_mod_exp(r, I, rsa, ctx);
}
and it worked fine, and then this:
static int modexp(BIGNUM *r, const BIGNUM *I, RSA *rsa, BN_CTX *ctx)
{
const RSA_METHOD *meth = RSA_PKCS1_SSLeay();
return meth->bn_mod_exp(r, (const BIGNUM*) r, I, rsa->n, ctx, rsa->_method_mod_n);
}
and it failed exactly as you describe.
In both cases I am using the OpenSSL functions of the method as my reading of the openssl source would suggest they should be used.
Upvotes: 5