Reputation: 4077
I'm having problems integrating Qt with CUDA. I am running on a 64-bit Mac with the 64-bit CUDA toolkit installed; however, when I try to build my code, the linker reports the error `ld: file not found: @rpath/CUDA.framework/Versions/A/CUDA for architecture x86_64`.
I have verified all my paths but the same error is consistently thrown. My .pro
configuration code is as follows:
# Qt modules used by the application
QT += core gui
QT += multimedia
QT += multimediawidgets
QT += concurrent
# QtWidgets is a separate module from Qt 5 onwards
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
TARGET = WebcamFilter
TEMPLATE = app
# The last entry of a list must not end in a backslash: a trailing
# continuation would fold the following HEADERS assignment into SOURCES
# and leave HEADERS unset.
SOURCES += main.cpp \
    mainwindow.cpp \
    camerafeed.cpp
HEADERS += mainwindow.h \
    camerafeed.h
FORMS += mainwindow.ui
# CUDA Resources
# .cu files listed here are the input of the extra compiler defined below
CUDA_SOURCES += gaussian.cu
# Root of the CUDA toolkit installation
CUDA_DIR = /usr/local/cuda
# Path to header and lib files
INCLUDEPATH += $$CUDA_DIR/include
QMAKE_LIBDIR += $$CUDA_DIR/lib
# Libs used for source code
LIBS += -lcudart -lcuda
# GPU Architecture
CUDA_ARCH = sm_20
# Custom flags for nvcc
NVCCFLAGS = --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v
# Prepare extra compiler configuration
# Turns every INCLUDEPATH entry into a -I<dir> option for nvcc
CUDA_INC = $$join(INCLUDEPATH,' -I','-I',' ')
# Compiles one .cu file into an object file; the sed pipe rewrites "(<digits>)"
# as ":<digits>" in nvcc's output and sends the result to stderr
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -arch=$$CUDA_ARCH -c $$NVCCFLAGS \
$$CUDA_INC $$LIBS ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \
2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
cuda.dependency_type = TYPE_C
# Command qmake runs to discover the dependencies of each .cu file (nvcc -M)
cuda.depend_command = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME}
cuda.input = CUDA_SOURCES
cuda.output = ${OBJECTS_DIR}${QMAKE_FILE_BASE}_cuda.o
# Tell Qt that we want to add more stuff to the Makefile
QMAKE_EXTRA_COMPILERS += cuda
Upvotes: 0
Views: 692
Reputation: 179
I came across this problem a few months ago (plus some other issues after this was fixed) so I figured I'd just post a fully working QT/CUDA example now that I have it mostly figured out. I pulled most of the .pro
file from a larger project for both Linux and Mac (CUDA stuff is in the gpu folder) but this bit of code has only been tested on OS X.
I'm currently using:
CUDA 7.0 driver V7.0.27
OS X Yosemite 10.10.3
QT 5.3.1
If you haven't updated recently make sure the CUDA deviceQuery and bandwidthTest samples are still working before trying this code.
The .pro
file below might be all you need to solve your problems but the C++ code is below as well. The code comments do most of the explaining.
#-------------------------------------------------
#
# Project created by QtCreator 2015-05-02T02:37:39
#
#-------------------------------------------------
# Qt modules used by the application
QT += core gui
# QtWidgets is a separate module from Qt 5 onwards
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
TARGET = qtcuda
TEMPLATE = app
# project build directories (if not using shadow build)
# DESTDIR is whatever directory the shell command `pwd` reports while qmake
# parses this file; all generated files go into build/ underneath it
DESTDIR = $$system(pwd)
BUILDDIR = $$DESTDIR/build
MOC_DIR = $$BUILDDIR # moc_... files
UI_DIR = $$BUILDDIR # ui_mainwindow.h
OBJECTS_DIR = $$BUILDDIR/bin # .o binary files
SOURCES += main.cpp\
mainwindow.cpp
HEADERS += mainwindow.h
FORMS += mainwindow.ui
# NOTE: C++ flags are needed here for
# the CUDA Thrust library
############### UNIX FLAGS #####################
# Applies to every unix-like platform, which includes OS X
unix {
QMAKE_CXXFLAGS += -std=c++11
}
############### MAC FLAGS #####################
macx {
    # libs that don't get passed to nvcc (they are removed from the nvcc lib list later)
    NON_CUDA_LIBS += -stdlib=libc++
    LIBS += $$NON_CUDA_LIBS
    # build against libc++ and target OS X 10.7 or newer, for both compiling and linking
    QMAKE_CXXFLAGS += -stdlib=libc++ -mmacosx-version-min=10.7
    QMAKE_LFLAGS += -mmacosx-version-min=10.7
    QMAKE_MACOSX_DEPLOYMENT_TARGET = 10.7
    # specific to computers without older sdks
    # The path must be a single token: embedded whitespace would split it into
    # two list values and the exists() check below could never succeed.
    MAC_SDK = /Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX10.9.sdk/
    if( exists( $$MAC_SDK) ) {
        QMAKE_MAC_SDK = macosx10.9 # lowest sdk on my computer :/
    }
    # don't show warnings for c++11 extensions
    QMAKE_CXXFLAGS += -Wno-c++11-extensions
}
################### CUDA ###################### (similar to your setup)
# Pipe appended to the nvcc command line: it rewrites "(<digits>)" as ":<digits>"
# in nvcc's output and forwards the result to stderr. The two scopes differ only
# in the sed option used (-r vs. -E).
unix:!macx {
SED_STUFF = 2>&1 | sed -r \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}
macx {
SED_STUFF = 2>&1 | sed -E \"s/\\(([0-9]+)\\)/:\\1/g\" 1>&2
}
CUDA_DIR = /usr/local/cuda
# make sure cuda is available on the computer
if ( exists( $$CUDA_DIR/ ) ) {
message( "Configuring for cuda...");
DEFINES += CUDA_7 # same as putting this in code -> #define CUDA_7
# Cuda sources
CUDA_SOURCES += cuda/wrappers.cu
# show files in working tree
OTHER_FILES += cuda/wrappers.cu \
cuda/wrappers.cuh \
cuda/helper_cuda.h
# Path to the cuda libraries
CUDA_LIB = $$CUDA_DIR/lib
# Path to header and lib files
INCLUDEPATH += $$CUDA_DIR/include \
cuda # my cuda files
QMAKE_LIBDIR += $$CUDA_LIB
# prevents warnings from code we didn't write
QMAKE_CXXFLAGS += -isystem $$CUDA_DIR/include
LIBS += -lcudart # add other cuda libs here (-lcublas -lcurand, etc.)
# SPECIFY THE RPATH!!!!! (your problem...previously my problem)
# The CUDA library directory is handed to the application linker as an rpath,
# and the same rpath is passed to nvcc through -Xlinker.
QMAKE_LFLAGS += -Wl,-rpath,$$CUDA_LIB
NVCCFLAGS = -Xlinker -rpath,$$CUDA_LIB
# libs used in the code
CUDA_LIBS = $$LIBS
CUDA_LIBS -= $$NON_CUDA_LIBS # remove libs nvcc won't recognize
# GPU architecture (might be a way to detect this somehow instead of hardcoding)
CUDA_ARCH = sm_20 # <- based on specs from your code. This was tested with sm_30
# Some default NVCC flags
NVCCFLAGS += --compiler-options -fno-strict-aliasing -use_fast_math --ptxas-options=-v --std=c++11
# Prepare the extra compiler configuration (taken from the nvidia forum)
# Turns every INCLUDEPATH entry into a -I<dir> option for nvcc
CUDA_INC = $$join(INCLUDEPATH,' -I','-I',' ')
cuda.commands = $$CUDA_DIR/bin/nvcc -m64 -O3 -arch=$$CUDA_ARCH -c $$NVCCFLAGS \
$$CUDA_INC $$CUDA_LIBS ${QMAKE_FILE_NAME} -o ${QMAKE_FILE_OUT} \
$$SED_STUFF
# nvcc error printout format ever so slightly different from gcc
# http://forums.nvidia.com/index.php?showtopic=171651
cuda.dependency_type = TYPE_C
# Command qmake runs to discover the dependencies of each .cu file (nvcc -M)
cuda.depend_command = $$CUDA_DIR/bin/nvcc -O3 -M $$CUDA_INC $$NVCCFLAGS ${QMAKE_FILE_NAME}
cuda.input = CUDA_SOURCES
cuda.output = ${OBJECTS_DIR}${QMAKE_FILE_BASE}_cuda.o
# Tell Qt that we want to add more stuff to the Makefile
QMAKE_EXTRA_COMPILERS += cuda
} # endif CUDA
The following two files are composed of extern functions used to execute CUDA code. The .cu
file defines functions that contain CUDA code and gets compiled with NVCC (as specified in the .pro
file). The .cuh
file is used as a header file and simply declares the same functions so they can be referenced by C++ files. Only wrappers.cuh
needs to be included in the C++ code.
Note: The referenced helper_cuda.h
file can be found here
Note 2: This project assumes wrappers.cuh
, wrappers.cu
, and helper_cuda.h
are kept in a folder labeled cuda
within the project directory.
#ifndef WRAPPERS_CUH
#define WRAPPERS_CUH

#include <stddef.h> // size_t

typedef unsigned int uint;

// C-linkage entry points implemented in wrappers.cu (compiled by nvcc).
// Including this header is all the C++ side needs in order to call them.
extern "C"
{
    // Selects the CUDA device to use; terminates the process if none is found.
    void cudaInit();

    // Allocates `size` bytes of device memory and stores the pointer in *devPtr.
    // The size is a size_t so the declaration matches the definition in wrappers.cu.
    void allocateArray(void **devPtr, size_t size);

    // Frees device memory obtained from allocateArray.
    void freeArray(void *devPtr);

    // Copies `size` bytes from `host` to `device`, starting `offset` bytes into `device`.
    void copyArrayToDevice(void *device, const void *host, int offset, int size);

    // Copies `size` bytes from `device` back to `host`.
    void copyArrayFromDevice(void *host, const void *device, int size);

    // Returns the sum of the first n elements of the device array dNumbers.
    uint sumNumbers(uint *dNumbers, uint n);

    // not used here but useful when calling kernel functions:
    // numThreads = min(blockSize, n), numBlocks = n / numThreads rounded up
    void computeGridSize(uint n, uint blockSize, uint &numBlocks, uint &numThreads);
}
#endif // WRAPPERS_CUH
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>
#include <thrust/device_ptr.h>
#include <thrust/reduce.h>
#include "helper_cuda.h"
typedef unsigned int uint;
extern "C"
{
// Picks the CUDA device to run on via findCudaDevice() (from helper_cuda.h).
// If no usable device is reported (negative id) the process is terminated
// with a failure status, since nothing else in the program can work without one.
void cudaInit()
{
    // use device with highest Gflops/s
    const int devID = findCudaDevice();
    if (devID < 0)
    {
        // This is an error path: report on stderr and signal failure to the caller's shell.
        fprintf(stderr, "No CUDA Capable devices found, exiting...\n");
        exit(EXIT_FAILURE);
    }
}
// Allocates `size` bytes of device memory with cudaMalloc and stores the
// resulting pointer in *devPtr. The CUDA status code is handed to
// checkCudaErrors (from helper_cuda.h) for error handling.
void allocateArray(void **devPtr, size_t size)
{
checkCudaErrors(cudaMalloc(devPtr, size));
}
// Releases device memory with cudaFree; the CUDA status code is handed to
// checkCudaErrors (from helper_cuda.h) for error handling.
void freeArray(void *devPtr)
{
checkCudaErrors(cudaFree(devPtr));
}
// Copies `size` bytes from host memory to device memory.
// `offset` is a byte offset into the destination: the device pointer is
// cast to char* before the offset is added.
void copyArrayToDevice(void *device, const void *host, int offset, int size)
{
checkCudaErrors(cudaMemcpy((char *) device + offset, host, size, cudaMemcpyHostToDevice));
}
// Copies `size` bytes from device memory back into host memory.
void copyArrayFromDevice(void *host, const void *device, int size)
{
checkCudaErrors(cudaMemcpy(host, device, size, cudaMemcpyDeviceToHost));
}
// Adds up the first n elements of the device array dNumbers and returns the total.
uint sumNumbers(uint *dNumbers, uint n)
{
    // Wrap the raw pointer so thrust knows the range lives in device memory,
    // then reduce over [first, first + n).
    const thrust::device_ptr<uint> first(dNumbers);
    const thrust::device_ptr<uint> last = first + n;
    return thrust::reduce(first, last);
}
// Ceiling division: returns a / b rounded up to the next integer.
// A zero divisor yields 0 instead of dividing by zero, so asking for a grid
// that covers zero elements produces zero blocks rather than a crash.
uint iDivUp(uint a, uint b)
{
    if (b == 0)
    {
        return 0;
    }
    const uint quotient = a / b;
    return (a % b != 0) ? (quotient + 1) : quotient;
}
// Works out a launch configuration for n elements: each block gets at most
// blockSize threads (fewer when n is smaller), and numBlocks is the number of
// such blocks needed to cover all n elements.
void computeGridSize(uint n, uint blockSize, uint &numBlocks, uint &numThreads)
{
    const uint threadsPerBlock = (n < blockSize) ? n : blockSize;
    numThreads = threadsPerBlock;
    numBlocks = iDivUp(n, threadsPerBlock);
}
}
The next three files create a simple QT window and check for mouse events. Every time the mouse is moved the X and Y pixel positions are added together to create n. Then a CUDA function is used to find 1 + 2 + ... + n (yes this is weird and random; the point was to show CUDA running in a quick and easy way).
So if the mouse is at (23, 45) then:
n = (23 + 45) = 68 and
1 + 2 + ... + n = 2346
This is then displayed at the bottom of the window.
#include "mainwindow.h"
#include <QApplication>
int main(int argc, char *argv[])
{
QApplication a(argc, argv);
MainWindow w;
w.show();
return a.exec();
}
#ifndef MAINWINDOW_H
#define MAINWINDOW_H
#include <QMainWindow>
// Forward declaration of the form class; mainwindow.cpp pulls in its
// definition through ui_mainwindow.h.
namespace Ui {
class MainWindow;
}
// Main window of the demo. It owns the form object and a device-side array
// of numbers, and reacts to mouse-move events through eventFilter().
class MainWindow : public QMainWindow
{
Q_OBJECT
public:
explicit MainWindow(QWidget *parent = 0);
~MainWindow();
// events are passed here; the implementation handles QEvent::MouseMove
// and returns false so the event is not consumed
virtual bool eventFilter(QObject *obj, QEvent *event);
private:
Ui::MainWindow *ui; // form object, created in the constructor and deleted in the destructor
uint *m_dNumbers; // device array, allocated in the constructor and freed in the destructor
};
#endif // MAINWINDOW_H
#include "mainwindow.h"
#include "ui_mainwindow.h"
#include <QEvent>
#include <QMouseEvent>
#include <assert.h>
#include "wrappers.cuh"
// Number of elements stored in the device array, and therefore the largest n
// that can be summed.
const uint MAX_NUMBERS = 5000;
// Builds the UI, starts listening for events application-wide, and uploads
// the sequence 1..MAX_NUMBERS to the GPU so it can be summed later.
MainWindow::MainWindow(QWidget *parent)
    : QMainWindow(parent)
    , ui(new Ui::MainWindow)
{
    ui->setupUi(this);
    // the filter goes on the application object, so this window sees events
    // sent to any object in the application
    qApp->installEventFilter(this);

    // host-side staging buffer holding {1, 2, 3, ..., MAX_NUMBERS}
    uint hostValues[MAX_NUMBERS];
    for (uint value = 1; value <= MAX_NUMBERS; ++value)
    {
        hostValues[value - 1] = value;
    }

    // CUDA FUNCTIONS: pick a device, allocate the device array, upload the host data.
    // sizeof(hostValues) is the byte size of all MAX_NUMBERS elements.
    cudaInit();
    allocateArray(reinterpret_cast<void **>(&m_dNumbers), sizeof(hostValues));
    copyArrayToDevice(m_dNumbers, hostValues, 0, sizeof(hostValues));
}
// Releases what the constructor acquired: the device array first, then the
// form object.
MainWindow::~MainWindow()
{
// CUDA FUNCTION: free device memory
freeArray(m_dNumbers);
delete ui;
}
// Event filter used to detect mouse movement.
// For every QEvent::MouseMove it takes n = x + y of the reported position,
// clamped to [0, MAX_NUMBERS], sums the first n device numbers on the GPU and
// shows the result in the status bar. Always returns false, so the event is
// never consumed and continues to its normal target.
bool MainWindow::eventFilter(QObject *, QEvent *event)
{
    if (event->type() == QEvent::MouseMove)
    {
        // find mouseX + mouseY
        QMouseEvent *mouseEvent = static_cast<QMouseEvent*>(event);
        const QPoint p = mouseEvent->pos();

        // x() and y() are signed. A negative total must map to n = 0; casting
        // it straight to uint would wrap to a huge value and end up as MAX_NUMBERS.
        const int coordSum = p.x() + p.y();
        uint n = 0;
        if (coordSum > 0)
        {
            n = std::min(static_cast<uint>(coordSum), MAX_NUMBERS);
        }

        // CUDA FUNCTION:
        // compute the sum of 1 + 2 + 3 + ... + n
        const uint sum = sumNumbers(m_dNumbers, n);

        // check that the sum is correct (closed form of the arithmetic series)
        assert(sum == ((n * (n + 1)) / 2));

        // show the sum at the bottom of the window
        statusBar()->showMessage(QString("Mouse pos: (%1, %2) Sum from 0 to %3 = %4").arg(p.x()).arg(p.y()).arg(n).arg(sum));
    }
    return false;
}
And last but not least the .ui
file if you want to actually build and run the project:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Form for the MainWindow class: a 400x300 main window with an empty
     central widget, a menu bar, a tool bar and a status bar. -->
<ui version="4.0">
<class>MainWindow</class>
<widget class="QMainWindow" name="MainWindow">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>300</height>
</rect>
</property>
<property name="windowTitle">
<string>MainWindow</string>
</property>
<widget class="QWidget" name="centralWidget"/>
<widget class="QMenuBar" name="menuBar">
<property name="geometry">
<rect>
<x>0</x>
<y>0</y>
<width>400</width>
<height>22</height>
</rect>
</property>
</widget>
<widget class="QToolBar" name="mainToolBar">
<attribute name="toolBarArea">
<enum>TopToolBarArea</enum>
</attribute>
<attribute name="toolBarBreak">
<bool>false</bool>
</attribute>
</widget>
<widget class="QStatusBar" name="statusBar"/>
</widget>
<layoutdefault spacing="6" margin="11"/>
<resources/>
<connections/>
</ui>
I know the QT/CUDA process can be annoying and it's been half a year of silence since you asked the question but hopefully this helps.
Upvotes: 2