Hossein
Hossein

Reputation: 25994

Undefined reference error despite having the library specified in CMake (issues linking with libtorch, C++11 ABI?)

I'm trying to create a test executable for a library I made. There are two libraries involved; let's call them lib1 and lib2. lib1 builds along with its test just fine. lib2 also builds without any issues. However, whenever I try to link lib2 with its test executable (i.e. a sample program that uses lib2), I get the following error:

usr/bin/ld: CMakeFiles/Lib2_Test.dir/Lib2_Test.cpp.o: in function `main':
Lib2_Test.cpp:(.text+0xf3): undefined reference to `Lib2::Lib2(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >, int, int)'
/usr/bin/ld: Lib2_Test.cpp:(.text+0x3f5): undefined reference to `Lib2::Evaluate(bool&, std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> >&, float&, cv::Mat&, cv::Mat&, bool)'
collect2: error: ld returned 1 exit status
make[2]: *** [CMakeFiles/Lib2_Test.dir/build.make:130: Lib2_Test] Error 1
make[1]: *** [CMakeFiles/Makefile2:76: CMakeFiles/Lib2_Test.dir/all] Error 2
make: *** [Makefile:130: all] Error 2

I inspected the dynamic sections using readelf -d and also ran the ldd command; both libs seem to have all the necessary references. However, lib1 doesn't have any issues, while lib2 produces undefined-reference errors when it is linked to an executable that uses it.

Below are the CMakeLists.txt files I wrote for both of them; further down I have also included the output of readelf.

CMakeLists.txt for lib1:

# Builds the lib1 shared library against Torch, OpenCV and dlib.
cmake_minimum_required(VERSION 3.11)
project(Lib1)

# Request C++17 for the targets created in this file.
set(CMAKE_CXX_STANDARD 17)

# Both packages are mandatory; configuration aborts if either one is not found.
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)

# Directory-wide preprocessor define.
# Presumably selects the "building the library" branch of lib1's export macro - confirm against lib1's header.
add_definitions(-D_LIB1_BUILD_DLL) 

# Append -fPIC (position-independent code) to the existing C++ flags.
set( CMAKE_CXX_FLAGS  "${CMAKE_CXX_FLAGS} -fPIC" )

# Root directory used by the install() rule at the bottom of this file.
set(CMAKE_INSTALL_PREFIX /home/me/Desktop/LibtorchPort/built_stuff)

# Header search paths: the project's own dependencies plus the Torch and OpenCV include directories.
include_directories( /home/me/Desktop/LibtorchPort/Dependencies/include ${TORCH_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})
# Extra linker search path: the lib/ directory of the libtorch 1.6.0 cxx11-ABI distribution.
LINK_DIRECTORIES(/home/me/libtorch-cxx11-abi-shared-with-deps-1.6.0+cpu/libtorch/lib)

# http://dlib.net/examples/CMakeLists.txt.html
# Add the dlib source tree to this build; the second argument is the binary directory used for it.
add_subdirectory(/home/me/dlib-19.21 /home/me/dlib-19.21/build)     

# Source files compiled into lib1.
set(Lib1_SRC ./lib1.cpp)

# lib1 is built as a shared library.
add_library(lib1 SHARED  ${Lib1_SRC})

# Link
target_link_libraries(lib1 ${TORCH_LIBRARIES})
target_link_libraries(lib1 ${OpenCV_LIBS})
target_link_libraries(lib1 dlib::dlib)

# Install the shared library into <CMAKE_INSTALL_PREFIX>/lib.
install(TARGETS lib1 LIBRARY DESTINATION lib)

This is the CMakeLists.txt for lib1_test:

# Builds lib1_dynamic_test, an executable that links against the separately built lib1.
cmake_minimum_required(VERSION 3.11)
project(Lib1_Test)
# Request C++17 for the targets created in this file.
set(CMAKE_CXX_STANDARD 17)

# Both packages are mandatory; configuration aborts if either one is not found.
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)

# Root directory used by the install() rule at the bottom of this file.
set(CMAKE_INSTALL_PREFIX /home/me/Desktop/LibtorchPort/built_stuff)
# Header search paths: the project's own dependencies plus the Torch and OpenCV include directories.
include_directories( /home/me/Desktop/LibtorchPort/Dependencies/include ${TORCH_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})
 
# Link
add_executable(lib1_dynamic_test ./Lib1_Test.cpp)
# lib1 is not a target defined in this file, so it is linked by name and looked up in lib1's build directory.
target_link_directories(lib1_dynamic_test PRIVATE /home/me/Desktop/LibtorchPort/Lib1/build)

# The test links Torch and OpenCV itself, in addition to lib1.
target_link_libraries(lib1_dynamic_test lib1 )
target_link_libraries(lib1_dynamic_test ${TORCH_LIBRARIES} )
target_link_libraries(lib1_dynamic_test ${OpenCV_LIBS})
# Install the executable into <CMAKE_INSTALL_PREFIX>/bin.
install(TARGETS lib1_dynamic_test DESTINATION bin)

This is the CMakeLists.txt for lib2:

# Builds the lib2_dynamic shared library, which links against the separately built lib1, Torch and OpenCV.
cmake_minimum_required(VERSION 3.11)
project(Lib2)

# Request C++17 for the targets created in this file.
set(CMAKE_CXX_STANDARD 17)

# Both packages are mandatory; configuration aborts if either one is not found.
find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)

# Directory-wide preprocessor define.
# Presumably selects the "building the library" branch of lib2's export macro - confirm the spelling against lib2's header.
add_definitions(-D_LIB2_BUILD_DLL) 

# Root directory used by the install() rule at the bottom of this file.
set(CMAKE_INSTALL_PREFIX /home/me/Desktop/LibtorchPort/built_stuff)

# Header search paths: the project's own dependencies plus the Torch and OpenCV include directories.
include_directories( /home/me/Desktop/LibtorchPort/Dependencies/include ${TORCH_INCLUDE_DIRS} ${OpenCV_INCLUDE_DIRS})

# Extra linker search paths: the libtorch 1.6.0 cxx11-ABI lib/ directory and lib1's build directory.
LINK_DIRECTORIES(/home/me/libtorch-cxx11-abi-shared-with-deps-1.6.0+cpu/libtorch/lib)
LINK_DIRECTORIES(/home/me/Desktop/LibtorchPort/Lib1/build)

# Source files compiled into lib2_dynamic.
set(LIB2_SRC  ./lib2.cpp )

# lib2_dynamic is built as a shared library.
add_library(lib2_dynamic SHARED ${LIB2_SRC} )

# Same directory as the second LINK_DIRECTORIES entry above, repeated at target scope.
target_link_directories(lib2_dynamic PRIVATE /home/me/Desktop/LibtorchPort/Lib1/build)

# lib1 is not a target defined in this file, so it is linked by name from the search paths above.
target_link_libraries(lib2_dynamic  lib1)
target_link_libraries(lib2_dynamic  ${TORCH_LIBRARIES})
target_link_libraries(lib2_dynamic  ${OpenCV_LIBS})

# Install the shared library into <CMAKE_INSTALL_PREFIX>/lib.
install(TARGETS lib2_dynamic LIBRARY DESTINATION lib)

And finally, here is the CMakeLists.txt for lib2_test:

# Builds Lib2_Test, an executable that links against the separately built lib2_dynamic and OpenCV.
cmake_minimum_required(VERSION 3.11)
project(Lib2_Test)

# Request C++17 for the targets created in this file.
set(CMAKE_CXX_STANDARD 17)
# Root directory used by the install() rule at the bottom of this file.
set(CMAKE_INSTALL_PREFIX /home/me/Desktop/LibtorchPort/built_stuff)

# NOTE(review): Torch is neither found nor linked in this file, so any compile
# flags that Torch's CMake package supplies (for example an ABI-related define)
# are not applied to this executable - confirm this matches how lib2 was compiled.
#find_package(Torch REQUIRED)
find_package(OpenCV REQUIRED)

# Header search paths: the project's own dependencies plus the OpenCV include directories.
include_directories( /home/me/Desktop/LibtorchPort/Dependencies/include ${OpenCV_INCLUDE_DIRS})
# Extra linker search path: lib1's build directory.
LINK_DIRECTORIES(/home/me/Desktop/LibtorchPort/Lib1/build)

add_executable(Lib2_Test ./lib2_test.cpp)

# lib2_dynamic is not a target defined in this file, so it is linked by name and looked up in lib2's build directory.
target_link_directories(Lib2_Test PRIVATE /home/me/Desktop/LibtorchPort/Lib2/build)
#target_link_directories(Lib2_Test PUBLIC /home/me/Desktop/LibtorchPort/Lib1/build)

#Link
target_link_libraries(Lib2_Test ${OpenCV_LIBS})
target_link_libraries(Lib2_Test lib2_dynamic)
# Install the executable into <CMAKE_INSTALL_PREFIX>/bin.
install(TARGETS Lib2_Test DESTINATION bin)

Running readelf -d on lib1, this is the output I get:

Dynamic section at offset 0x908f8 contains 38 entries:
  Tag        Type                         Name/Value
 0x0000000000000001 (NEEDED)             Shared library: [libtorch.so]
 0x0000000000000001 (NEEDED)             Shared library: [libc10.so]
 0x0000000000000001 (NEEDED)             Shared library: [libtorch_cpu.so]
 0x0000000000000001 (NEEDED)             Shared library: [libopencv_highgui.so.3.4]
 0x0000000000000001 (NEEDED)             Shared library: [libopencv_imgproc.so.3.4]
 0x0000000000000001 (NEEDED)             Shared library: [libopencv_core.so.3.4]
 0x0000000000000001 (NEEDED)             Shared library: [libcblas.so.3]
 0x0000000000000001 (NEEDED)             Shared library: [liblapack.so.3]
 0x0000000000000001 (NEEDED)             Shared library: [libstdc++.so.6]
 0x0000000000000001 (NEEDED)             Shared library: [libm.so.6]
 0x0000000000000001 (NEEDED)             Shared library: [libgcc_s.so.1]
 0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
 0x0000000000000001 (NEEDED)             Shared library: [ld-linux-x86-64.so.2]
 0x000000000000000e (SONAME)             Library soname: [libLib1.so]
 0x000000000000001d (RUNPATH)            Library runpath: [/home/me/anaconda3/lib/python3.8/site-packages/torch/lib:/home/me/libtorch-cxx11-abi-shared-with-deps-1.6.0+cpu/libtorch/lib:/usr/local/lib:]
 0x000000000000000c (INIT)               0x1a000
 0x000000000000000d (FINI)               0x57e00
 0x0000000000000019 (INIT_ARRAY)         0x90b28
 0x000000000000001b (INIT_ARRAYSZ)       32 (bytes)
 0x000000000000001a (FINI_ARRAY)         0x90b48
 0x000000000000001c (FINI_ARRAYSZ)       8 (bytes)
 0x000000006ffffef5 (GNU_HASH)           0x328
 0x0000000000000005 (STRTAB)             0x6840
 0x0000000000000006 (SYMTAB)             0x1758
 0x000000000000000a (STRSZ)              56053 (bytes)
 0x000000000000000b (SYMENT)             24 (bytes)
 0x0000000000000003 (PLTGOT)             0x92000
 0x0000000000000002 (PLTRELSZ)           8112 (bytes)
 0x0000000000000014 (PLTREL)             RELA
 0x0000000000000017 (JMPREL)             0x17ff0
 0x0000000000000007 (RELA)               0x14b58
 0x0000000000000008 (RELASZ)             13464 (bytes)
 0x0000000000000009 (RELAENT)            24 (bytes)
 0x000000006ffffffe (VERNEED)            0x149f8
 0x000000006fffffff (VERNEEDNUM)         5
 0x000000006ffffff0 (VERSYM)             0x14336
 0x000000006ffffff9 (RELACOUNT)          6
 0x0000000000000000 (NULL)               0x0

and this is the output for lib2:

Dynamic section at offset 0x37ba0 contains 32 entries:
  Tag        Type                         Name/Value
 0x0000000000000001 (NEEDED)             Shared library: [libLib1.so]
 0x0000000000000001 (NEEDED)             Shared library: [libtorch.so]
 0x0000000000000001 (NEEDED)             Shared library: [libtorch_cpu.so]
 0x0000000000000001 (NEEDED)             Shared library: [libopencv_core.so.3.4]
 0x0000000000000001 (NEEDED)             Shared library: [libstdc++.so.6]
 0x0000000000000001 (NEEDED)             Shared library: [libgcc_s.so.1]
 0x0000000000000001 (NEEDED)             Shared library: [libc.so.6]
 0x000000000000000e (SONAME)             Library soname: [libLib2_dynamic.so]
 0x000000000000001d (RUNPATH)            Library runpath: [/home/me/anaconda3/lib/python3.8/site-packages/torch/lib:/home/me/libtorch-cxx11-abi-shared-with-deps-1.6.0+cpu/libtorch/lib:/home/me/Desktop/LibtorchPort/Lib1/build:/usr/local/lib:]
 0x000000000000000c (INIT)               0x1e000
 0x000000000000000d (FINI)               0x2ec10
 0x0000000000000019 (INIT_ARRAY)         0x38108
 0x000000000000001b (INIT_ARRAYSZ)       16 (bytes)
 0x000000000000001a (FINI_ARRAY)         0x38118
 0x000000000000001c (FINI_ARRAYSZ)       8 (bytes)
 0x000000006ffffef5 (GNU_HASH)           0x2f0
 0x0000000000000005 (STRTAB)             0x7d88
 0x0000000000000006 (SYMTAB)             0x1dd0
 0x000000000000000a (STRSZ)              62708 (bytes)
 0x000000000000000b (SYMENT)             24 (bytes)
 0x0000000000000003 (PLTGOT)             0x39000
 0x0000000000000002 (PLTRELSZ)           14784 (bytes)
 0x0000000000000014 (PLTREL)             RELA
 0x0000000000000017 (JMPREL)             0x19e90
 0x0000000000000007 (RELA)               0x17b38
 0x0000000000000008 (RELASZ)             9048 (bytes)
 0x0000000000000009 (RELAENT)            24 (bytes)
 0x000000006ffffffe (VERNEED)            0x17a78
 0x000000006fffffff (VERNEEDNUM)         3
 0x000000006ffffff0 (VERSYM)             0x1727c
 0x000000006ffffff9 (RELACOUNT)          4
 0x0000000000000000 (NULL)               0x0

And yet lib1 builds and links just fine, while lib2, which depends on lib1, has issues when being linked to its test or to any other libs. I'm clueless at this point and have no idea what could have caused this. What am I missing?

Update 1

This is lib2_test.cpp: https://paste.ee/p/pOgFk and this is what the header file looks like:

/* Public header for Lib2: export macro, result types and the Lib2 class declaration. */
#ifndef Lib2_H
#define Lib2_H

/* If we are we on Windows, we want a single define for it.*/
#if !defined(_WIN32) && (defined(__WIN32__) || defined(WIN32) || defined(__MINGW32__))
#define _WIN32
#endif /* _WIN32 */

/* LIB2_API expands to the export/import/visibility attribute matching how Lib2 is built or consumed. */
#if defined(_WIN32) && defined(_Lib2_BUILD_DLL)
/* We are building Lib2 as a Win32 DLL */
#define LIB2_API __declspec(dllexport)
#elif defined(_WIN32) && defined(Lib2_DLL)
/* We are calling Lib2 as a Win32 DLL */
#define LIB2_API __declspec(dllimport)
#elif defined(__GNUC__) && defined(_Lib2_BUILD_DLL)
/* We are building Lib2 as a shared / dynamic library */
#define LIB2_API __attribute__((visibility("default")))
#else
/* We are building or calling Lib2 as a static library */
#define LIB2_API
#endif

#include <string>
#include <vector>
#include <map>
#include <memory>
#include <opencv2/core.hpp>


/* Status codes carried as the first element of a Lib2Result. */
enum class ValidationStatus
{
    None = -1,
    UnderValidation = 0,
    Validated = 1,
    Invalidated = 2,
    AnomalyDetected = 3,
    ToomuchAnonalyDetected = 4
};

/* Result triple: (ValidationStatus, std::string, bool).
   NOTE(review): std::tuple is used here but <tuple> is not included directly;
   presumably it arrives through another include - confirm. */
typedef std::tuple<ValidationStatus, std::string, bool> Lib2Result;

/* Forward declaration; the definition of Lib2Impl is not visible in this header. */
class Lib2Impl;

/* Exported class; its implementation object is held through a shared_ptr. */
class LIB2_API Lib2
{
private:
    // Shared handle to the Lib2Impl object.
    // NOTE(review): this member has the same name as the class, which C++ rejects for a class
    // that declares a constructor - presumably an artifact of renaming for the post; confirm.
    std::shared_ptr<Lib2Impl> Lib2;

public:
    // Constructor; every parameter has a default. This is one of the two symbols the linker reports as undefined.
    Lib2(std::string shape_predictor_path = "", std::string eyeNet_path = "", int valid_presence_delay = 5, int fpasPassed = 0);

    // Takes containers of validity flags, (string, float) ids and face images plus the original image,
    // and returns one Lib2Result per entry - presumably one per face; confirm against the implementation.
    std::vector<Lib2Result> Run(std::map<std::string, bool>& validity_result, 
                                       std::vector<std::tuple<std::string, float>>& ids,
                                       std::vector<cv::Mat>& faces, 
                                       cv::Mat& originalImage,
                                       bool show_debug_info=false);

    // Single-item evaluation returning one Lib2Result; debug_info defaults to true here,
    // whereas Run's show_debug_info defaults to false. This is the other symbol reported as undefined.
    Lib2Result Evaluate(bool& status, std::string& name, float& theta, cv::Mat& face_image, cv::Mat& originalImage, bool debug_info = true);
    ~Lib2();

};

#endif // !Lib2_H

Concerning the comments: as you can see, the calls in lib2_test.cpp are OK and they use the same signatures.

Update 2

I should also add that the project built just fine on Windows, both with Visual Studio and with CMake! However, under Linux (Ubuntu 20.04), I'm facing these issues. So this is not just a matter of calling different/wrong methods or using wrong signatures. This code should compile and link just fine, but I'm doing something wrong here and I'm not sure what it is.

Note 2

lib1 and lib2 are just made-up names standing in for the actual filenames (I chose lib1 and lib2 to keep things simple and replaced the names here), so if you see differences in letter case, don't mind them; in the actual files the names match.

Upvotes: 0

Views: 2025

Answers (1)

Hossein
Hossein

Reputation: 25994

Summary

The third-party lib (torch) was built with the pre-cxx11 ABI, and libs built against it obviously couldn't be linked with objects that were using the cxx11 ABI!

Long explanation

After hours of debugging code that worked just fine on Windows (both with Visual Studio and with CMake) while being a pain in the neck on Linux, I found the culprit!

libtorch is shipped in two kinds of builds: pre-cxx11 ABI and cxx11 ABI! My build was using the pre-cxx11 libs that ship with the Python package (torch 1.6 CPU); since Anaconda3 was on the PATH and I was also using it to build the libs, I ran into this issue.

What gave it away after all these hours was the weird argument types of the undefined methods: std::__cxx11::basic_string, where I expected to see simply std::string. I didn't expect these two to be different, and I thought it was just a weird naming scheme g++ was using, until moments ago, when I said to myself: let's search for that, maybe I can get something out of it! Lo and behold, this was the case:

If you get linker errors about undefined references to symbols that involve types in the std::__cxx11 namespace or the tag [abi:cxx11] then it probably indicates that you are trying to link together object files that were compiled with different values for the _GLIBCXX_USE_CXX11_ABI macro. This commonly happens when linking to a third-party library that was compiled with an older version of GCC. If the third-party library cannot be rebuilt with the new ABI then you will need to recompile your code with the old ABI.

ref

To fix this, I used the cxx11-ABI libtorch directly when building all the libs, and that did it. In other words, contrary to what is shown in PyTorch's official documentation, do not do:

cmake -DCMAKE_PREFIX_PATH="$(python -c 'import torch.utils; print(torch.utils.cmake_prefix_path)')" ..

If you do this, all hell will break loose if you, like me, expected the libs to have been built with the cxx11 ABI, because they have not!

So if you are on Linux, just grab and use the prebuilt libs with the cxx11 ABI, and avoid what ships with the PyTorch Python package!

Note

If you are building a Python extension using Pybind11 and libtorch, make sure your PyTorch is also built with _GLIBCXX_USE_CXX11_ABI=1, or else you'll see the undefined references for the reason you now know! You can check this simply by running the following snippet in a terminal/cmd:

python -c "import torch; print(f'GLIBCXX_USE_CXX11_ABI = {int(torch._C._GLIBCXX_USE_CXX11_ABI)}')"

Based on the information provided here conda packages (cuda builds only) should be shipped with GLIBCXX_USE_CXX11_ABI=1. I tested the 1.6cpu using pip and conda but they both reported GLIBCXX_USE_CXX11_ABI = 0. So be aware of that.

If you happened to need to build from source, you can follow this guide.

As to why Pytorch ships like this :

we have that flag set because we build with gcc 4.9.x, which only has the old ABI. In GCC 5.1, the ABI for std::string was changed, and binaries compiling with gcc >= 5.1 are not ABI-compatible with binaries build with gcc < 5.1 (like pytorch) unless you set that flag. ref

Upvotes: 5

Related Questions