Reputation: 2973
In C++, we can have an inline asm statement return two values, like this:
/* ThreadLoad for ulonglong2 pointers: a single ld.<modifier>.v2.u64
   instruction writes both retval.x and retval.y from the address in ptr. */
ulonglong2 ThreadLoad<cub_modifier, ulonglong2*>(ulonglong2* ptr)
{
    ulonglong2 retval;
    /* Extended-asm layout is  template : outputs : inputs.
       The ':' after the template string is required; without it the "=l"
       constraint literal is concatenated onto the template and the
       statement does not parse. */
    asm volatile ("ld."#ptx_modifier".v2.u64 {%0, %1}, [%2];" :
        "=l"(retval.x),         /* %0: first output  */
        "=l"(retval.y) :        /* %1: second output */
        _CUB_ASM_PTR_(ptr));    /* %2: input operand; constraint comes from _CUB_ASM_PTR_, defined elsewhere */
    return retval;
}
How can we create LLVM IR for this usage? I have tried:
%3 = alloca { i32, i32 }, align 8
%40 = getelementptr inbounds { i32, i32 }* %38, i64 %39
%41 = getelementptr inbounds { i32, i32 }* %3, i32 0, i32 0
%42 = getelementptr inbounds { i32, i32 }* %3, i32 0, i32 1
call void asm sideeffect alignstack "ld.cg.v2.u32 \09{$0, $1}, [$2];", "=l,=l,l"(i32* %41, i32* %42, { i32, i32 }* %40)
%43 = load { i32, i32 }* %3, align 8
And this doesn't work.
Upvotes: 1
Views: 988
Reputation: 5482
I made a toy example of your problem:
// Pair of unsigned long long values (x, y); used as the parameter pointee
// and the by-value result type of ThreadLoad in this example.
struct ulonglong2
{
unsigned long long x;
unsigned long long y;
};
// Fills both fields of a ulonglong2 with one inline-asm statement that takes
// ptr as its only input, and returns the pair by value.
ulonglong2 ThreadLoad(ulonglong2* ptr)
{
ulonglong2 retval;
// Operands: %0 -> retval.x and %1 -> retval.y are outputs ("=l");
// %2 -> ptr is the single input ("l").
asm volatile ("ld.cg.v2.u64 {%0, %1}, [%2];" : "=l"(retval.x), "=l"(retval.y) : "l" (ptr));
return retval;
}
// Minimal driver: calls ThreadLoad once on a local and discards the result.
int main() {
// val is never initialized; only its address is passed to ThreadLoad.
ulonglong2 val;
ulonglong2 x = ThreadLoad(&val);
return 0;
}
and let clang emit the LLVM IR (command: clang -emit-llvm test.cpp -c -S -o ptx.ll
):
; Function Attrs: nounwind uwtable
; Takes a %struct.ulonglong2* and returns the two loaded values as { i64, i64 }.
define { i64, i64 } @_Z10ThreadLoadP10ulonglong2(%struct.ulonglong2* %ptr) #0 {
; %1: stack slot of type %struct.ulonglong2; %2: stack slot holding the %ptr argument.
%1 = alloca %struct.ulonglong2, align 8
%2 = alloca %struct.ulonglong2*, align 8
store %struct.ulonglong2* %ptr, %struct.ulonglong2** %2, align 8
; Addresses of field 0 and field 1 of %1.
%3 = getelementptr inbounds %struct.ulonglong2* %1, i32 0, i32 0
%4 = getelementptr inbounds %struct.ulonglong2* %1, i32 0, i32 1
; Reload the pointer argument; it is the only operand passed to the asm call.
%5 = load %struct.ulonglong2** %2, align 8
; The two "=l" output constraints become the { i64, i64 } return value of the
; call; the single "l" input constraint corresponds to the one argument %5.
%6 = call { i64, i64 } asm sideeffect "ld.cg.v2.u64 $($0, $1$), [$2];", "=l,=l,l,~{dirflag},~{fpsr},~{flags}"(%struct.ulonglong2* %5) #1, !srcloc !1
; Unpack both results and store them into the fields of %1.
%7 = extractvalue { i64, i64 } %6, 0
%8 = extractvalue { i64, i64 } %6, 1
store i64 %7, i64* %3, align 8
store i64 %8, i64* %4, align 8
; Reinterpret %1 as { i64, i64 }, load it, and return it by value.
%9 = bitcast %struct.ulonglong2* %1 to { i64, i64 }*
%10 = load { i64, i64 }* %9, align 1
ret { i64, i64 } %10
}
The line that matters:
%6 = call { i64, i64 } asm sideeffect "ld.cg.v2.u64 $($0, $1$), [$2];", "=l,=l,l,~{dirflag},~{fpsr},~{flags}"(%struct.ulonglong2* %5) #1, !srcloc !1
Your asm call needs {i64, i64}
as its return type and takes just one input parameter (since you only declare one input in your asm line). Everything else is straightforward: take the pointers to where the two ulonglong
values are to be stored and store them there. Everything from %9
onward is just there to return the values as a struct. If you pass -O3
to clang you will see that everything but the asm call disappears and the pointer is forwarded directly to the asm call.
Upvotes: 2