Reputation: 659
I need to compare pHashes (phash.org) with a hamming distance function.
I tried the one from pg_similarity, but it doesn't seem to work right. (identical pHashes don't have a hamming distance of 0).
So I figured I'd just use a c-extension to use the ph_hamming_distance
function that's part of the pHash library.
What I've got: phash.c
#include <postgres.h>
#include <pHash.h>
#include <fmgr.h>
#include <utils/bytea.h>
#include <utils/datum.h>
/* Emit the PostgreSQL module magic block, but only when the installed
 * headers define the macro (the #ifdef keeps the file compiling against
 * headers that do not provide it). */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
PG_FUNCTION_INFO_V1(phash_hamming);
Datum phash_hamming(PG_FUNCTION_ARGS) {
bytea *bytea1 = PG_GETARG_BYTEA_P(0);
bytea *bytea2 = PG_GETARG_BYTEA_P(1);
//FIXME - length of bytea1 & bytea2 must be 4 bytes (64bits)
ulong64 long1 = *((ulong64*) bytea1);
ulong64 long2 = *((ulong64*) bytea2);
int32 ret = ph_hamming_distance(long1, long2);
PG_RETURN_INT32(ret);
}
Makefile
# Builds phash.so, a PostgreSQL loadable module, from phash.c.
# phash.c includes the C++ header pHash.h, so it is compiled with $(CXX).

CXXFLAGS=-I/usr/include/postgresql/server
# NOTE(review): the gcc/g++ driver reads -B<prefix> as a search prefix for
# its own tools, so -Bstatic probably never reaches the linker; if static
# linking of pHash is intended, -Wl,-Bstatic is the usual spelling - confirm.
LDFLAGS=-Bstatic
# Libraries are listed after the object files on the link line so the linker
# can resolve the objects' references against them.
LDLIBS=-lpHash

.PHONY: all install clean

all: phash.so

# Each rule produces exactly its own target.
phash.o: phash.c
	$(CXX) $(CXXFLAGS) -fpic -c $< -o $@

phash.so: phash.o
	$(CXX) $(LDFLAGS) -shared -o $@ $^ $(LDLIBS)

install: phash.so
	cp phash.so `pg_config --pkglibdir`

clean:
	rm -f phash.o phash.so
SQL
-- STRICT: the C implementation never checks for NULL arguments, so let
-- PostgreSQL return NULL itself instead of calling the function with one.
-- IMMUTABLE: the result depends only on the two argument values.
CREATE FUNCTION phash_hamming (bytea1 bytea, bytea2 bytea) RETURNS int AS '$libdir/phash' LANGUAGE C IMMUTABLE STRICT;
Error that I'm getting:
ERROR: could not load library "/usr/lib/postgresql/phash.so": /usr/lib/postgresql/phash.so: undefined symbol: _Z16pg_detoast_datumP7varlena
I must not be linking right to postgresql somehow?
Upvotes: 0
Views: 1958
Reputation: 1144
It's an old question, but...
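You need to wrap both the PostgreSQL headers and the PostgreSQL macros in extern "C". The file is compiled as C++, so without it the compiler mangles the PostgreSQL function names (the _Z16pg_detoast_datumP7varlena in your error is a mangled name) and the server cannot resolve them.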
/* Give the PostgreSQL headers and the module magic block C linkage so that
 * a C++ compiler does not mangle the names the server looks up. */
extern "C" {
#include <postgres.h>
#include <fmgr.h>
#ifdef PG_MODULE_MAGIC
/* The macro expansion ends in a declaration without its terminator, so the
 * trailing semicolon is required. */
PG_MODULE_MAGIC;
#endif
}
Upvotes: 2
Reputation: 659
I'm still convinced there might be a better way but this is what I did that worked.
(I will add range-checking eventually, instead of just assuming every bytea is 8 bytes (64 bits) long. Leaving a potential segfault in production would be bad, so it's a good thing this is just a toy project.)
phash.c - pure C file, compiled with gcc
#include <postgres.h>
#include <fmgr.h>
#include <utils/bytea.h>
#include <utils/datum.h>
/* 64-bit unsigned type for a pHash value, declared locally because this
 * pure C file does not include pHash.h.  Presumably mirrors pHash's own
 * ulong64 typedef - TODO confirm against pHash.h. */
#if defined(_MSC_VER) || defined(__BORLANDC__)
typedef unsigned __int64 ulong64;
#else
typedef unsigned long long ulong64;
#endif
/* C-linkage wrapper around ph_hamming_distance, defined in
 * phash_wrapper.cpp. */
extern int32 c_ph_hamming_distance (ulong64 b1, ulong64 b2);
/* Emit the PostgreSQL module magic block when the installed headers define
 * the macro. */
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
PG_FUNCTION_INFO_V1(phash_hamming);
Datum phash_hamming(PG_FUNCTION_ARGS) {
bytea *bytea1 = PG_GETARG_BYTEA_P(0);
bytea *bytea2 = PG_GETARG_BYTEA_P(1);
//FIXME - length of bytea1 & bytea2 must be 4 bytes (64bits)
ulong64 long1 = *((ulong64*) bytea1);
ulong64 long2 = *((ulong64*) bytea2);
int32 ret = c_ph_hamming_distance(long1, long2);
PG_RETURN_INT32(ret);
}
phash_wrapper.cpp - exposes a version of ph_hamming_distance with C linkage instead of C++ linkage (compiled with g++)
#include <pHash.h>
/* C-linkage entry point that forwards to pHash's ph_hamming_distance, so
 * that code compiled as plain C can call it by its unmangled name. */
extern "C" int c_ph_hamming_distance(ulong64 b1, ulong64 b2)
{
    const int distance = ph_hamming_distance(b1, b2);
    return distance;
}
Makefile
# Builds phash.so, a PostgreSQL loadable module, from a C source (phash.c)
# and a C++ wrapper (phash_wrapper.cpp) around pHash.

CFLAGS=-I/usr/include/postgresql/server
# Libraries are listed after the object files on the link line so the linker
# can resolve the objects' references against them.
LDLIBS=-lpHash

.PHONY: all install clean

all: phash.so

phash_wrapper.o: phash_wrapper.cpp
	$(CXX) $(CXXFLAGS) -fpic -c $< -o $@

phash.o: phash.c
	$(CC) $(CFLAGS) -fpic -c $< -o $@

# Linked with $(CXX) because phash_wrapper.o is a C++ object; the C++ driver
# adds the C++ runtime library to the link.
phash.so: phash.o phash_wrapper.o
	$(CXX) $(LDFLAGS) -shared -o $@ $^ $(LDLIBS)

install: phash.so
	cp phash.so `pg_config --pkglibdir`

clean:
	rm -f phash.o phash.so phash_wrapper.o
SQL - the same
-- STRICT: the C implementation never checks for NULL arguments, so let
-- PostgreSQL return NULL itself instead of calling the function with one.
-- IMMUTABLE: the result depends only on the two argument values.
CREATE FUNCTION phash_hamming (bytea1 bytea, bytea2 bytea) RETURNS int AS '$libdir/phash' LANGUAGE C IMMUTABLE STRICT;
Upvotes: 1