Reputation: 63
I've been on a search for the fastest algorithm out there to sort 1,000,000 integers. So far, surprisingly, C's built in qsort function seems to be the fastest out of anything that I've tried (I've tested pre sorted, reverse sorted, and random input files). On average, I get about .07 seconds for pre and reverse sorted and .2 seconds for random.
How can I optimize it to run even faster? Are there any quick tricks? I know that C++'s std sort is faster, but that can't be used in C... I've attached my code.
int compare(const void *x, const void *y){
return ( *(int*)x >= *(int*)y );
}
qsort(list, 1000000, sizeof(int), compare);
Upvotes: 0
Views: 1714
Reputation: 28921
This counting / radix sort will sort 1,000,000 32 bit unsigned integers in about .01 seconds on my system (Intel 2600K 3.4ghz), but it uses a second array (pTemp) the same size as the original array (pData), so it needs twice the memory.
typedef unsigned int UI32;
UI32 * RadixSort(UI32 * pData, UI32 * pTemp, size_t count)
{
size_t mIndex[4][256] = {0}; // index matrix
UI32 *pDst, *pSrc, *pTmp;
size_t i,j,m,n;
UI32 u;
for(i = 0; i < count; i++){ // generate histograms
u = pData[i];
for(j = 0; j < 4; j++){
mIndex[j][(size_t)(u & 0xff)]++;
u >>= 8;
}
}
for(j = 0; j < 4; j++){ // convert to indices
n = 0;
for(i = 0; i < 256; i++){
m = mIndex[j][i];
mIndex[j][i] = n;
n += m;
}
}
pDst = pTemp; // radix sort
pSrc = pData;
for(j = 0; j < 4; j++){
for(i = 0; i < count; i++){
u = pSrc[i];
m = (size_t)(u >> (j<<3)) & 0xff;
pDst[mIndex[j][m]++] = u;
}
pTmp = pSrc;
pSrc = pDst;
pDst = pTmp;
}
return(pSrc);
}
For signed integers, just need to complement the sign bit of an integer when it's used for indexing:
typedef int I32;
typedef unsigned int UI32;
I32 * RadixSort(I32 * pData, I32 * pTemp, size_t count)
{
size_t mIndex[4][256] = {0}; // index matrix
UI32 *pDst, *pSrc, *pTmp;
size_t i,j,m,n;
UI32 u;
for(i = 0; i < count; i++){ // generate histograms
u = pData[i];
for(j = 0; j < 4; j++){
if(j != 3) // signed integer handling
mIndex[j][(size_t)(u & 0xff)]++;
else
mIndex[j][(size_t)((u^0x80) & 0xff)]++;
u >>= 8;
}
}
for(j = 0; j < 4; j++){ // convert to indices
n = 0;
for(i = 0; i < 256; i++){
m = mIndex[j][i];
mIndex[j][i] = n;
n += m;
}
}
pDst = (UI32 *)pTemp; // radix sort
pSrc = (UI32 *)pData;
for(j = 0; j < 4; j++){
for(i = 0; i < count; i++){
u = pSrc[i];
if(j != 3) // signed integer handling
m = (size_t)(u >> (j<<3)) & 0xff;
else
m = (size_t)((u >> (j<<3))^0x80) & 0xff;
pDst[mIndex[j][m]++] = u;
}
pTmp = pSrc;
pSrc = pDst;
pDst = pTmp;
}
return((I32 *)pSrc);
}
Upvotes: 1