Reputation: 7360
Consider the following code:
extern int conn_fds[MAX_CLIENTS];
extern int fl_conn_indexes[MAX_CLIENTS];
extern int fl_req_bufs[MAX_CLIENTS];
extern struct epoll_event estab_events[MAX_THREADS];
extern req_buf_t req_bufs[MAX_REQ_BUFS];
extern int fl_req_bufs_top;
extern int conn_statuses[MAX_CLIENTS];
extern int fl_conn_indexes_top;
extern tcpl_nc_t nc_http_list;
extern struct sockaddr_in conn_addresses[MAX_CLIENTS];
/*
 * Take free connection slots from the fl_conn_indexes stack, accept() one
 * pending connection on nc_http_list.sock_fd into each slot, and mark each
 * slot CONN_STATUS_READING.
 *
 * The work is split into three hand-unrolled phases. Each phase is entered
 * through a GCC computed goto (labels-as-values) at labelsN[num_conns] and
 * then falls through every following label down to the one numbered 0.
 *
 * num_conns: index of the entry label used for all three phases; it is also
 *            the amount subtracted from fl_conn_indexes_top.
 *
 * Returns without doing anything when fl_conn_indexes_top - num_conns <= 0.
 *
 * NOTE(review): the label tables list exactly 16 entries and the last step
 * uses MAX_THREADS-16, so this presumably assumes MAX_THREADS == 16 -- confirm.
 * NOTE(review): entering at label k executes k+1 steps (k down to 0), and
 * num_conns == MAX_THREADS reads one element past the label tables. Confirm
 * the intended range of num_conns against the callers.
 * NOTE(review): the value returned by accept() is stored unchecked, so a
 * failed accept() leaves -1 in conn_fds while the slot is still marked
 * CONN_STATUS_READING -- confirm this is handled elsewhere.
 */
void accept_connections(unsigned int num_conns) {
    int fds[MAX_THREADS];
    int conn_indexes[MAX_THREADS];
    int conn_idx=0;
    int new_bottom;
    /*
     * accept() treats its third argument as value-result: on entry it must
     * hold the size of the address buffer, on return it holds the size that
     * was written. Every element therefore starts out as the size of one
     * conn_addresses entry (GNU range designator).
     */
    socklen_t slenghts[MAX_THREADS] = { [0 ... MAX_THREADS-1] = sizeof conn_addresses[0] };
    /* Entry points for phase 1 (a*), phase 2 (b*) and phase 3 (c*). */
    void *labels1[MAX_THREADS] = {&&a0,&&a1,&&a2,&&a3,&&a4,&&a5,&&a6,&&a7,&&a8,&&a9,&&a10,&&a11,&&a12,&&a13,&&a14,&&a15};
    void *labels2[MAX_THREADS] = {&&b0,&&b1,&&b2,&&b3,&&b4,&&b5,&&b6,&&b7,&&b8,&&b9,&&b10,&&b11,&&b12,&&b13,&&b14,&&b15};
    void *labels3[MAX_THREADS] = {&&c0,&&c1,&&c2,&&c3,&&c4,&&c5,&&c6,&&c7,&&c8,&&c9,&&c10,&&c11,&&c12,&&c13,&&c14,&&c15};

    new_bottom=fl_conn_indexes_top-num_conns;
    if (new_bottom<=0) return;

    /* Phase 1: copy slot indexes from the free list into conn_indexes. */
    goto *labels1[num_conns];
a15:
    conn_indexes[MAX_THREADS- 1]=fl_conn_indexes[fl_conn_indexes_top- 1];
a14:
    conn_indexes[MAX_THREADS- 2]=fl_conn_indexes[fl_conn_indexes_top- 2];
a13:
    conn_indexes[MAX_THREADS- 3]=fl_conn_indexes[fl_conn_indexes_top- 3];
a12:
    conn_indexes[MAX_THREADS- 4]=fl_conn_indexes[fl_conn_indexes_top- 4];
a11:
    conn_indexes[MAX_THREADS- 5]=fl_conn_indexes[fl_conn_indexes_top- 5];
a10:
    conn_indexes[MAX_THREADS- 6]=fl_conn_indexes[fl_conn_indexes_top- 6];
a9:
    conn_indexes[MAX_THREADS- 7]=fl_conn_indexes[fl_conn_indexes_top- 7];
a8:
    conn_indexes[MAX_THREADS- 8]=fl_conn_indexes[fl_conn_indexes_top- 8];
a7:
    conn_indexes[MAX_THREADS- 9]=fl_conn_indexes[fl_conn_indexes_top- 9];
a6:
    conn_indexes[MAX_THREADS-10]=fl_conn_indexes[fl_conn_indexes_top-10];
a5:
    conn_indexes[MAX_THREADS-11]=fl_conn_indexes[fl_conn_indexes_top-11];
a4:
    conn_indexes[MAX_THREADS-12]=fl_conn_indexes[fl_conn_indexes_top-12];
a3:
    conn_indexes[MAX_THREADS-13]=fl_conn_indexes[fl_conn_indexes_top-13];
a2:
    conn_indexes[MAX_THREADS-14]=fl_conn_indexes[fl_conn_indexes_top-14];
a1:
    conn_indexes[MAX_THREADS-15]=fl_conn_indexes[fl_conn_indexes_top-15];
a0:
    conn_indexes[MAX_THREADS-16]=fl_conn_indexes[fl_conn_indexes_top-16];

    /* The free-list top is lowered only after every index has been read. */
    fl_conn_indexes_top=fl_conn_indexes_top-num_conns;

    /* Phase 2: accept one connection per selected slot. */
    goto *labels2[num_conns];
b15:
    conn_fds[conn_indexes[MAX_THREADS- 1]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 1]],&slenghts[MAX_THREADS- 1]);
b14:
    conn_fds[conn_indexes[MAX_THREADS- 2]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 2]],&slenghts[MAX_THREADS- 2]);
b13:
    conn_fds[conn_indexes[MAX_THREADS- 3]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 3]],&slenghts[MAX_THREADS- 3]);
b12:
    conn_fds[conn_indexes[MAX_THREADS- 4]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 4]],&slenghts[MAX_THREADS- 4]);
b11:
    conn_fds[conn_indexes[MAX_THREADS- 5]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 5]],&slenghts[MAX_THREADS- 5]);
b10:
    conn_fds[conn_indexes[MAX_THREADS- 6]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 6]],&slenghts[MAX_THREADS- 6]);
b9:
    conn_fds[conn_indexes[MAX_THREADS- 7]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 7]],&slenghts[MAX_THREADS- 7]);
b8:
    conn_fds[conn_indexes[MAX_THREADS- 8]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 8]],&slenghts[MAX_THREADS- 8]);
b7:
    conn_fds[conn_indexes[MAX_THREADS- 9]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 9]],&slenghts[MAX_THREADS- 9]);
b6:
    conn_fds[conn_indexes[MAX_THREADS-10]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-10]],&slenghts[MAX_THREADS-10]);
b5:
    conn_fds[conn_indexes[MAX_THREADS-11]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-11]],&slenghts[MAX_THREADS-11]);
b4:
    conn_fds[conn_indexes[MAX_THREADS-12]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-12]],&slenghts[MAX_THREADS-12]);
b3:
    conn_fds[conn_indexes[MAX_THREADS-13]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-13]],&slenghts[MAX_THREADS-13]);
b2:
    conn_fds[conn_indexes[MAX_THREADS-14]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-14]],&slenghts[MAX_THREADS-14]);
b1:
    conn_fds[conn_indexes[MAX_THREADS-15]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-15]],&slenghts[MAX_THREADS-15]);
b0:
    conn_fds[conn_indexes[MAX_THREADS-16]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS-16]],&slenghts[MAX_THREADS-16]);

    /* Phase 3: mark every selected slot as reading. */
    goto *labels3[num_conns];
c15:
    conn_statuses[conn_indexes[MAX_THREADS- 1]]=CONN_STATUS_READING;
c14:
    conn_statuses[conn_indexes[MAX_THREADS- 2]]=CONN_STATUS_READING;
c13:
    conn_statuses[conn_indexes[MAX_THREADS- 3]]=CONN_STATUS_READING;
c12:
    conn_statuses[conn_indexes[MAX_THREADS- 4]]=CONN_STATUS_READING;
c11:
    conn_statuses[conn_indexes[MAX_THREADS- 5]]=CONN_STATUS_READING;
c10:
    conn_statuses[conn_indexes[MAX_THREADS- 6]]=CONN_STATUS_READING;
c9:
    conn_statuses[conn_indexes[MAX_THREADS- 7]]=CONN_STATUS_READING;
c8:
    conn_statuses[conn_indexes[MAX_THREADS- 8]]=CONN_STATUS_READING;
c7:
    conn_statuses[conn_indexes[MAX_THREADS- 9]]=CONN_STATUS_READING;
c6:
    conn_statuses[conn_indexes[MAX_THREADS-10]]=CONN_STATUS_READING;
c5:
    conn_statuses[conn_indexes[MAX_THREADS-11]]=CONN_STATUS_READING;
c4:
    conn_statuses[conn_indexes[MAX_THREADS-12]]=CONN_STATUS_READING;
c3:
    conn_statuses[conn_indexes[MAX_THREADS-13]]=CONN_STATUS_READING;
c2:
    conn_statuses[conn_indexes[MAX_THREADS-14]]=CONN_STATUS_READING;
c1:
    conn_statuses[conn_indexes[MAX_THREADS-15]]=CONN_STATUS_READING;
c0:
    conn_statuses[conn_indexes[MAX_THREADS-16]]=CONN_STATUS_READING;
}
What I am doing here is processing up to MAX_THREADS connections in parallel. The loops were unrolled on purpose for speed, but the code is very long. I need to shorten it using macros, something like this:
extern int conn_fds[MAX_CLIENTS];
extern int fl_conn_indexes[MAX_CLIENTS];
extern int fl_req_bufs[MAX_CLIENTS];
extern struct epoll_event estab_events[MAX_THREADS];
extern req_buf_t req_bufs[MAX_REQ_BUFS];
extern int fl_req_bufs_top;
extern int conn_statuses[MAX_CLIENTS];
extern int fl_conn_indexes_top;
extern tcpl_nc_t nc_http_list;
extern struct sockaddr_in conn_addresses[MAX_CLIENTS];
void accept_connections(unsigned int num_conns) {
int fds[MAX_THREADS];
int conn_indexes[MAX_THREADS];
int conn_idx=0;
int new_bottom;
socklen_t slenghts[MAX_THREADS];
void *labels1[MAX_THREADS] = {&&a0,&&a1,&&a2,&&a3,&&a4,&&a5,&&a6,&&a7,&&a8,&&a9,&&a10,&&a11,&&a12,&&a13,&&a14,&&a15};
void *labels2[MAX_THREADS] = {&&b0,&&b1,&&b2,&&b3,&&b4,&&b5,&&b6,&&b7,&&b8,&&b9,&&b10,&&b11,&&b12,&&b13,&&b14,&&b15};
void *labels3[MAX_THREADS] = {&&c0,&&c1,&&c2,&&c3,&&c4,&&c5,&&c6,&&c7,&&c8,&&c9,&&c10,&&c11,&&c12,&&c13,&&c14,&&c15};
new_bottom=fl_conn_indexes_top-num_conns;
if (new_bottom<=0) return;
goto *labels1[num_conns];
LOOP(1,MAX_THREADS) {
a{ITERATOR}:
conn_indexes[MAX_THREADS- {ITERATOR}]=fl_conn_indexes[fl_conn_indexes_top- {ITERATOR}];
}
fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
goto *labels2[num_conns];
LOOP(1,MAX_THREADS) {
b{ITERATOR}:
conn_fds[conn_indexes[MAX_THREADS- {ITERATOR}]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[MAX_THREADS- 1{ITERATOR}]],&slenghts[MAX_THREADS- {ITERATOR}]);
}
goto *labels3[num_conns];
LOOP(1,MAX_THREADS) {
c{ITERATOR}:
conn_statuses[conn_indexes[MAX_THREADS- {ITERATOR}]]=CONN_STATUS_READING;
}
}
The {ITERATOR} goes from 1 to MAX_THREADS inside the LOOP(). The labels are needed because the number of connections can be lower than MAX_THREADS, and I have to skip the unused slots; otherwise I will get segfaults.
Can this be implemented easily with GCC's preprocessor? If not, maybe I could use m4, but how would I integrate m4 with GCC for comfortable development? I am going to edit these macros very often, along with many other files in the source code, so ease of coding is important.
I would very much appreciate your help.
Upvotes: 1
Views: 527
Reputation: 1216
If you do require an answer in `m4`:
dnl unroll: emit code once for every integer from initial up to, but not
dnl including, max. While code is being expanded, var is defined as an m4
dnl macro holding the current value; once the range is exhausted var is
dnl undefined again. The repetition is done by recursion, with the start
dnl value passed through incr on each nested call.
dnl
dnl usage: unroll(var, initial, max, code)
dnl e.g. unroll(`i', 0, 10, ``total += arr[i]'')
dnl The above goes from 0 to 9, it does not include the final 10.
define(`unroll',
`define(`$1', `$2')ifelse(eval($2 < $3),
1,
`$4`'unroll(`$1',incr($2),$3,`$4')',
`undefine(`$1')')')dnl
dnl m4 source for accept_connections. Running it through m4 with the unroll
dnl macro defined produces the three hand-unrolled label chains a, b and c.
dnl Comments here are dnl lines only, so they do not appear in the generated C.
void accept_connections(unsigned int num_conns) {
int fds[MAX_THREADS];
int conn_indexes[MAX_THREADS];
int conn_idx=0;
int new_bottom;
socklen_t slenghts[MAX_THREADS];
dnl Jump tables: each unroll below expands to the label addresses numbered
dnl 0 through 15, every one followed by a comma. The bound 16 is written
dnl literally because MAX_NUM_CONNS is only defined further down.
void *labels1[MAX_THREADS] = {unroll(`i', 0, 16, `&&a`'i,')};
void *labels2[MAX_THREADS] = {unroll(`i', 0, 16, `&&b`'i,')};
void *labels3[MAX_THREADS] = {unroll(`i', 0, 16, `&&c`'i,')};
new_bottom=fl_conn_indexes_top-num_conns;
if (new_bottom<=0) return;
dnl The define on the next line expands to nothing, leaving an empty line.
define(`MAX_NUM_CONNS', 16)
goto *labels1[num_conns];
dnl Phase 1: count runs from 1 to MAX_NUM_CONNS. The label suffix is
dnl MAX_NUM_CONNS minus count, so the labels come out as 15 down to 0.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
a`'eval(MAX_NUM_CONNS() - count):
conn_indexes[MAX_THREADS - count]=fl_conn_indexes[fl_conn_indexes_top - count];
')dnl
fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
goto *labels2[num_conns];
dnl Phase 2: index is redefined on every iteration as MAX_THREADS minus count
dnl and is used for the fd slot, the address slot and the length slot.
dnl NOTE(review): index is also the name of an m4 builtin. The undefine after
dnl this loop removes that name entirely, so confirm nothing later relies on it.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
b`'eval(MAX_NUM_CONNS() - count):
conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
')dnl
undefine(`index')
goto *labels3[num_conns];
dnl Phase 3: same label numbering as phase 1, marking each slot as reading.
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
c`'eval(MAX_NUM_CONNS() - count):
conn_statuses[conn_indexes[MAX_THREADS - count]]=CONN_STATUS_READING;
')dnl
}
Pay special attention to the macros now within the code: `define`, `unroll`, `eval`, `MAX_NUM_CONNS`, `count`, `index`, `i`. Also notice the quirky quoting which m4 likes.
I can suggest an improvement to this (regardless of whether or not you use `m4`). Your use case here, of gotos following directly after each other, is one already covered exactly by a more well-known control structure: the `switch` (because fallthrough is the default behaviour without a `break`):
dnl Switch-based variant of the three phases. Each generated case has no break,
dnl so entering at case k also runs every following case down to case 0.
dnl A num_conns value with no matching case skips the switch body entirely.
define(`MAX_NUM_CONNS', 16)
dnl Phase 1: copy slot indexes from the free list.
switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
case eval(MAX_NUM_CONNS() - count):
conn_indexes[MAX_THREADS - count]=fl_conn_indexes[fl_conn_indexes_top - count];
')dnl
}
fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
dnl Phase 2: accept one connection per slot. index is redefined on every
dnl iteration and removed again after the switch.
switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
case eval(MAX_NUM_CONNS() - count):
conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
')dnl
}
undefine(`index')
dnl Phase 3: mark each slot as reading.
switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
case eval(MAX_NUM_CONNS() - count):
conn_statuses[conn_indexes[MAX_THREADS - count]]=CONN_STATUS_READING;
')dnl
}
If you do this conversion to a switch, you can remove your label arrays.
Finally, perhaps it could all be one switch:
dnl Single-switch variant: every generated case performs all three steps for
dnl one slot and then falls through to the next case, down to case 0.
dnl Unlike the three-phase form, fl_conn_indexes_top is lowered only after
dnl the whole switch has run.
define(`MAX_NUM_CONNS', 16)
switch (num_conns) {
unroll(`count', 1, incr(MAX_NUM_CONNS()), `dnl
define(`index', `MAX_THREADS - count')dnl
case eval(MAX_NUM_CONNS() - count):
conn_indexes[index]=fl_conn_indexes[fl_conn_indexes_top - count];
conn_fds[conn_indexes[index]]=accept(nc_http_list.sock_fd,(struct sockaddr*) &conn_addresses[conn_indexes[index]],&slenghts[index]);
conn_statuses[conn_indexes[index]]=CONN_STATUS_READING;
')dnl
}
undefine(`index')
fl_conn_indexes_top=fl_conn_indexes_top-num_conns;
Note that I've moved the `fl_conn_indexes_top` modification to after the whole unrolled loop is finished - I'm not sure if that's a valid thing to do, because it depends on when that variable is read from or written to. You can make the judgement call there.
As I mentioned in a comment, this can be done with compiler flags too. GCC supports the `-funroll-loops` option, which does this for you at the compiler level. GCC also supports `-fprofile-use`, which (when fed a profile from `-fprofile-generate`) will unroll loops which proved to cause performance issues.
Finally:
I think this is all premature optimization. I strongly doubt that a loop's comparison check is the bottleneck when each iteration calls `accept`!
Upvotes: 2
Reputation: 1853
You can abuse the C preprocessor with the chaos-pp preprocessor library:
#include <chaos/preprocessor/arithmetic/dec.h>
#include <chaos/preprocessor/config.h>
#include <chaos/preprocessor/lambda/ops.h>
#include <chaos/preprocessor/logical/bool.h>
#include <chaos/preprocessor/recursion/expr.h>
#include <chaos/preprocessor/repetition/for.h>
/* Loop predicate handed to CHAOS_PP_FOR: passes the state x through
 * CHAOS_PP_BOOL. Presumably the loop continues while x is non-zero --
 * TODO confirm against the chaos-pp documentation. */
#define PRED(s, x) CHAOS_PP_BOOL(x)
/* Loop step handed to CHAOS_PP_FOR: applies CHAOS_PP_DEC to the state. */
#define OP(s, x) CHAOS_PP_DEC(x)
/* Loop body: emits the label a<idx> followed by the conn_indexes assignment
 * for offset (16 - idx). The 16 is written literally here rather than taken
 * from MAX_THREADS. */
#define CONN_IDX(s , idx) \
a##idx: \
conn_indexes[MAX_THREADS - (16 - idx)]=fl_conn_indexes[fl_conn_indexes_top - (16 - idx)];
/* Run the loop starting from state 15.
 * NOTE(review): if the predicate stops the loop at state 0, label a0 is never
 * generated -- confirm whether that label is needed. */
CHAOS_PP_EXPR(CHAOS_PP_FOR(PRED, OP, CONN_IDX, 15))
The (formatted) output being:
a15:
conn_indexes[MAX_THREADS - (16 - 15)]=fl_conn_indexes[fl_conn_indexes_top - (16 - 15)];
a14:
conn_indexes[MAX_THREADS - (16 - 14)]=fl_conn_indexes[fl_conn_indexes_top - (16 - 14)];
...
Upvotes: 1