Reputation: 276
I am learning socket programming in C language, and this is an incomprehensible problem I encountered during my study.
Today I am trying to send a HTTP request to my test server which host an Apache example website, then receive the response from test server. Here is a part of my receive code.
unsigned long recv_size = 0;
unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *)malloc(response_size);
while (1)
{
// ret = recv(socket, recv_buff, response_size, MSG_WAITALL); // cannot get all data
ret = read(socket, recv_buff, response_size); // same effect as the above
recv_size += ret;
if (ret < 0)
error(strerror(errno));
else if (ret == 0)
break; // all data recved
}
The normal result of my test with burpsuit is this.
But what I received with the C language program was incomplete data.
I searched the reason for one night, but I still did not find a solution for my problem. Whether it is to set the buff to a super large size or any other method, the complete data cannot be accepted at all.
The traffic monitored from wireshark is ok, but my program still cannot receive the complete data. What is the problem?
If you know why, please let me know. THX. (o゜▽゜)o☆
UPDATE
The while
loop will execute twice, and first time the value of ret
is 3343
, and second time is 0
, so the loop will stop here.
Upvotes: 0
Views: 1516
Reputation: 33601
You can get a short read on a socket.
But, your code to handle that has a few issues.
You're allocating a buffer of size response_size
. You are always reading that amount instead of reducing the amount read by the amount you've already read on a prior loop iteration.
This can cause you to read past the end of the buffer causing UB (undefined behavior).
Your termination condition is if (ret == 0)
. This can fail if another packet arrives "early". You'll never see a ret
of 0, because the partial data from the next packet will make it non-zero
Here's the corrected code:
#if 0
unsigned long recv_size = 0;
#endif
unsigned long response_size = 4096;
int ret = 0;
char *recv_buff = (char *) malloc(response_size);
#if 1
unsigned long remaining_size = response_size;
unsigned long offset = 0;
#endif
for (; remaining_size > 0; remaining_size -= ret, offset += ret) {
ret = read(socket, &recv_buff[offset], remaining_size);
if (ret < 0)
error(strerror(errno));
}
UPDATE:
The above code corrects some of the issues. But, for a variable length source [such as http
], we don't know how much to read at the outset.
So, we have to parse the headers and look for the "Content-Length" field. This will tell us how much to read.
So, we'd like to have line oriented input for the headers. Or, manage our own buffer
Assuming we can parse that value, we have to wait for the empty line to denote the start of the payload. And, then we can loop on that exact amount.
Here's some code that attempts the header parsing and saving of the payload. I've coded it, but not compiled it. So, you can take it as pseudo code:
unsigned long recv_size = 0;
unsigned long response_size = 4096;
char *recv_buff = malloc(response_size + 1);
// line oriented header buffer
char *endl = NULL;
unsigned long linelen;
char linebuf[1000];
int ret = 0;
// read headers
while (1) {
// fill up a chunk of data
while (recv_size < response_size) {
recv_buff[recv_size] = 0;
// do we have a line end?
endl = strstr(recv_buff,"\r\n");
if (endl != NULL)
break;
ret = read(socket, &recv_buff[recv_size], response_size - recv_size);
if (ret < 0)
error(strerror(errno));
if (ret == 0)
break;
recv_size += ret;
}
// error -- no line end but short read
if (endl == NULL)
error(strerror(errno));
// copy header to work buffer
linelen = endl - recv_buff;
memcpy(linebuf,recv_buff,linelen);
linebuf[linelen] = 0;
// remove header from receive buffer
linelen += 2;
recv_size -= linelen;
if (recv_size > 0)
memcpy(recv_buff,&recv_buff[linelen],recv_size);
// stop on end of headers (back to back "\r\n")
if ((recv_size >= 2) && (recv_buff[0] == '\r') && (recv_buff[1] == '\n')) {
memcpy(recv_buff,&recv_buff[2],recv_size - 2);
recv_size -= 2;
break;
}
// parse line work buffer for keywords ... (e.g.)
content_length = ...;
}
// save payload to file
while (content_length > 0) {
// write out prior payload amount
if (recv_size > 0) {
write(file_fd,recv_buff,recv_size);
content_length -= recv_size;
recv_size = 0;
continue;
}
recv_size = read(socket,recv_buff,response_size);
if (recv_size < 0)
error(strerror(errno));
if (recv_size == 0)
break;
}
UPDATE #2:
Yeah, it hard to make the pseudo code run, and the returned values are all garbled
Okay, here is a soup-to-nuts working version that I've tested against my own http
server.
I had to create my own routines for the parts you didn't post (e.g. connect
, etc.).
At the core, there might have been a minor tweak to the buffer slide code [it was sliding by an extra 2 bytes in one place], but, otherwise it was pretty close to my previous version
// htprcv/htprcv.c -- HTTP receiver
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <error.h>
#include <netdb.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>
typedef unsigned char byte;
#define HTPSLIDE(_rmlen) \
recv_size = htpslide(recv_buff,recv_size,_rmlen)
#define _dbgprt(_fmt...) \
fprintf(stderr,_fmt)
#if DEBUG || _USE_ZPRT_
#define dbgprt(_lvl,_fmt...) \
do { \
if (dbgok(_lvl)) \
_dbgprt(_fmt); \
} while (0)
#define dbgexec(_lvl,_expr) \
do { \
if (dbgok(_lvl)) \
_expr; \
} while (0)
#else
#define dbgprt(_lvl,_fmt...) \
do { \
} while (0)
#define dbgexec(_lvl,_expr) \
do { \
} while (0)
#endif
#define dbgok(_lvl) \
opt_d[(byte) #_lvl[0]]
byte opt_d[256];
char *opt_o;
#define HEXMAX 16
// htpconn -- do connect to server
int
htpconn(const char *hostname,unsigned short portno)
{
struct addrinfo hints, *res;
struct hostent *hostent;
int ret;
char portstr[20];
int sockfd;
/* Prepare hint (socket address input). */
hostent = gethostbyname(hostname);
if (hostent == NULL)
error(1,errno,"htpconn: gethostbyname -- %s\n",hostname);
memset(&hints, 0, sizeof hints);
hints.ai_family = AF_INET; // ipv4
hints.ai_socktype = SOCK_STREAM; // tcp
hints.ai_flags = AI_PASSIVE; // fill in my IP for me
sprintf(portstr, "%d", portno);
getaddrinfo(NULL, portstr, &hints, &res);
sockfd = socket(res->ai_family, res->ai_socktype, res->ai_protocol);
if (sockfd < 0)
error(1,errno,"htpconn: socket\n");
// do the actual connection
ret = connect(sockfd, res->ai_addr, res->ai_addrlen);
if (ret < 0)
error(1,errno,"htprcv: read header\n");
return sockfd;
}
// htpslide -- slide buffer (strip out processed data)
size_t
htpslide(char *recv_buff,size_t recv_size,int slidelen)
{
size_t new_size;
if (slidelen > recv_size)
slidelen = recv_size;
new_size = recv_size - slidelen;
dbgprt(S,"htpslide: slidelen=%d recv_size=%zu new_size=%zu\n",
slidelen,recv_size,new_size);
memcpy(&recv_buff[0],&recv_buff[slidelen],new_size);
return new_size;
}
// _htphex -- dump a line in hex
void
_htphex(unsigned long off,const void *vp,size_t xlen)
{
const byte *bp = vp;
int idx;
int chr;
char hexbuf[200];
char alfbuf[200];
char *hexptr = hexbuf;
char *alfptr = alfbuf;
for (idx = 0; idx < HEXMAX; ++idx) {
chr = bp[idx];
if ((idx % 4) == 0)
*hexptr++ = ' ';
if (idx < xlen) {
hexptr += sprintf(hexptr,"%2.2X",chr);
if ((chr < 0x20) || (chr > 0x7E))
chr = '.';
}
else {
hexptr += sprintf(hexptr," ");
chr = ' ';
}
*alfptr++ = chr;
}
*hexptr = 0;
*alfptr = 0;
_dbgprt(" %8.8lX: %s *%s*\n",off,hexbuf,alfbuf);
}
// htphex -- dump a buffer in hex
void
htphex(const char *buf,size_t buflen,const char *reason)
{
size_t off = 0;
size_t xlen;
if (reason != NULL)
_dbgprt("htphex: DUMP buf=%p buflen=%zu (from %s)\n",
buf,buflen,reason);
for (; buflen > 0; buflen -= xlen, buf += xlen, off += xlen) {
xlen = buflen;
if (xlen > HEXMAX)
xlen = HEXMAX;
_htphex(off,buf,xlen);
}
}
// htpsym -- get symbol/value
int
htpsym(char *linebuf,char *sym,char *val)
{
char *cp;
int match;
dbgprt(H,"htpsym: PARAM linebuf='%s'\n",linebuf);
// FORMAT:
// foo-bar: baz
do {
match = 0;
cp = strchr(linebuf,':');
if (cp == NULL)
break;
*cp++ = 0;
strcpy(sym,linebuf);
for (; (*cp == ' ') || (*cp == '\t'); ++cp);
strcpy(val,cp);
match = 1;
dbgprt(H,"htpsym: SYMBOL sym='%s' val='%s'\n",sym,val);
} while (0);
return match;
}
// htprcv -- receive server response
void
htprcv(int sockfd,int fdout)
{
size_t recv_size = 0;
size_t response_size = 4096;
char *recv_buff = malloc(response_size + 1);
// line oriented header buffer
char *endl = NULL;
size_t linelen;
char linebuf[1000];
ssize_t ret = 0;
off_t content_length = 0;
// read headers
while (1) {
// fill up a chunk of data
while (recv_size < response_size) {
recv_buff[recv_size] = 0;
// do we have a line end?
endl = strstr(recv_buff,"\r\n");
if (endl != NULL)
break;
// read a chunk of data
ret = read(sockfd,&recv_buff[recv_size],response_size - recv_size);
if (ret < 0)
error(1,errno,"htprcv: read header\n");
if (ret == 0)
break;
recv_size += ret;
dbgprt(R,"htprcv: READ ret=%zd\n",ret);
dbgexec(R,htphex(recv_buff,recv_size,"htprcv/READ"));
}
// error -- no line end but short read
if (endl == NULL)
error(1,0,"htprcv: no endl\n");
// copy header to work buffer
linelen = endl - recv_buff;
memcpy(linebuf,recv_buff,linelen);
linebuf[linelen] = 0;
// remove header from receive buffer
linelen += 2;
HTPSLIDE(linelen);
// stop on end of headers (back to back "\r\n")
if ((recv_size >= 2) &&
(recv_buff[0] == '\r') && (recv_buff[1] == '\n')) {
HTPSLIDE(2);
break;
}
// parse line work buffer for keywords ...
char sym[100];
char val[1000];
if (! htpsym(linebuf,sym,val))
continue;
if (strcasecmp(sym,"Content-Length") == 0) {
content_length = atoi(val);
continue;
}
}
// save payload to file
while (content_length > 0) {
// write out prior payload amount
if (recv_size > 0) {
dbgexec(W,htphex(recv_buff,recv_size,"htprcv/WRITE"));
ret = write(fdout,recv_buff,recv_size);
if (ret < 0)
error(1,errno,"htprcv: write body\n");
content_length -= recv_size;
recv_size = 0;
continue;
}
// read in new chunk of payload
ret = read(sockfd,recv_buff,response_size);
if (ret < 0)
error(1,errno,"htprcv: read body\n");
if (ret == 0)
break;
recv_size = ret;
}
free(recv_buff);
}
// htpget -- do initial dialog
void
htpget(int sockfd,const char *hostname,const char *file)
{
char *bp;
char buf[1024];
ssize_t resid;
ssize_t xlen;
size_t off;
bp = buf;
if (file == NULL)
file = "/";
bp += sprintf(bp,"GET %s HTTP/1.1\r\n",file);
if (hostname == NULL)
hostname = "localhost";
bp += sprintf(bp,"Host: %s\r\n",hostname);
if (0) {
bp += sprintf(bp,"User-Agent: %s\r\n","curl/7.61.1");
}
else {
bp += sprintf(bp,"User-Agent: %s\r\n","htprcv");
}
bp += sprintf(bp,"Accept: */*\r\n");
bp += sprintf(bp,"\r\n");
resid = bp - buf;
off = 0;
for (; resid > 0; resid -= xlen, off += xlen) {
xlen = write(sockfd,buf,resid);
if (xlen < 0)
error(1,errno,"htpget: write error\n");
}
}
// main -- main program
int
main(int argc,char **argv)
{
char *cp;
char *portstr;
unsigned short portno;
int sockfd;
int filefd;
char url[1000];
--argc;
++argv;
//setlinebuf(stdout);
setlinebuf(stderr);
for (; argc > 0; --argc, ++argv) {
cp = *argv;
if (*cp != '-')
break;
cp += 2;
switch(cp[-1]) {
case 'd': // debug options
if (*cp == 0)
cp = "SHRW";
for (; *cp != 0; ++cp)
opt_d[(byte) *cp] = 1;
break;
case 'o': // output file
opt_o = cp;
break;
}
}
// get the remote host:port
do {
if (argc <= 0) {
strcpy(url,"localhost:80");
break;
}
strcpy(url,*argv++);
--argc;
} while (0);
// get remote port number
portstr = strchr(url,':');
if (portstr != NULL)
*portstr++ = 0;
else
portstr = "80";
portno = atoi(portstr);
// open the output file (or send to stdout)
do {
if (opt_o == NULL) {
filefd = 1;
break;
}
filefd = open(opt_o,O_WRONLY | O_CREAT,0644);
if (filefd < 0)
filefd = 1;
} while (0);
// establish connection
sockfd = htpconn(url,portno);
// send the file request
htpget(sockfd,NULL,"/");
// receive the server response
htprcv(sockfd,filefd);
close(sockfd);
return 0;
}
Upvotes: 1