Hamza
Hamza

Reputation: 1085

C getting a file using CURL

I'm trying to fetch image and PDF files from my Amazon S3 bucket via a C wrapper around libcurl. Currently I can fetch text files and HTTP/HTML responses such as error pages, but when I try to GET PDF and JPEG files, this is the response I get:

(DEBUG) Size : 90343
(DEBUG) Response : ����

I don't know where the weird characters came from, but I doubt that I need to add CURLOPT_BINARYTRANSFER

Below are the HTTP GET function and the related definitions using CURL:

/* Fetch one object over HTTP and log the size and body of the response. */
const char *url = "http://bucket.s3.amazonaws.com/file.pdf";
            long rc;
            struct buf_string response = {0};
            /* The third argument is a NULL-terminated array of request header lines. */
            rc = http_get(url, &response, (const char *[]){"Accept: */*", "Accept-Encoding: gzip, deflate", "User-Agent: web-service/0.1", NULL});
            if (rc != 200) { /* error */ }

            /* response.pos is logged as the size and response.buf as the body. */
            logprintf(ts, D_DEBUG, "Size : %d", response.pos);
            /* NOTE: %s treats the body as a C string, so output stops at the first
             * zero byte; binary payloads such as PDF or JPEG data therefore show up
             * truncated and garbled even though the download itself succeeded. */
            logprintf(ts, D_ERROR, "Response : %s", response.buf);


/*
 * Issue a GET request for url and hand back the status from http_perform().
 *
 * url      - address to request.
 * response - optional capture buffer; when non-NULL it is zeroed and then
 *            registered as the response body with default_write_data as the
 *            write callback.
 * headers  - request header array, stored on the request object as-is.
 *
 * The temporary request object is released with http_free() before returning.
 */
long
http_get(const char *url, struct buf_string *response, const char *headers[]) {
    struct http *req = http_create();

    req->method = "GET";
    req->url = url;
    req->headers = headers;

    if (response != NULL) {
        /* Start from an empty buffer so stale contents never leak through. */
        memset(response, 0, sizeof *response);
        req->response_body = response;
        req->write_function = default_write_data;
    }

    const long status = http_perform(req);
    http_free(req);

    return status;
}

/*
 * Allocate a new request object with zmalloc() and point both its body and
 * header write callbacks at null_write_data.
 *
 * NOTE(review): the other fields are presumably zeroed by zmalloc() -- confirm
 * against its definition.
 */
struct http *
http_create() {
    struct http *created = zmalloc(sizeof(*created));

    created->header_write_function = null_write_data;
    created->write_function = null_write_data;

    return created;
}

/*
 * Release a request object: clean up its curl easy handle, then free the
 * structure itself. Passing NULL is a no-op, mirroring free().
 */
void
http_free(struct http *h) {
    if (h == NULL) {
        return;
    }

    /* h->ch may be NULL when no handle was ever created for this object;
     * curl_easy_cleanup() returns immediately for a NULL handle. */
    curl_easy_cleanup(h->ch);
    free(h);
}

/*
 * Run the request described by h on a newly created curl easy handle.
 *
 * The handle is stored in h->ch and is not cleaned up here. h->curl_status is
 * reset to CURLE_OK on entry and receives the result of curl_easy_perform();
 * it is set to CURLE_OUT_OF_MEMORY if the request header list cannot be built.
 * h->headers is read but never modified.
 *
 * Returns the HTTP response code reported by libcurl, or -1 when the handle
 * cannot be created, the header list cannot be built, the extra-config hook
 * returns non-zero, the transfer fails, or the response code cannot be read.
 */
long
http_perform(struct http *h) {
    long response_code = -1;
    struct curl_slist *header_list = NULL;
    size_t i;

    /* A capture target without a real callback gets the default writer. */
    if (h->response_body && (h->write_function == NULL || h->write_function == null_write_data))
        h->write_function = default_write_data;

    if (h->response_header && (h->header_write_function == NULL || h->header_write_function == null_write_data))
        h->header_write_function = default_write_data;

    h->curl_status = CURLE_OK;

    if ((h->ch = curl_easy_init()) == NULL) {
        return -1;
    }

    curl_easy_setopt(h->ch, CURLOPT_URL, h->url);
    curl_easy_setopt(h->ch, CURLOPT_CUSTOMREQUEST, h->method);

    /* curl_easy_setopt() is variadic and reads numeric options as long, so
     * every integer argument is passed as a long explicitly. */
    curl_easy_setopt(h->ch, CURLOPT_PROTOCOLS, (long)(CURLPROTO_HTTP | CURLPROTO_HTTPS));
    curl_easy_setopt(h->ch, CURLOPT_FOLLOWLOCATION, 1L);
    curl_easy_setopt(h->ch, CURLOPT_REDIR_PROTOCOLS, (long)(CURLPROTO_HTTP | CURLPROTO_HTTPS));

    curl_easy_setopt(h->ch, CURLOPT_WRITEFUNCTION, h->write_function);
    curl_easy_setopt(h->ch, CURLOPT_WRITEDATA, h->response_body);

    curl_easy_setopt(h->ch, CURLOPT_HEADERFUNCTION, h->header_write_function);
    curl_easy_setopt(h->ch, CURLOPT_HEADERDATA, h->response_header);

    curl_easy_setopt(h->ch, CURLOPT_ERRORBUFFER, h->error);

    curl_easy_setopt(h->ch, CURLOPT_NOSIGNAL, 1L);

    if (h->body) {
        curl_easy_setopt(h->ch, CURLOPT_POSTFIELDS, (const char *) h->body);
        curl_easy_setopt(h->ch, CURLOPT_POSTFIELDSIZE_LARGE, (curl_off_t)h->body_length);
    }

    if (h->headers) {
        /* Walk by index so h->headers itself stays where the caller put it. */
        for (i = 0; h->headers[i] != NULL; i++) {
            struct curl_slist *grown = curl_slist_append(header_list, h->headers[i]);

            if (grown == NULL) {
                /* Append failed; header_list still owns the earlier nodes
                 * and is released at the out label. */
                h->curl_status = CURLE_OUT_OF_MEMORY;
                goto out;
            }
            header_list = grown;
        }
        curl_easy_setopt(h->ch, CURLOPT_HTTPHEADER, header_list);
    }

    if (h->extra_config_function && h->extra_config_function(h->ch, h->extra_config_data) != 0) {
        goto out;
    }

    if ((h->curl_status = curl_easy_perform(h->ch)) != CURLE_OK) {
        goto out;
    }

    if (curl_easy_getinfo(h->ch, CURLINFO_RESPONSE_CODE, &response_code) != CURLE_OK) {
        response_code = -1;
    }

out:
    curl_slist_free_all(header_list);
    return response_code;
}

Solved: Based on theamk's answer, I managed to resolve the issue by Base64-encoding the file directly and then passing it to my template engine, and it works. The file is successfully returned when the HTTP response code is 200; it was only printf that garbled it.

   long rc;
    struct buf_string response = {0};
    /* NOTE(review): Accept-Encoding is sent here as a plain request header.
     * libcurl only decodes compressed bodies when CURLOPT_ACCEPT_ENCODING is
     * set, so a gzip/deflate reply would presumably arrive still compressed
     * -- confirm against the server's behaviour. */
    rc = http_get(url, &response, (const char *[]){"Accept: */*", "Accept-Encoding: gzip, deflate", "User-Agent: micro-service/0.1", NULL});

    /* Any status other than 200 is reported as a missing attachment; continue
     * moves on to the next iteration of the enclosing loop (not shown). */
    if (rc != 200) {
        cs->error = "ATTACHMENT_FILE_NOT_FOUND";
        continue;
    }

    /* Convert the file payload to Base64, then pass it to the email params.
     * The payload length is passed explicitly (response.pos) instead of
     * relying on a terminating zero byte. */
    int len = response.pos;
    size_t sz =  BASE64_NEEDED(len);
    char *b64 = zmalloc(sz);
    base64_encode(b64, response.buf, len);

    template_parse("send_reply.tmpl", "attachment_data", b64);
    free(b64);

Upvotes: 2

Views: 339

Answers (1)

theamk
theamk

Reputation: 1664

You are getting the file properly, you simply cannot print out PDFs and JPEGs to screen -- you will get random characters like you have seen.

Specifically, your 4-character example seems to be from a JPEG file -- many of these start with the bytes ff:d8:ff:e0:00, which would be printed as the string you saw (four invalid characters, after which printf would stop printing because it would have encountered a character with code 0).

To make sure you are downloading the files properly, save the data to a file and then open that file with a JPEG/PDF viewer. Make sure to use fwrite() when writing the data (printf() or fputs() would not work because of the embedded binary zeros).

Upvotes: 1

Related Questions