Raul Radulescu
Raul Radulescu

Reputation: 21

curl_multi_exec with bound IPs: connection reuse / connecting before the request

To bring some context, I want to execute some requests as fast as possible, testing, benchmarking for now.

The endpoint is behind cloudfront.

The idea is a race: out of N connections, whichever request reaches the server first wins.

I've selected 4 IPs and I want to submit 2 requests per IP.

I want to open the connections beforehand, wait 5 seconds, and then send these 4 x 2 requests over the already-open connections.

I've butchered the code trying to make it run on those IPs. The 1st batch (4 x 2 requests), opened in parallel, works, but when I try to reuse those connections, all requests end up using the same IP.

// Target IPs, keyed 1..8 so that each key matches a request index used below.
$ips = [
    1 => 'x.x.x.x',
    2 => 'x.x.x.x',
    3 => 'x.x.x.x',
    4 => 'x.x.x.x',
    5 => 'x.x.x.x',
    6 => 'x.x.x.x',
    7 => 'x.x.x.x',
    8 => 'x.x.x.x',
];

// Easy handles of the current batch, keyed by request index.
$multiCurl = [];

// Per-batch outputs: decoded bodies, transfer metrics and parsed headers.
$result1 = [];
$result2 = [];
$info = [];
$info2 = [];
$headers1 = [];
$headers2 = [];

// Multi handle that drives every transfer of both batches.
$mh = curl_multi_init();

for ($i = 1; $i <= 8; $i++) {
    // The request index is sent as the query string.
    $fetchURL = 'https://endpoint/timestamp?' . $i;

    // Passing the URL to curl_init() sets CURLOPT_URL on the new handle.
    $multiCurl[$i] = curl_init($fetchURL);

    curl_setopt_array($multiCurl[$i], [
        // Keep the response headers in the returned string.
        CURLOPT_HEADER => 1,
        // Return the response instead of printing it.
        CURLOPT_RETURNTRANSFER => 1,
        // Resolve "endpoint:443" to the IP chosen for this request index.
        CURLOPT_RESOLVE => ['endpoint:443:' . $ips[$i]],
    ]);

    curl_multi_add_handle($mh, $multiCurl[$i]);
}

// dd($multiCurl);
// Number of transfers still running; curl_multi_exec() updates it by reference.
$index = null;

$t1 = microtime(true);

// Drive the first batch until no transfer is running or the multi stack
// reports an error.
do {
    $status = curl_multi_exec($mh, $index);
    if ($index > 0 && $status === CURLM_OK) {
        // Wait for socket activity instead of spinning the CPU between calls.
        curl_multi_select($mh);
    }
} while ($index > 0 && $status === CURLM_OK);

$t2 = microtime(true);



// Metrics recorded for each transfer: result key => curl_getinfo() option.
$infoFields = [
    'CURLINFO_EFFECTIVE_URL' => CURLINFO_EFFECTIVE_URL,
    'CURLINFO_TOTAL_TIME' => CURLINFO_TOTAL_TIME,
    'CURLINFO_NAMELOOKUP_TIME' => CURLINFO_NAMELOOKUP_TIME,
    'CURLINFO_CONNECT_TIME' => CURLINFO_CONNECT_TIME,
    'CURLINFO_PRETRANSFER_TIME' => CURLINFO_PRETRANSFER_TIME,
    'CURLINFO_STARTTRANSFER_TIME' => CURLINFO_STARTTRANSFER_TIME,
    'CURLINFO_PRIMARY_IP' => CURLINFO_PRIMARY_IP,
    'CURLINFO_NUM_CONNECTS' => CURLINFO_NUM_CONNECTS,
    'CURLINFO_APPCONNECT_TIME' => CURLINFO_APPCONNECT_TIME,
    'CURLINFO_APPCONNECT_TIME_T' => CURLINFO_APPCONNECT_TIME_T,
    'CURLINFO_CONNECT_TIME_T' => CURLINFO_CONNECT_TIME_T,
    'CURLINFO_NAMELOOKUP_TIME_T' => CURLINFO_NAMELOOKUP_TIME_T,
    'CURLINFO_PRETRANSFER_TIME_T' => CURLINFO_PRETRANSFER_TIME_T,
    'CURLINFO_STARTTRANSFER_TIME_T' => CURLINFO_STARTTRANSFER_TIME_T,
    'CURLINFO_TOTAL_TIME_T' => CURLINFO_TOTAL_TIME_T,
];

// Collect headers, decoded body and metrics of every first-batch transfer.
foreach ($multiCurl as $k => $ch) {
    // Raw response: header block followed by the body (CURLOPT_HEADER is on).
    $response = curl_multi_getcontent($ch);

    // Split the raw response at the header size reported by curl.
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $header = substr($response, 0, $header_size);
    $headers1[$k] = $this->get_headers($header);
    $body = substr($response, $header_size);

    $result1[$k] = json_decode($body, true);

    // Detach the finished handle from the multi handle before reading metrics.
    curl_multi_remove_handle($mh, $ch);

    foreach ($infoFields as $label => $option) {
        $info[$k][$label] = curl_getinfo($ch, $option);
    }
}

// Pause between the two batches.
sleep(5);

// Build the second batch on the same multi handle, replacing the entries of
// $multiCurl with fresh easy handles.
// NOTE(review): these handles set no CURLOPT_RESOLVE, so nothing here ties
// request $i to $ips[$i]; libcurl presumably picks a cached connection by
// host/port only. If per-IP pinning is required, it likely has to be set on
// both batches (e.g. via CURLOPT_CONNECT_TO) — confirm against libcurl docs.
for ($i = 1; $i <= 8; $i++) {
    // Parenthesised on purpose: before PHP 8 "." and "+" had equal precedence,
    // so 'url?' . $i + 10 evaluated as ('url?' . $i) + 10 and lost the URL.
    $fetchURL = 'https://endpoint/timestamp?' . ($i + 10);
    $multiCurl[$i] = curl_init();
    curl_setopt($multiCurl[$i], CURLOPT_URL, $fetchURL);
    // Keep the response headers in the returned string.
    curl_setopt($multiCurl[$i], CURLOPT_HEADER, 1);
    // Return the response instead of printing it.
    curl_setopt($multiCurl[$i], CURLOPT_RETURNTRANSFER, 1);

    curl_multi_add_handle($mh, $multiCurl[$i]);
}

// Number of transfers still running; curl_multi_exec() updates it by reference.
$index = null;

$t3 = microtime(true);

// Drive the second batch until no transfer is running or the multi stack
// reports an error.
do {
    $status = curl_multi_exec($mh, $index);
    if ($index > 0 && $status === CURLM_OK) {
        // Wait for socket activity instead of spinning the CPU between calls.
        curl_multi_select($mh);
    }
} while ($index > 0 && $status === CURLM_OK);

$t4 = microtime(true);

// Metrics recorded for each transfer: result key => curl_getinfo() option.
$infoFields = [
    'CURLINFO_EFFECTIVE_URL' => CURLINFO_EFFECTIVE_URL,
    'CURLINFO_TOTAL_TIME' => CURLINFO_TOTAL_TIME,
    'CURLINFO_NAMELOOKUP_TIME' => CURLINFO_NAMELOOKUP_TIME,
    'CURLINFO_CONNECT_TIME' => CURLINFO_CONNECT_TIME,
    'CURLINFO_PRETRANSFER_TIME' => CURLINFO_PRETRANSFER_TIME,
    'CURLINFO_STARTTRANSFER_TIME' => CURLINFO_STARTTRANSFER_TIME,
    'CURLINFO_PRIMARY_IP' => CURLINFO_PRIMARY_IP,
    'CURLINFO_NUM_CONNECTS' => CURLINFO_NUM_CONNECTS,
    'CURLINFO_APPCONNECT_TIME' => CURLINFO_APPCONNECT_TIME,
    'CURLINFO_APPCONNECT_TIME_T' => CURLINFO_APPCONNECT_TIME_T,
    'CURLINFO_CONNECT_TIME_T' => CURLINFO_CONNECT_TIME_T,
    'CURLINFO_NAMELOOKUP_TIME_T' => CURLINFO_NAMELOOKUP_TIME_T,
    'CURLINFO_PRETRANSFER_TIME_T' => CURLINFO_PRETRANSFER_TIME_T,
    'CURLINFO_STARTTRANSFER_TIME_T' => CURLINFO_STARTTRANSFER_TIME_T,
    'CURLINFO_TOTAL_TIME_T' => CURLINFO_TOTAL_TIME_T,
];

// Collect headers, decoded body and metrics of every second-batch transfer,
// and track the smallest 'response-time' header value in $weight.
foreach ($multiCurl as $k => $ch) {
    // Raw response: header block followed by the body (CURLOPT_HEADER is on).
    $response = curl_multi_getcontent($ch);

    // Split the raw response at the header size reported by curl.
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $header = substr($response, 0, $header_size);
    $headers2[$k] = $this->get_headers($header);
    $body = substr($response, $header_size);

    $result2[$k] = json_decode($body, true);

    // Detach the finished handle from the multi handle before reading metrics.
    curl_multi_remove_handle($mh, $ch);

    foreach ($infoFields as $label => $option) {
        $info2[$k][$label] = curl_getinfo($ch, $option);
    }

    // Seed $weight from the first transfer, then keep the minimum seen so far.
    // NOTE(review): assumes get_headers() returns a list whose first entry
    // holds a 'response-time' key — confirm against its implementation.
    if (!isset($weight)) {
        $weight = $headers2[$k][0]['response-time'];
    }
    if ($headers2[$k][0]['response-time'] < $weight) {
        $weight = $headers2[$k][0]['response-time'];
    }
}

// Release the multi handle now that both batches are done.
curl_multi_close($mh);

Output of the first parallel request:

"1": { "CURLINFO_PRIMARY_IP": "1st ip", //...... }, "2": { "CURLINFO_PRIMARY_IP": "2nd IP", //...... }, "3": { "CURLINFO_PRIMARY_IP": "3rd ip", //...... }, //............ }

Output of the second parallel request, which should run on the already-opened connections (curl handles):

"1": { "CURLINFO_PRIMARY_IP": "1st ip", //...... }, "2": { "CURLINFO_PRIMARY_IP": "1st ip", //...... }, "3": { "CURLINFO_PRIMARY_IP": "1st ip", //...... }, //............ }

Without CURLOPT_RESOLVE, relying only on DNS, the 1st batch of requests takes longer because of opening the connections, SSL handshakes, etc.

For the 2nd batch, I think those connections are being reused, because app_connect_time is 0 and the requests take substantially less time... but I'm not sure how curl_multi_exec works under the hood.

Upvotes: 1

Views: 59

Answers (0)

Related Questions