Tom Canfarotta
Tom Canfarotta

Reputation: 773

Call cURL with modified offset values in a loop while more results are available

I am attempting to integrate a web service whose JSON response includes a URL for the next 1,000 results. I need to repeat the cURL request, each time using that returned URL, until the "next" value in the response is empty.

I am having trouble with this because I cannot work out how to loop back and issue another request once I have read the returned "next" value.

// First page of results; the API is expected to return a "next" URL for the following page.
$url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?offset=0&limit=1000";

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status codes >= 400 as failures (curl_exec() then returns false).
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; enable it unless
// the endpoint genuinely cannot present a verifiable certificate.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

$result = curl_exec($ch);

// Fail loudly rather than writing and decoding a non-response.
if ($result === false) {
    throw new \RuntimeException('cURL request failed: ' . curl_error($ch));
}

// Keep a raw copy of every response on disk.
// NOTE(review): rj_array is a bare constant, presumably defined elsewhere as
// the output path - confirm. An undefined constant is an Error in PHP 8.
file_put_contents(rj_array, $result, FILE_APPEND);

// Decode the response that was just received. Re-reading the append-only file
// would pass json_decode() every stored response concatenated together, which
// is not a single valid JSON document and decodes to null.
$json_result = json_decode($result, true);

// URL of the next page, or null when the response has no (or a null) "next" key.
$next = isset($json_result['next']) ? $json_result['next'] : null;

I was thinking of doing something like:

// Repeat for as long as the previous response supplied a non-empty "next" value.
while(!empty($next)){
    // Do the curl request again with the new URL $next, then reassign $next
    // from the newly decoded response. The condition only reads $next, so the
    // loop cannot end unless the body updates it.
}

But I cannot work out how to update the $next variable on each iteration.

Upvotes: 2

Views: 2214

Answers (3)

mickmackusa
mickmackusa

Reputation: 47874

I would not recommend a pre-test loop or recursion; this is a task for a post-test (do-while) loop, because the first request must always be made before there is a "next" value to test.

Declare unchanging aspects of your curl call before entering the loop. Inside the loop manipulate the offset and limit values and monitor the next value to determine if another iteration is required. Use array_push() with the spread operator to append new rows of data to your result array.

Code: (Mock Demo)

// URL template: sprintf() fills in the offset and limit for each page.
$url = "https://example.com/process/api/transactions/"
     . "from/$yesterday/to/$today/for/company/$company?offset=%d&limit=%d";

// Options that never change are set once, before entering the loop.
$ch = curl_init();
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status codes >= 400 as failures (curl_exec() then returns false).
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; enable it unless
// the endpoint genuinely cannot present a verifiable certificate.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

$offset = 0;
$limit = 1000;
$result = [];
do {
    // Only the URL changes between iterations.
    curl_setopt($ch, CURLOPT_URL, sprintf($url, $offset, $limit));
    $json = curl_exec($ch);
    if ($json === false) {
        // Without this check the failure would surface later as an
        // unrelated error when unpacking a null value.
        throw new \RuntimeException('cURL request failed: ' . curl_error($ch));
    }
    // save the returned json string to your system if you wish
    $response = json_decode($json, true);
    if (!is_array($response)) {
        throw new \RuntimeException(
            'Response could not be decoded to an array (json error: ' . json_last_error_msg() . ')'
        );
    }
    // Skip the push for a missing or empty page: unpacking null is an Error,
    // and array_push() with no values warns before PHP 7.3.
    if (!empty($response['results']) && is_array($response['results'])) {
        array_push($result, ...$response['results']);
    }
    $offset += $limit;
} while (!empty($response['next'])); // another page exists while "next" is non-empty
var_export($result);

Upvotes: 0

Felippe Duarte
Felippe Duarte

Reputation: 15131

I do like the recursive function approach:

/**
 * Request one page and keep following the "next" URL from each response
 * until a response has none, collecting every decoded page.
 *
 * @param string $url    URL of the page to request.
 * @param array  $return Pages collected so far, keyed by the URL they came from.
 * @param string $cookie Value for the Cookie request header; no header is sent when empty.
 *
 * @return array $return with this page and all following pages added. A page
 *               whose request failed or whose body was not valid JSON is
 *               stored as null, and crawling stops there.
 */
function crawler($url, $return, $cookie = '') {
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
    // Return the response body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    // Treat HTTP status codes >= 400 as failures (curl_exec() then returns false).
    curl_setopt($ch, CURLOPT_FAILONERROR, 1);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // The cookie is passed in explicitly: a global $cookie is not visible
    // inside a function body.
    if ($cookie !== '') {
        curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
    }
    // NOTE(review): this disables TLS certificate verification; enable it unless
    // the endpoint genuinely cannot present a verifiable certificate.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

    $result = curl_exec($ch);

    $json_result = null;
    if ($result !== false) {
        // Keep a raw copy of every response on disk.
        // NOTE(review): rj_array is a bare constant, presumably defined elsewhere
        // as the output path - confirm. An undefined constant is an Error in PHP 8.
        file_put_contents(rj_array, $result, FILE_APPEND);

        // Decode only the response just received. Re-reading the append-only
        // file would yield all pages concatenated, which json_decode() rejects,
        // so every page after the first would decode to null.
        $json_result = json_decode($result, true);
    }

    $return[$url] = $json_result; //set your return information in some array

    // isset() is false for a null page as well as for a missing "next" key.
    $next = isset($json_result['next']) ? $json_result['next'] : null;

    if(!empty($next)) {
        return crawler($next, $return, $cookie); //call again same function with next url and array of data that you would use later
    }

    return $return;
}

$url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?offset=0&limit=1000";
$allJsonResults = crawler($url, [], $cookie);

Upvotes: 1

Dom
Dom

Reputation: 3080

Why not

$base_url = "https://xxxxxxxxxxxxxx.com/process/api/transactions/from/$yesterday/to/$today/for/company/21?limit=1000";

// The handle and its unchanging options are created once and reused for every page.
$ch = curl_init();
curl_setopt($ch, CURLOPT_CUSTOMREQUEST, "GET");
// Return the response body from curl_exec() instead of printing it.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Treat HTTP status codes >= 400 as failures (curl_exec() then returns false).
curl_setopt($ch, CURLOPT_FAILONERROR, 1);
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
curl_setopt($ch, CURLOPT_HTTPHEADER, array("Cookie: $cookie"));
// NOTE(review): this disables TLS certificate verification; enable it unless
// the endpoint genuinely cannot present a verifiable certificate.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);

// The first request starts at offset 0. Each later request uses the "next"
// value from the previous response as the URL, since the question describes
// "next" as a URL rather than an offset number.
$next = $base_url . "&offset=0";
while( $next !== false ) {
    $url = $next;
    curl_setopt($ch, CURLOPT_URL, $url);

    $result = curl_exec($ch);
    if ($result === false) {
        throw new \RuntimeException('cURL request failed: ' . curl_error($ch));
    }

    // Keep a raw copy of every response on disk.
    // NOTE(review): rj_array is a bare constant, presumably defined elsewhere
    // as the output path - confirm. An undefined constant is an Error in PHP 8.
    file_put_contents(rj_array, $result, FILE_APPEND);

    // Decode only the response just received; the append-only file holds all
    // pages concatenated, which is not a single valid JSON document.
    $json_result = json_decode($result, true);

    // Stop when "next" is missing or empty; empty() also copes with a null
    // $json_result from an undecodable body.
    $next = empty($json_result['next']) ? false : $json_result['next'];
}

Upvotes: 2

Related Questions