KJThaDon
KJThaDon

Reputation: 416

Dumping tables into json from external page

I have this partially working. I need to grab the data of each player, and present a variable for each "cricket" and "x01" games. I am able to grab the data from the top table, however the 2nd one is not showing any data in my code. I am probably missing something simple, but I can't figure it out.

I want the output to show like this. The part under the line break is what I am missing.

"Howard Hill": {
        "name": "Howard Hill",
        "team": "Team 2",
        "ppd_01": "34.54",
        "games_01": "153",
        "wins_01": "999",
        "assists_01": "69",
        "sspre_01": "7.876",

        "mpr_crk": "9.99",
        "games_crk": "999",
        "wins_crk": "999",
        "assists_crk": "99",
        "sspre_crk": "9.999"
}

Here is my code

<?php
// Downloads the player standings page, collects the non-empty cell text of
// every table row, and prints the rows as pretty-printed JSON keyed by player name.
ini_set('default_socket_timeout', 180); // 180 seconds = 3 minutes
libxml_use_internal_errors(true);

$doc = new DOMDocument();
$doc->loadHTML(file_get_contents('http://freerdarts.com/past_stats/tues-2018-player-standings.html'));
$doc->strictErrorChecking = false;

// $pre maps a row index to the list of non-empty cell texts found at that index.
// NOTE(review): every <table> in the document is visited, and $i restarts at 0
// for each table, so rows from different tables that share an index are appended
// to the same $pre[$i] entry instead of being kept apart.
$pre = [];
foreach ($doc->getElementsByTagName('table') as $table) {
    foreach ($table->getElementsByTagName('tr') as $i => $tr) {
        $y = 0; // number of non-empty cells stored for this row so far
        foreach ($tr->childNodes as $td) {
            $text = trim($td->nodeValue);

            // Once a row has produced more than eight values, the whole $pre[$i]
            // entry is dropped (including anything an earlier table stored there).
            if ($y > 7) {
                unset($pre[$i]);
                continue;
            }

            // Skips whitespace-only nodes; note that empty() is also true for the
            // string "0", so a cell containing just 0 is skipped as well.
            if (empty($text)) {
                continue;
            }

            $pre[$i][] = $text;
            $y++;
        }
    }
}

// normalise
// Only positions 0-5 of each collected row are used, so any values appended
// after the sixth one never reach the output.
$pstats = [];
foreach ($pre as $row) {

    $pstats[$row[0]] = [
        'name' => $row[0],
        'team' => $row[1],
        'ppd_01' => $row[2],
        'games_01' => $row[3],
        'wins_01' => $row[4],
        'sspre_01' => $row[5],

    ];
}

echo '<pre>'.json_encode($pstats, JSON_PRETTY_PRINT).'</pre>';
//echo $pstats['Scott Sandberg']['01'];
?>

Upvotes: 2

Views: 64

Answers (3)

EternalHour
EternalHour

Reputation: 8651

It seems to me that you want to combine the x01 table values with the crk table values under the same name. Here is the code that I think you are looking for with an example.

// Reads the report tables from $doc (a DOMDocument that already holds the
// standings page) and prints one merged JSON record per player, combining the
// x01 columns with the cricket columns.
$x01 = []; // player name => row taken from the first report table (x01)
$crk = []; // player name => row taken from any later report table (cricket)

$keys_01 = ['name', 'team', 'ppd_01', 'games_01', 'wins_01', 'sspre_01'];
$keys_crk = ['name', 'team', 'mpr_crk', 'games_crk', 'wins_crk', 'assists_crk', 'sspre_crk'];
$table_num = 1; // 1-based position among the report tables only
foreach ($doc->getElementsByTagName('table') as $table) {
    // Only tables whose class attribute contains "report" are processed.
    if (strpos($table->getAttribute('class'), 'report') === false) {
        continue;
    }
    $keys = ($table_num === 1) ? $keys_01 : $keys_crk;
    foreach ($table->getElementsByTagName('tr') as $tr) {
        if ($tr->parentNode->nodeName === 'thead') continue; // skip headers
        $row_values = [];
        foreach ($tr->childNodes as $td) {
            $text = trim($td->nodeValue);
            if ($text === '') continue; // whitespace-only nodes and empty cells
            $row_values[] = $text;
        }
        // array_combine() requires both arrays to have the same length; a row
        // with an unexpected number of cells is skipped instead of failing.
        if (count($row_values) !== count($keys)) {
            continue;
        }
        $row_values = array_combine($keys, $row_values);
        if ($table_num === 1) {
            // build x01 array
            $x01[$row_values['name']] = $row_values;
        } else {
            // build crk array
            $crk[$row_values['name']] = $row_values;
        }
    }
    $table_num++;
}

// Merge per player. The "+" union keeps the values already present (name and
// team from the x01 row) and only adds the cricket-specific keys, so no
// duplicate clean-up is needed and a player who appears in just one table
// keeps the full team string.
$combined = $x01;
foreach ($crk as $name => $row) {
    $combined[$name] = isset($combined[$name]) ? $combined[$name] + $row : $row;
}

echo json_encode($combined, JSON_PRETTY_PRINT);

Upvotes: 1

Sangita Kendre
Sangita Kendre

Reputation: 429

I have modified @Ghost's code. Try the code below.

<?php
// Downloads the standings page, reads the x01 and cricket report tables, and
// prints one flat JSON record per player with *_01 and *_crk columns.
libxml_use_internal_errors(true);

$html = file_get_contents('http://freerdarts.com/past_stats/tues-2018-player-standings.html');
if ($html === false) {
    // Without this check a failed download would be handed to loadHTML().
    exit('Unable to download the standings page.');
}

$doc = new DOMDocument();
$doc->loadHTML($html);
$doc->strictErrorChecking = false;

// $pre[player name] = ['team' => ..., 'x01' => row, 'crk' => row]
// Rows are stored under the game they belong to rather than by position, so a
// player who is missing from one table cannot get the other game's numbers.
$pre = [];
$keys = ['name', 'team', 'ppd', 'games', 'wins', 'sspre'];
$keys2 = ['name', 'mpr', 'games', 'wins','assists', 'sspre'];
foreach ($doc->getElementsByTagName('table') as $k => $table) {

    if (strpos($table->getAttribute('class'), 'report') === false) {
        continue;
    }
    // Table index 1 is treated as x01 and index 2 as cricket; a report table at
    // any other index has no key layout, so it is skipped.
    if ($k !== 1 && $k !== 2) {
        continue;
    }
    $is_x01 = ($k === 1);

    foreach ($table->getElementsByTagName('tr') as $tr) {
        if ($tr->parentNode->nodeName === 'thead') continue; // skip headers
        $row_values = [];
        foreach ($tr->childNodes as $td) {
            $text = trim($td->nodeValue);
            if ($text === '') continue; // whitespace-only nodes and empty cells
            $row_values[] = $text;
        }

        $team = null;
        if (!$is_x01) {
            // The cricket layout has no team key: remember the value, then drop it.
            $team = isset($row_values[1]) ? $row_values[1] : null;
            unset($row_values[1]);
        }

        // array_combine() requires equal lengths; skip rows that do not fit.
        $expected_keys = $is_x01 ? $keys : $keys2;
        if (count($row_values) !== count($expected_keys)) {
            continue;
        }
        $row_values = array_combine($expected_keys, $row_values);
        if ($is_x01) {
            $team = $row_values['team'];
        }

        $name = $row_values['name'];
        if (!isset($pre[$name]['team'])) {
            $pre[$name]['team'] = $team;
        }
        $pre[$name][$is_x01 ? 'x01' : 'crk'] = $row_values;
    }
}

// Placeholder rows (all null) for players that are absent from one of the tables.
$blank_x01 = array_fill_keys($keys, null);
$blank_crk = array_fill_keys($keys2, null);

$new_arr = [];
foreach ($pre as $name => $player) {
    $x01 = isset($player['x01']) ? $player['x01'] : $blank_x01;
    $crk = isset($player['crk']) ? $player['crk'] : $blank_crk;

    $new_arr[$name] = [
        "name" => $name,
        "team" => $player['team'],
        "ppd_01" => $x01['ppd'],
        "games_01" => $x01['games'],
        "wins_01" => $x01['wins'],
        "sspre_01" => $x01['sspre'],
        "mpr_crk" => $crk['mpr'],
        "games_crk" => $crk['games'],
        "wins_crk" => $crk['wins'],
        "assists_crk" => $crk['assists'],
        "sspre_crk" => $crk['sspre']
    ];
}

echo '<pre>'.json_encode($new_arr, JSON_PRETTY_PRINT).'</pre>';

Here is sample output https://www.tehplayground.com/Du5rId3iRx3NH6UL

Upvotes: 1

Kevin
Kevin

Reputation: 41893

One problem you're facing is that you're not getting the proper table that needs parsing.

Take note there are multiple tables inside that page.

Inside the loop, you need to skip the other tables in the HTML page and process only the score report tables, nothing else:

// Skip any table whose class attribute does not contain "report".
if (strpos($table->getAttribute('class'), 'report') === false) {
    continue;
}

So after getting other tables out of the way, you can start processing the data inside the specific table results that you want to store.

Another thing to point out is that you need to skip the header rows inside the table; you don't need them anyway.

if ($tr->parentNode->nodeName === 'thead') continue; // skip headers 

After that, it's just a matter of looping over each <td>.

One gotcha is that the tables differ in width: one table has six columns and the other has seven. So first gather all the <td> values of a row; then, if there are seven, unset the extra one so that both tables share a uniform column layout. (I assume you're trying to skip assists.)

Here's the full code:

// Reads the report tables from $doc (a DOMDocument that already holds the
// standings page) and prints one JSON record per player.
$pre = []; // player name => stats merged from every report table

// One key layout per report table, in the order the tables are encountered.
// NOTE(review): assumes the first report table is x01 and the second is
// cricket - confirm against the page. Giving each table its own stat keys
// stops the second table from overwriting the first one's numbers.
$layouts = [
    ['name', 'team', 'ppd_01', 'games_01', 'wins_01', 'sspre_01'],
    ['name', 'team', 'mpr_crk', 'games_crk', 'wins_crk', 'sspre_crk'],
];
$report_index = 0; // position among the report tables only

foreach ($doc->getElementsByTagName('table') as $table) { // loop all found tables
    if (strpos($table->getAttribute('class'), 'report') === false) {
        continue; // if it's not a report table, skip it
    }
    if (!isset($layouts[$report_index])) {
        break; // more report tables than known layouts
    }
    $keys = $layouts[$report_index];
    $report_index++;

    foreach ($table->getElementsByTagName('tr') as $tr) { // loop each row of the report table
        if ($tr->parentNode->nodeName === 'thead') continue; // skip headers
        $row_values = []; // non-empty cell texts of this row
        foreach ($tr->childNodes as $td) { // loop each cell
            $text = trim($td->nodeValue);
            if ($text === '') continue; // whitespace-only nodes and empty cells
            $row_values[] = $text;
        }
        // unset assists if this row has 7 columns, so both tables are 6 wide
        if (count($row_values) === 7) unset($row_values[5]);
        // array_combine() requires equal lengths; skip rows that do not fit
        if (count($row_values) !== count($keys)) continue;
        $row_values = array_combine($keys, $row_values); // combine the keys and values

        // Merge into the player's existing record instead of replacing it; the
        // "+" union keeps the name/team already stored and adds the new keys.
        $name = $row_values['name'];
        $pre[$name] = isset($pre[$name]) ? $pre[$name] + $row_values : $row_values;
    }
}
// finally encode in the end
echo json_encode($pre);

Here's the sample output

Upvotes: 1

Related Questions