KJThaDon
KJThaDon

Reputation: 416

Scraping file to convert to json

I am not getting any output from the bottom half as I was expecting. I can grab the top table's data, but I am also trying to grab the bottom table's data and encode both into JSON. The columns I need to grab are:

1. Week Date Home Away At Notes

<?php

// Scrapes the tables with class "report" from a leagueleader.net shared report
// page and echoes the collected data as pretty-printed JSON inside <pre> tags.

// Collect libxml parse errors internally instead of emitting them as warnings.
libxml_use_internal_errors(true);

$doc = new DOMDocument();
$doc->loadHTML(file_get_contents('https://www.leagueleader.net/sharedreport.php?operatorid=98&code=bc155b01-7492-412d-aa75-3c1e357248f1'));
$doc->strictErrorChecking = false;

// Rows collected from the tables, grouped under a key taken from each row.
$pre = [];
// Column names applied to the rows of the table at index 1.
$keys = ['team', 'div', 'team-site-name', 'site-address', 'site-phone'];
// Column names applied to the rows of the table at index 2.
$keys2 = ['week', 'date', 'home', 'away', 'at', 'notes'];
// $k is the table's position among ALL <table> elements in the document,
// not only among those that pass the "report" class check below.
foreach ($doc->getElementsByTagName('table') as $k => $table) {

    // Ignore tables whose class attribute does not contain "report".
    if (strpos($table->getAttribute('class'), 'report') === false) {
        continue;
    }
    foreach ($table->getElementsByTagName('tr') as $i => $tr) {
        if ($tr->parentNode->nodeName === 'thead') continue; // skip header rows
        // Non-empty trimmed text of each child node of the row, in order.
        // Empty cells are dropped, so the count can be lower than the number
        // of columns in the table.
        $row_values = [];
        foreach ($tr->childNodes as $td) {
            $text = trim($td->nodeValue);
            if ($text === '') continue;
            $row_values[] = $text;
        }

        // NOTE(review): array_combine() requires both arrays to have the same
        // number of elements; rows with dropped empty cells will not satisfy
        // that for either branch below.
        if($k == 1 ){


            $row_values = array_combine($keys, $row_values);

        }else   if($k == 2 ){
            // Discard the second collected value before applying $keys2.
            unset($row_values[1]);
        $row_values = array_combine($keys2, $row_values);

        }
          // NOTE(review): neither $keys nor $keys2 contains 'name', so no
          // 'name' entry is ever set on $row_values in this script.
          $pre[$row_values['name']][] = $row_values;

    }

}
// Build the final structure from the grouped rows: $row[0] is the first row
// stored under a key and $row[1] the second.
$combined = [];
foreach($pre as $week => $row){
    // NOTE(review): $name is not assigned anywhere in this script.
    $combined[$name] = [
        "week"=> $week,
        "team"=> $row[0]['team'],
        "div"=> $row[0]['div'],
        "team-site-name" => $row[0]['team-site-name'],
        "site-address" => $row[0]['site-address'],
        "site-phone" => $row[0]['site-phone'],

        //"week" => $row[1]['week'],
        "date" => $row[1]['date'],
        "home" => $row[1]['home'],
        "away" => $row[1]['away'],
        "at" => $row[1]['at'],
        "notes" => $row[1]['notes']
    ];
}

echo '<pre>'.json_encode($combined, JSON_PRETTY_PRINT).'</pre>';
?>

Here is the output

{  
    "": {  
        "week": "",  
        "team": "1",  
        "div": "A",  
        "team-site-name": "Team 01Freer Bar",  
        "site-address": "\u00a07355 Michigan Ave Detroit, MI 48210",  
        "site-phone": "\u00a03138993699",  
        "date": null,  
        "home": null,  
        "away": null,  
        "at": null,  
        "notes": null  
    }  
}  

Upvotes: 0

Views: 116

Answers (1)

Nigel Ren
Nigel Ren

Reputation: 57131

To get the data from the second table with the matches, I've changed the processing to use XPath. This extracts the <tr> tags from the body of the second table with class='report' (using //table[@class='report'][2]/tbody/tr).

So this will return all of the rows in the body of the table. The code then extracts all of the <td> elements from each row and picks out the details. If a week/date is present in the row, it overwrites the current value; if there are match details, it adds a row to the output...

// Extract every match from the body rows of the second table with class
// "report". Relies on $doc, the DOMDocument loaded earlier in the script.
$xpath = new DOMXPath($doc);
$reportRow = $xpath->query("//table[@class='report'][2]/tbody/tr");

// Returns the text of the cell at $index with surrounding whitespace removed,
// or '' when the row has no such cell (so short rows do not dereference null).
// Non-breaking spaces (U+00A0, i.e. &nbsp;) are stripped as well, because
// plain trim() leaves them in place and an "&nbsp;"-only cell would otherwise
// count as real content.
$cellText = static function (DOMNodeList $cells, int $index): string {
    $cell = $cells->item($index);
    if ($cell === null) {
        return '';
    }
    $text = $cell->textContent;
    $clean = preg_replace('/^[\s\x00\x{00A0}]+|[\s\x00\x{00A0}]+$/u', '', $text);
    // preg_replace() returns null on failure; fall back to a plain trim().
    return $clean ?? trim($text);
};

$matches = [];
$week = '';
$date = '';
foreach ($reportRow as $row) {
    $cells = $row->getElementsByTagName("td");

    // Set week and date if present in the current row; otherwise keep the
    // values carried over from an earlier row. Compared against '' rather
    // than by truthiness so that a value of "0" is not treated as missing.
    $rowWeek = $cellText($cells, 0);
    if ($rowWeek !== '') {
        $week = $rowWeek;
    }
    $rowDate = $cellText($cells, 1);
    if ($rowDate !== '') {
        $date = $rowDate;
    }

    // Extract the other details
    $teamHome = $cellText($cells, 2);
    $teamAway = $cellText($cells, 3);
    $at = $cellText($cells, 4);
    $notes = $cellText($cells, 5);

    // If there are some match details, then store them
    if ($teamHome !== '') {
        $matches[] = ["week" => $week, "date" => $date,
            "teamHome" => $teamHome, "teamAway" => $teamAway,
            "at" => $at, "notes" => $notes
        ];
    }
}
print_r($matches);

This gives...

Array
(
    [0] => Array
        (
            [week] => 1
            [date] => 09/10/2019
            [teamHome] => Team 01
            [teamAway] => BYE
            [at] => BYE
            [notes] => 
        )

Upvotes: 1

Related Questions