Ahmad
Ahmad

Reputation: 95

Convert nested <ul><li> to PHP nested array

I want to convert nested ul li to a PHP array.

The HTML code that I have looks something like this:

<ul id="main-menu">
    <li id="firstNavItem"><a href="index.html">Home</li>
    <li><a href="Warp.html">Warp</a>
        <ul>
            <li><a href="Warp-how-it-works.html">How it works</a>
            </li>
            <li><a href="Warp-Engine.html">Warp Engine</a>
            </li>
            <li><a href="WarpFactors.html">Warp Factors</a>
            </li>
            <li><a href="">Fuel</a>
                <ul>
                    <li><a href="Anti-Matter.html">Anti-Matter</a>
                    </li>
                    <li><a href="Deuterium.html">Deuterium</a>
                    </li>
                </ul>
            </li>
        </ul>
    </li>
    <li><a href="Fact-or-Fiction.html">Fact or Fiction</li>
    <li><a href="StarTrek.html">Star Trek</a>
        <ul>
            <li><a href="Enterprise.html">Enterprise</a>
            </li>
            <li><a href="Voyager.html">Voyager</a>
            </li>
        </ul>
    </li>
    <li><a href="about.html">About</a>
    </li> </ul>

It must be converted to an array.

I tried several ways to parse it, but I failed.

One of the ways that I've used is:

// Attempt from the question: visit every <li> in the document and store one
// entry per <li> in $arr, keyed by the <li>'s position in the node list.
// Presumably $data holds the HTML shown above; it is not defined in this
// snippet. $arr is not initialised in this snippet either.
$doc = new \DOMDocument();
$doc->preserveWhiteSpace = false;
$doc->loadHTML($data);
$i = 0;

// The <li> node list is queried again on every iteration. The loop ends as
// soon as item($i) no longer yields an object, i.e. $i is past the last <li>.
while( is_object($finance = $doc->getElementsByTagName("li")->item($i)) )
{
    // NOTE: every branch below assigns to $arr[$i] rather than appending, so
    // each child overwrites the previous one and only the value written for
    // the last child of this <li> is kept.
    foreach($finance->childNodes as $nodename)
    {
        // Direct child that is itself an <li>.
        // NOTE(review): nested items normally sit inside a <ul>, so this
        // branch is presumably rarely (if ever) taken - confirm.
        if($nodename->nodeName == 'li')
        {
            foreach($nodename->childNodes as $subNodes)
            {
                $arr[$i] = $subNodes->nodeValue.PHP_EOL;
            }
        }
        else
        {
            // Split the child's text on a fixed run of spaces (apparently the
            // indentation of the markup). A single piece means the run did
            // not occur, so the raw text is stored; otherwise the flat list
            // of pieces is stored.
            $s = explode('             ', $nodename->nodeValue);
            if (count($s) == 1)
            {
                $arr[$i] =$nodename->nodeValue;
            }
            else
            {
                $arr[$i] =  $s;
            }

        }
    }

    $i++;
}

Upvotes: 1

Views: 1925

Answers (3)

user3834658
user3834658

Reputation: 346

The following code gives a nested array. I don't think it's clear exactly what the outputted array should look like, but this code gives the following:

Array
(
    [0] => Array
        (
            [key] => Home
            [items] => Array
                (
                )

        )

    [1] => Array
        (
            [key] => Warp
            [items] => Array
                (
                    [0] => Array
                        (
                            [key] => How it works
                            [items] => Array
                                (
                                )

                        )

                    [1] => Array
                        (
                            [key] => Warp Engine
                            [items] => Array
                                (
                                )

                        )

                    [2] => Array
                        (
                            [key] => Warp Factors
                            [items] => Array
                                (
                                )

                        )

                    [3] => Array
                        (
                            [key] => Fuel
                            [items] => Array
                                (
                                    [0] => Array
                                        (
                                            [key] => Anti-Matter
                                            [items] => Array
                                                (
                                                )

                                        )

                                    [1] => Array
                                        (
                                            [key] => Deuterium
                                            [items] => Array
                                                (
                                                )

                                        )

                                )

                        )

                )

        )

    [2] => Array
        (
            [key] => Fact or Fiction
            [items] => Array
                (
                )

        )

    [3] => Array
        (
            [key] => Star Trek
            [items] => Array
                (
                    [0] => Array
                        (
                            [key] => Enterprise
                            [items] => Array
                                (
                                )

                        )

                    [1] => Array
                        (
                            [key] => Voyager
                            [items] => Array
                                (
                                )

                        )

                )

        )

    [4] => Array
        (
            [key] => About
            [items] => Array
                (
                )

        )

)

Code:

<?php

class Parser {

    /**
     * Nested representation of the parsed list items.
     *
     * Each entry has the shape ["key" => string, "items" => entries[]],
     * where "items" holds the entries of the <li> elements nested inside.
     *
     * @var array
     */
    private $elements = [];

    /**
     * Load an HTML file and collect its <li> elements as a nested array.
     *
     * Any result of a previous call is discarded first, so calling parse()
     * repeatedly never duplicates entries.
     *
     * @param string $file Path of the HTML file to read.
     * @throws \RuntimeException When the file cannot be loaded.
     */
    public function parse($file = "./html.html") {
        $doc = new \DOMDocument();
        $doc->preserveWhiteSpace = false;

        // Let libxml collect its complaints about sloppy markup (such as
        // unclosed <a> tags) internally instead of raising a PHP warning
        // for each of them; the previous setting is restored afterwards.
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTMLFile($file);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            throw new \RuntimeException("Unable to load HTML file: " . $file);
        }

        $this->elements = [];
        $this->parseChildNodes($doc, $this->elements);
    }

    /**
     * Walk $node depth-first, appending one entry per <li> to $arrayToPush.
     *
     * @param \DOMNode $node        Node whose subtree is scanned.
     * @param array    $arrayToPush List that receives the entries found at
     *                              the current nesting level.
     */
    private function parseChildNodes($node, & $arrayToPush) {
        if ($node->nodeName === "li") {
            $arrayToPush[] = [
                "key" => $this->getDisplayValueFromNode($node),
                "items" => []
            ];
            // Everything below this <li> belongs in its own "items" list.
            $target = & $arrayToPush[count($arrayToPush) - 1]["items"];
        } else {
            // Not an item itself: descendants stay on the current level.
            $target = & $arrayToPush;
        }

        // Some node types expose no child list at all.
        if ($node->childNodes === null) {
            return;
        }
        foreach ($node->childNodes as $child) {
            $this->parseChildNodes($child, $target);
        }
    }

    /**
     * Get the display text of a list item.
     *
     * This is the trimmed text of the first non-empty child that precedes
     * any nested list; in our case the text of the anchor tag. Whitespace-only
     * text nodes and comments are skipped, and an item without such a child
     * yields an empty string.
     *
     * @param \DOMNode $node The <li> node.
     * @return string
     */
    private function getDisplayValueFromNode($node) {
        foreach ($node->childNodes as $child) {
            // A nested list holds the sub-items, never this item's label.
            if ($child->nodeName === "ul" || $child->nodeName === "ol") {
                break;
            }
            if ($child->nodeType === XML_COMMENT_NODE) {
                continue;
            }

            $text = trim((string) $child->nodeValue);
            if ($text !== "") {
                return $text;
            }
        }

        return "";
    }

    /**
     * Return the nested array built by the last call to parse().
     *
     * @return array
     */
    public function getElements() {
        return $this->elements;
    }
}

// Run the parser and dump the nested array it produced.
$menuParser = new Parser();
$menuParser->parse();

$nestedMenu = $menuParser->getElements();
print_r($nestedMenu);

Upvotes: 4

OrderAndChaos
OrderAndChaos

Reputation: 3860

It was not easy, but I wasn't aware that you could access the DOM with PHP, so it was an interesting challenge.

This will work for lists nested up to two levels deep; you could refactor the code to make deeper lists easier to handle.

The below code should help you to get your lists into arrays. I have left echo statements in for ease of demonstration.

<?php
    /**
     * Render the items of a list as "<br>"-terminated lines, one per <li>.
     *
     * Every item is printed, including items that own a nested list; the
     * nested list is rendered right after its parent with one more
     * "&bull; " prefix. Nesting depth is not limited.
     *
     * @param DOMNode $list  The <ul>/<ol> node whose items are rendered.
     * @param int     $depth Nesting level of $list; 0 for the outermost list.
     * @return string HTML fragment.
     */
    function renderMenuList($list, $depth = 0) {
        $html = '';

        foreach ($list->childNodes as $item) {
            // Text nodes and comments between the items are not entries.
            if ($item->nodeName !== 'li') {
                continue;
            }

            // Separate the item's own label from the lists nested inside it.
            $label = '';
            $subLists = [];
            foreach ($item->childNodes as $child) {
                if ($child->nodeName === 'ul' || $child->nodeName === 'ol') {
                    $subLists[] = $child;
                } elseif ($child->nodeType !== XML_COMMENT_NODE) {
                    $label .= $child->textContent;
                }
            }

            // textContent is plain decoded text, so it has to be escaped
            // again before it is written back out as HTML.
            $html .= str_repeat('&bull; ', $depth)
                . htmlspecialchars(trim($label), ENT_QUOTES, 'UTF-8')
                . "<br>";

            foreach ($subLists as $subList) {
                $html .= renderMenuList($subList, $depth + 1);
            }
        }

        return $html;
    }

    // Sample markup from the question. The "Home" and "Fact or Fiction"
    // anchors are never closed, so loading it relies on the HTML parser's
    // error recovery.
    $data = <<<EOT
<ul id="main-menu">
    <li id="firstNavItem"><a href="index.html">Home</li>
    <li><a href="Warp.html">Warp</a>
        <ul>
            <li><a href="Warp-how-it-works.html">How it works</a>
            </li>
            <li><a href="Warp-Engine.html">Warp Engine</a>
            </li>
            <li><a href="WarpFactors.html">Warp Factors</a>
            </li>
            <li><a href="">Fuel</a>
                <ul>
                    <li><a href="Anti-Matter.html">Anti-Matter</a>
                    </li>
                    <li><a href="Deuterium.html">Deuterium</a>
                    </li>
                </ul>
            </li>
        </ul>
    </li>
    <li><a href="Fact-or-Fiction.html">Fact or Fiction</li>
    <li><a href="StarTrek.html">Star Trek</a>
        <ul>
            <li><a href="Enterprise.html">Enterprise</a>
            </li>
            <li><a href="Voyager.html">Voyager</a>
            </li>
        </ul>
    </li>
    <li><a href="about.html">About</a>
    </li>
</ul>
EOT;

    $doc = new \DOMDocument();
    $doc->preserveWhiteSpace = false;

    // Keep libxml's warnings about the malformed sample markup out of the
    // output; the previous error-handling setting is restored afterwards.
    $previousSetting = libxml_use_internal_errors(true);
    $doc->loadHTML($data);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    // Only the first <ul> of the document is rendered.
    $list = $doc->getElementsByTagName('ul')->item(0);
    if ($list !== null) {
        echo renderMenuList($list);
    }

Upvotes: 1

user3834658
user3834658

Reputation: 346

getElementsByTagName() returns all nodes with that name (including nested ones), so there is no need to additionally search through the child nodes. The code in the snippet below returns this array:

Array
(
    [0] => Home
    [1] => Warp
    [2] => How it works
    [3] => Warp Engine
    [4] => Warp Factors
    [5] => Fuel
    [6] => Anti-Matter
    [7] => Deuterium
    [8] => Fact or Fiction
    [9] => Star Trek
    [10] => Enterprise
    [11] => Voyager
    [12] => About
)

Code:

<?php

class Parser {

    /**
     * Display texts of all <li> elements, in document order.
     *
     * @var string[]
     */
    private $elements = [];

    /**
     * Load an HTML file and collect the display text of every <li> in it.
     *
     * Nested items are included, so the result is a flat list. Any result of
     * a previous call is discarded first, so calling parse() repeatedly never
     * duplicates entries.
     *
     * @param string $file Path of the HTML file to read.
     * @throws \RuntimeException When the file cannot be loaded.
     */
    public function parse($file = "./html.html") {
        $doc = new \DOMDocument();
        $doc->preserveWhiteSpace = false;

        // Let libxml collect its complaints about sloppy markup internally
        // instead of raising a PHP warning for each of them; the previous
        // setting is restored afterwards.
        $previous = libxml_use_internal_errors(true);
        $loaded = $doc->loadHTMLFile($file);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);

        if ($loaded === false) {
            throw new \RuntimeException("Unable to load HTML file: " . $file);
        }

        $this->elements = [];
        foreach ($doc->getElementsByTagName("li") as $node) {
            $this->elements[] = $this->getDisplayValueFromNode($node);
        }
    }

    /**
     * Get the display text of a list item.
     *
     * This is the trimmed text of the first non-empty child that precedes
     * any nested list; in our case the text of the anchor tag. Whitespace-only
     * text nodes and comments are skipped, and an item without such a child
     * yields an empty string.
     *
     * @param \DOMNode $node The <li> node.
     * @return string
     */
    private function getDisplayValueFromNode($node) {
        foreach ($node->childNodes as $child) {
            // A nested list holds the sub-items, never this item's label.
            if ($child->nodeName === "ul" || $child->nodeName === "ol") {
                break;
            }
            if ($child->nodeType === XML_COMMENT_NODE) {
                continue;
            }

            $text = trim((string) $child->nodeValue);
            if ($text !== "") {
                return $text;
            }
        }

        return "";
    }

    /**
     * Return the flat list built by the last call to parse().
     *
     * @return string[]
     */
    public function getElements() {
        return $this->elements;
    }
}

// Run the parser and dump the flat list of item texts it collected.
$menuParser = new Parser();
$menuParser->parse();

$itemTexts = $menuParser->getElements();
print_r($itemTexts);

Upvotes: 0

Related Questions