Wednesday, September 8, 2010

Performance: $xpath->query() vs $dom->getElementsByTagName()

XPath is much faster: querying with XPath runs over 3 times faster than doing a normal traversal using the DOM.

// DOM method: visit every <td> in the document and echo the header cells,
// the content cells, and selected direct children of each cell.
// $dom is created outside this snippet; it is used here as a DOMDocument.
// No DOMXPath object is built here: this traversal never uses one, and
// constructing it would only add overhead to the DOM side of the comparison.
$nodes = $dom->getElementsByTagName('td');
foreach ($nodes as $node) {
    // Guard kept from the original: only plain 'td' node names are processed.
    if ($node->nodeName !== 'td') {
        continue;
    }

    // Read the class attribute once instead of once per comparison.
    $class = $node->getAttribute('class');

    // Header cell: echo its text unchanged.
    if ($class === 'usedcar_InfoName usedcar_InfoName_padding') {
        echo $node->nodeValue . "
";
    }

    // Content cell: echo its text HTML-escaped. Errors from the conversion
    // are no longer silenced with '@', so a failure is visible.
    if ($class === 'usedcar_InfoContent usedcar_InfoContent_padding') {
        echo mb_convert_encoding(htmlspecialchars($node->nodeValue), 'utf-8') . "
";
    }

    // Inspect the direct children of the cell. nodeName is tested first so
    // getAttribute() is only ever called on nodes named 'a' or 'td'.
    foreach ($node->childNodes as $inode) {
        if ($inode->nodeName === 'a' && $inode->getAttribute('class') === 'breadcrumb_link') {
            echo $inode->nodeValue . "
";
        }

        if ($inode->nodeName === 'td' && $inode->getAttribute('class') === 'usedcar_InfoName usedcar_InfoName_padding') {
            echo $inode->nodeValue . "
";
        }
    }
}

// XPath method: select the header and content elements by their class
// attribute, collect their text into two arrays, then echo "header: content".
// $dom is created outside this snippet; it is used here as a DOMDocument.
$xpath = new DOMXPath($dom);
// NOTE(review): $title is not read anywhere in this snippet; in a typical HTML
// document <title> sits under /html/head, so this path may match nothing — confirm.
$title = $xpath->query("/html/title");
$elements = $xpath->query("//*[@class='usedcar_InfoName usedcar_InfoName_padding']"); // header
$elements2 = $xpath->query("//*[@class='usedcar_InfoContent usedcar_InfoContent_padding']"); // content

// Collect header text: one entry for every child node of every header element.
// DOMXPath::query() signals failure with false rather than null, so check for
// an actual node list instead of using is_null().
$headers = array();
if ($elements instanceof DOMNodeList) {
    foreach ($elements as $element) {
        foreach ($element->childNodes as $child) {
            $headers[] = $child->nodeValue;
        }
    }
}

// Collect content text: only the first child node of each content element.
$contents = array();
if ($elements2 instanceof DOMNodeList) {
    foreach ($elements2 as $element) {
        $first = $element->firstChild;
        if ($first !== null) {
            $contents[] = $first->nodeValue;
        }
    }
}

// Print the pairs. The two arrays can differ in length (headers take every
// child node, contents only the first), so a missing content entry is printed
// as an empty string instead of triggering an undefined-offset notice.
$total = count($headers);
for ($count = 0; $count < $total; $count++) {
    $content = isset($contents[$count]) ? $contents[$count] : '';
    echo $headers[$count] . ": " . $content . "
";
}

No comments: