<?php
// namespace components\com_jmap\libraries\xml;
/**
 * @package JMAP::FRAMEWORK::components::com_jmap
 * @subpackage framework
 * @subpackage xml
 * @author Joomla! Extensions Store
 * @copyright (C) 2015 - Joomla! Extensions Store
 * @license GNU/GPLv2 http://www.gnu.org/licenses/gpl-2.0.html
 */
defined('_JEXEC') or die('Restricted access');
jimport ( 'joomla.utilities.date' );

/**
 * XML files splitter public responsibilities
 *
 * @package JMAP::FRAMEWORK::components::com_jmap
 * @subpackage framework
 * @subpackage xml
 * @since 1.0
 */
interface IJMapXmlSplitter {
	/**
	 * Chunks accessor method
	 *
	 * @access public
	 * @return mixed
	 */
	public function getChunks();

	/**
	 * Start chunking, given an input XML string
	 *
	 * @access public
	 * @param string $xmlString XML document to split
	 * @param string $tag Name of the element that delimits one item
	 * @param number $howmany Maximum number of items per chunk
	 * @param string|boolean $precachedSitemapDirectFile Path of a file to stream instead of $xmlString, or false
	 * @return void
	 */
	public function chunkXMLString($xmlString, $tag = 'url', $howmany = 5, $precachedSitemapDirectFile = false);
}

/**
 * XML files splitter for sitemap chunking
 *
 * Streams an XML document through the expat based parser, groups the
 * target elements into chunks of at most `itemLimit` items, wraps every
 * chunk in the detected root node and finally appends a sitemap index
 * entry listing all generated chunk files.
 *
 * @package JMAP::FRAMEWORK::components::com_jmap
 * @subpackage libraries
 * @subpackage xml
 * @since 1.0
 */
class JMapXmlSplitter implements IJMapXmlSplitter {
	/**
	 * Chunks array container, each entry is array('data' => string, 'name' => string)
	 *
	 * @access private
	 * @var array
	 */
	private $chunkFiles;

	/**
	 * Chunks counter
	 *
	 * @access private
	 * @var int
	 */
	private $chunksCounter;

	/**
	 * Chunk payload currently being accumulated
	 *
	 * @access private
	 * @var string
	 */
	private $chunkPayload;

	/**
	 * Target tag to extract chunks
	 *
	 * @access private
	 * @var string
	 */
	private $targetTag;

	/**
	 * Number of target items accumulated in the current chunk
	 *
	 * @access private
	 * @var int
	 */
	private $itemCount;

	/**
	 * Items limit to split chunk
	 *
	 * @access private
	 * @var int
	 */
	private $itemLimit;

	/**
	 * Number of target items still to be closed, initialised from the DOM
	 * element count and decremented on every closed target element
	 *
	 * @access private
	 * @var int
	 */
	private $itemTotal;

	/**
	 * Format chunks file
	 *
	 * @access private
	 * @var string
	 */
	private $format;

	/**
	 * Language chunks file if any, not required
	 *
	 * @access private
	 * @var string
	 */
	private $language;

	/**
	 * Dataset chunk if any, not required
	 *
	 * @access private
	 * @var string
	 */
	private $dataset;

	/**
	 * Itemid chunk if any, not required
	 *
	 * @access private
	 * @var string
	 */
	private $itemid;

	/**
	 * XML root node info: array('rootNodeName' => string, 'rootNodeAttributes' => string)
	 *
	 * @access private
	 * @var array
	 */
	private $xmlRootNode;

	/**
	 * DOM Document instance
	 *
	 * @access private
	 * @var Object
	 */
	private $doc;

	/**
	 * Live site string
	 *
	 * @access private
	 * @var string
	 */
	private $liveSite;

	/**
	 * ISO date string
	 *
	 * @access private
	 * @var string
	 */
	private $ISO8601Date;

	/**
	 * Default hardcoded root nodes based on sitemap type
	 *
	 * @access private
	 * @var array
	 */
	private $defaultRootNodes;

	/**
	 * Component params
	 *
	 * @access private
	 * @var Object
	 */
	private $cParams;

	/**
	 * Start tag element processing: appends the opening tag and its
	 * attributes to the current payload
	 *
	 * @access private
	 * @param resource $xml Parser handle
	 * @param string $tag
	 * @param array $attrs
	 * @return void
	 */
	private function startElement($xml, $tag, $attrs = array()) {
		// On the very first target element (no chunk stored, no item counted yet)
		// drop whatever was accumulated so far, i.e. the opening of the root node
		if (!$this->chunksCounter && !$this->itemCount && $this->targetTag == strtolower($tag)) {
			$this->chunkPayload = '';
		}

		$this->chunkPayload .= "<$tag";
		foreach ($attrs as $name => $value) {
			// The parser hands over decoded values: re-escape them for a double quoted XML attribute
			$this->chunkPayload .= " $name=" . '"' . htmlspecialchars($value, ENT_COMPAT, 'UTF-8') . '"';
		}
		$this->chunkPayload .= '>';
	}

	/**
	 * End tag element processing: appends the closing tag and stores the
	 * chunk when the items limit is reached or the last item has been closed
	 *
	 * @access private
	 * @param resource $xml Parser handle
	 * @param string $tag
	 * @return void
	 */
	private function endElement($xml, $tag) {
		$this->chunkPayload .= "</$tag>";

		if ($this->targetTag != strtolower($tag)) {
			return;
		}

		$this->itemCount++;
		$limitReached = $this->itemCount >= $this->itemLimit;
		// itemTotal == 1 means this is the last remaining item: flush the partial chunk
		$lastItem = $this->itemTotal == 1 && $this->itemCount <= $this->itemLimit;

		if ($limitReached || $lastItem) {
			$this->processChunk($this->chunkPayload);
			$this->chunkPayload = '';
			$this->itemCount = 0;
		}

		$this->itemTotal -= 1;
	}

	/**
	 * Character data handler, concatenates the text to the current payload
	 *
	 * @access private
	 * @param resource $xml Parser handle
	 * @param string $data
	 * @return void
	 */
	private function dataHandler($xml, $data) {
		$this->chunkPayload .= $data;
	}

	/**
	 * Default handler, intentionally discards everything it receives
	 *
	 * @access private
	 * @param resource $xml Parser handle
	 * @param string $data
	 * @return void
	 */
	private function defaultHandler($xml, $data) {
		// a.k.a. Wild Text Fallback Handler, or WTFHandler for short.
	}

	/**
	 * Create and configure the XML parser
	 *
	 * @access private
	 * @param string $CHARSET
	 * @param boolean $bareXML When true the parser is primed with an XML declaration
	 * @return Resource
	 */
	private function createXMLParser($CHARSET, $bareXML = false) {
		$parser = xml_parser_create($CHARSET);

		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, false);
		xml_parser_set_option($parser, XML_OPTION_TARGET_ENCODING, $CHARSET);

		xml_set_element_handler($parser, array($this, 'startElement'), array($this, 'endElement'));
		xml_set_character_data_handler($parser, array($this, 'dataHandler'));
		xml_set_default_handler($parser, array($this, 'defaultHandler'));

		if ($bareXML) {
			xml_parse($parser, '<?xml version="1.0"?>', 0);
		}

		return $parser;
	}

	/**
	 * Store a completed chunk: wraps the payload in the root node and
	 * appends it, together with its file name, to the chunks container
	 *
	 * @access private
	 * @param string $xmlstring
	 * @return void
	 */
	private function processChunk($xmlstring) {
		$rootName = $this->xmlRootNode['rootNodeName'];

		// Init chunk file with data and name
		$data = "<?xml version='1.0' encoding='UTF-8'?>\n"
			. "<" . $rootName . " " . $this->xmlRootNode['rootNodeAttributes'] . ">\n"
			. JFilterOutput::ampReplace(trim($xmlstring, "\n"))
			. "\n</" . $rootName . ">";

		$name = "sitemap_" . $this->format . $this->language . $this->dataset . $this->itemid . '_' . $this->chunksCounter . '.xml';

		// Assign chunk to container
		$this->chunkFiles[] = array('data' => $data, 'name' => $name);

		// Increment counter for chunks
		$this->chunksCounter++;
	}

	/**
	 * Load the XML string into the DOM document and return the root node
	 * name and attributes, either hardcoded per format or detected
	 *
	 * @access private
	 * @param string $xmlstr An xml string
	 * @return array array('rootNodeName' => string, 'rootNodeAttributes' => string)
	 * @throws JMapException When the string cannot be parsed or has no root node
	 */
	private function getXMLRootNode($xmlstr) {
		// Load the XML string, the DOM is needed later on even for hardcoded root nodes
		if (!$this->doc->loadXML($xmlstr)) {
			throw new JMapException('Unable to parse XML string', 'warning');
		}

		// If default root nodes need to be retrieved hardcoded get it and avoid parsing,
		// unknown formats fall back to the detection below instead of returning null
		if ($this->cParams->get('splitting_hardcoded_rootnode', true) && isset($this->defaultRootNodes[$this->format])) {
			return $this->defaultRootNodes[$this->format];
		}

		// Find the root tag name
		$root = $this->doc->documentElement;
		if (!isset($root)) {
			throw new JMapException('Unable to find XML root node', 'warning');
		}
		if (!isset($root->nodeName)) {
			throw new JMapException('Unable to find XML root node name', 'warning');
		}

		// Always start from an array so that a bare root node yields an empty attributes string
		$attributes = array();
		if ($root->hasAttributes()) {
			foreach ($root->attributes as $attr) {
				$attributes[] = $attr->nodeName . '="' . $attr->nodeValue . '" ';
			}
		}

		// Namespace declarations are not exposed as attributes, collect them through XPath
		$xpath = new DOMXPath($this->doc);
		foreach ($xpath->query('namespace::*', $root) as $ns) {
			if ($ns->nodeName == 'xmlns:xml') {
				continue;
			}
			$attributes[] = $ns->nodeName . '="' . $ns->nodeValue . '" ';
		}

		$attributes = array_reverse($attributes);

		return array(
			'rootNodeName' => $root->nodeName,
			'rootNodeAttributes' => trim(implode('', $attributes))
		);
	}

	/**
	 * Generate index XML file for sitemap chunks and append it to the
	 * chunks container as the last file
	 *
	 * @access private
	 * @return void
	 */
	private function generateIndexFile() {
		// For every chunk generate an XML entry with lastmod date
		$data = "<?xml version='1.0' encoding='UTF-8'?>\n";
		$data .= "<sitemapindex xmlns='http://www.sitemaps.org/schemas/sitemap/0.9'>\n";
		foreach ($this->chunkFiles as $file) {
			$data .= "<sitemap>\n"
				. "<loc>" . $this->liveSite . $file['name'] . "</loc>\n"
				. "<lastmod>" . $this->ISO8601Date . "</lastmod>\n"
				. "</sitemap>\n";
		}
		$data .= "</sitemapindex>";

		// Finally add itself as a file to chunk files array
		$name = "sitemapindex_" . $this->format . $this->language . $this->dataset . $this->itemid . '.xml';

		// Assign chunk to container
		$this->chunkFiles[] = array('data' => $data, 'name' => $name);

		// Increment counter for chunks
		$this->chunksCounter++;
	}

	/**
	 * Chunks accessor method
	 *
	 * @access public
	 * @return mixed Array of chunk files, or false when nothing has been generated
	 */
	public function getChunks() {
		if (!$this->chunksCounter) {
			return false;
		}

		return $this->chunkFiles;
	}

	/**
	 * Start chunking, given an input XML string
	 *
	 * @access public
	 * @param string $xmlString XML document, always used to detect the root node and count the items
	 * @param string $tag Name of the element that delimits one item
	 * @param number $howmany Maximum number of items per chunk
	 * @param string|boolean $precachedSitemapDirectFile Path of a file to stream instead of $xmlString, or false
	 * @return void
	 * @throws JMapException When the XML cannot be parsed or the source cannot be opened
	 */
	public function chunkXMLString($xmlString, $tag = 'url', $howmany = 5, $precachedSitemapDirectFile = false) {
		$this->targetTag = $tag;
		$this->itemLimit = (int)$howmany;

		// Extract the root node that fits this sitemap
		$this->xmlRootNode = $this->getXMLRootNode($xmlString);

		// Count the elements of the requested tag so that the last partial chunk gets flushed
		$this->itemTotal = $this->doc->getElementsByTagName($tag)->length;

		$xml = $this->createXMLParser('UTF-8', false);

		if (!$precachedSitemapDirectFile) {
			$fp = fopen('data://text/plain,' . urlencode($xmlString), 'r');
		} else {
			$fp = fopen($precachedSitemapDirectFile, 'r');
		}

		// Without a valid stream the read loop below could never terminate
		if (!$fp) {
			xml_parser_free($xml);
			throw new JMapException('Unable to open XML source for chunking', 'warning');
		}

		while (!feof($fp)) {
			$chunk = fgets($fp, 10240);
			if ($chunk === false) {
				// Nothing more to read: tell the parser the document is complete and stop
				xml_parse($xml, '', true);
				break;
			}
			xml_parse($xml, $chunk, feof($fp));
		}

		fclose($fp);
		xml_parser_free($xml);

		// Finally add the index to the bunch of chunks to be included in zip
		$this->generateIndexFile();
	}

	/**
	 * Class constructor
	 *
	 * @access public
	 * @param string $format
	 * @param string $language
	 * @param int $dataset
	 * @param int $itemid
	 */
	public function __construct($format, $language, $dataset, $itemid) {
		// Init properties
		$this->format = $format;
		$this->language = $language;
		$this->dataset = $dataset;
		$this->itemid = $itemid;
		$this->cParams = JComponentHelper::getParams('com_jmap');

		// Default root nodes
		$this->defaultRootNodes = array(
			'xml'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'images'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'gnews'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:news="http://www.google.com/schemas/sitemap-news/0.9" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'mobile'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:mobile="http://www.google.com/schemas/sitemap-mobile/1.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'videos'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:video="http://www.google.com/schemas/sitemap-video/1.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'hreflang'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'),
			'amp'=>array('rootNodeName'=>'urlset', 'rootNodeAttributes'=>'xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"')
		);

		// Date format for index XML file
		$dateObj = new JDate();
		$globalConfig = JFactory::getConfig();
		$dateObj->setTimezone(new DateTimeZone($globalConfig->get('offset')));
		$this->ISO8601Date = $dateObj->toISO8601(true);

		// Live site for index XML file
		$this->liveSite = JUri::root(false);

		// Create DOM model
		$this->doc = new DOMDocument();

		// Explicit initial state for the chunking machinery
		$this->chunkFiles = array();
		$this->targetTag = null;
		$this->chunksCounter = 0;
		$this->chunkPayload = '';
		$this->itemCount = 0;
		$this->itemTotal = 0;
		$this->itemLimit = 5;
	}
}