Blame view

administrator/components/com_finder/helpers/indexer/token.php 3.93 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2019 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

defined('_JEXEC') or die;

use Joomla\String\StringHelper;

/**
 * Token class for the Finder indexer package.
 *
 * @since  2.5
 */
class FinderIndexerToken
{
	/**
	 * This is the term that will be referenced in the terms table and the
	 * mapping tables.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $term;

	/**
	 * The stem is used to match the root term and produce more potential
	 * matches when searching the index.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $stem;

	/**
	 * If the token is numeric, it is likely to be short and uncommon so the
	 * weight is adjusted to compensate for that situation.
	 *
	 * @var    boolean
	 * @since  2.5
	 */
	public $numeric;

	/**
	 * If the token is a common term, the weight is adjusted to compensate for
	 * the higher frequency of the term in relation to other terms.
	 *
	 * @var    boolean
	 * @since  2.5
	 */
	public $common;

	/**
	 * Flag for phrase tokens.
	 *
	 * @var    boolean
	 * @since  2.5
	 */
	public $phrase;

	/**
	 * The length is used to calculate the weight of the token.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	public $length;

	/**
	 * The weight is calculated based on token size and whether the token is
	 * considered a common term.
	 *
	 * @var    integer
	 * @since  2.5
	 */
	public $weight;

	/**
	 * The simple language identifier for the token.
	 *
	 * @var    string
	 * @since  2.5
	 */
	public $language;

	/**
	 * The container for matches.
	 *
	 * @var    array
	 * @since  3.8.12
	 */
	public $matches = array();

	/**
	 * Is derived token (from individual words)
	 *
	 * @var    boolean
	 * @since  3.8.12
	 */
	public $derived;

	/**
	 * The suggested term
	 *
	 * @var    string
	 * @since  3.8.12
	 */
	public $suggestion;

	/**
	 * Method to construct the token object.
	 *
	 * @param   mixed   $term    The term as a string for words or an array for phrases.
	 * @param   string  $lang    The simple language identifier.
	 * @param   string  $spacer  The space separator for phrases. [optional]
	 *
	 * @since   2.5
	 */
	public function __construct($term, $lang, $spacer = ' ')
	{
		$this->language = $lang;

		// Tokens can be a single word or an array of words representing a phrase.
		if (is_array($term))
		{
			// Populate the token instance.
			$this->term = implode($spacer, $term);
			$this->stem = implode($spacer, array_map(array('FinderIndexerHelper', 'stem'), $term, array($lang)));
			$this->numeric = false;
			$this->common = false;
			$this->phrase = true;
			$this->length = StringHelper::strlen($this->term);

			/*
			 * Calculate the weight of the token.
			 *
			 * 1. Length of the token up to 30 and divide by 30, add 1.
			 * 2. Round weight to 4 decimal points.
			 */
			$this->weight = (($this->length >= 30 ? 30 : $this->length) / 30) + 1;
			$this->weight = round($this->weight, 4);
		}
		else
		{
			// Populate the token instance.
			$this->term = $term;
			$this->stem = FinderIndexerHelper::stem($this->term, $lang);
			$this->numeric = (is_numeric($this->term) || (bool) preg_match('#^[0-9,.\-\+]+$#', $this->term));
			$this->common = $this->numeric ? false : FinderIndexerHelper::isCommon($this->term, $lang);
			$this->phrase = false;
			$this->length = StringHelper::strlen($this->term);

			/*
			 * Calculate the weight of the token.
			 *
			 * 1. Length of the token up to 15 and divide by 15.
			 * 2. If common term, divide weight by 8.
			 * 3. If numeric, multiply weight by 1.5.
			 * 4. Round weight to 4 decimal points.
			 */
			$this->weight = ($this->length >= 15 ? 15 : $this->length) / 15;
			$this->weight = $this->common === true ? $this->weight / 8 : $this->weight;
			$this->weight = $this->numeric === true ? $this->weight * 1.5 : $this->weight;
			$this->weight = round($this->weight, 4);
		}
	}
}