Blame view

administrator/components/com_finder/helpers/indexer/stemmer/snowball.php 2.72 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
<?php
/**
 * @package     Joomla.Administrator
 * @subpackage  com_finder
 *
 * @copyright   Copyright (C) 2005 - 2019 Open Source Matters, Inc. All rights reserved.
 * @license     GNU General Public License version 2 or later; see LICENSE.txt
 */

// Stop execution unless the _JEXEC constant has been defined (presumably by the application entry point).
defined('_JEXEC') or die;

// Register the FinderIndexerStemmer base class with JLoader; it lives in stemmer.php, one directory above this file.
JLoader::register('FinderIndexerStemmer', dirname(__DIR__) . '/stemmer.php');

/**
 * Snowball stemmer class for the Finder indexer package.
 *
 * @since  2.5
 */
class FinderIndexerStemmerSnowball extends FinderIndexerStemmer
{
	/**
	 * Method to stem a token and return the root.
	 *
	 * The language code selects one of the stem_* functions (presumably supplied
	 * by the PECL "stem" extension). Language codes without a dedicated entry use
	 * the English function. When the selected function is not available the token
	 * is returned unchanged. Results are stored in $this->cache per language and
	 * token so each token is only stemmed once.
	 *
	 * @param   string  $token  The token to stem.
	 * @param   string  $lang   The language of the token, or '*' for all languages.
	 *
	 * @return  string  The root token.
	 *
	 * @since   2.5
	 */
	public function stem($token, $lang)
	{
		// Language to use if All is specified. Resolved once and reused by later calls.
		static $defaultLang = '';

		// Stem function name for each supported language code.
		static $functions = array(
			'da' => 'stem_danish',
			'de' => 'stem_german',
			'en' => 'stem_english',
			'es' => 'stem_spanish',
			'fi' => 'stem_finnish',
			'fr' => 'stem_french',
			'hu' => 'stem_hungarian',
			'it' => 'stem_italian',
			'nb' => 'stem_norwegian',
			'nl' => 'stem_dutch',
			'pt' => 'stem_portuguese',
			'ro' => 'stem_romanian',
			'ru' => 'stem_russian_unicode',
			'sv' => 'stem_swedish',
			'tr' => 'stem_turkish_unicode',
		);

		// If language is All then use the site default language on every call,
		// not only on the call that happens to resolve it first.
		if ($lang === '*')
		{
			if ($defaultLang === '')
			{
				// The first language returned is treated as the site default; keep '*' if there is none.
				$languages = JLanguageHelper::getLanguages();
				$defaultLang = isset($languages[0]->sef) ? $languages[0]->sef : '*';
			}

			$lang = $defaultLang;
		}

		// Stem the token if it is not in the cache.
		if (!isset($this->cache[$lang][$token]))
		{
			// Get the stem function from the language string, English being the fallback.
			$function = isset($functions[$lang]) ? $functions[$lang] : 'stem_english';

			// Stem the word if the stemmer function exists, otherwise keep the token as is.
			$this->cache[$lang][$token] = function_exists($function) ? $function($token) : $token;
		}

		return $this->cache[$lang][$token];
	}
}