snowball.php
2.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
<?php
/**
* @package Joomla.Administrator
* @subpackage com_finder
*
* @copyright Copyright (C) 2005 - 2019 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
// Stop executing this file unless the _JEXEC constant has been defined.
defined('_JEXEC') or die;
// Register FinderIndexerStemmer with JLoader, pointing at stemmer.php one directory above this file's directory.
// NOTE(review): presumably this lets the parent class used below be autoloaded on demand - confirm against JLoader.
JLoader::register('FinderIndexerStemmer', dirname(__DIR__) . '/stemmer.php');
/**
* Snowball stemmer class for the Finder indexer package.
*
* @since 2.5
*/
class FinderIndexerStemmerSnowball extends FinderIndexerStemmer
{
	/**
	 * Method to stem a token and return the root.
	 *
	 * The language code selects one of the stem_* functions; unknown codes
	 * fall back to the English stemmer. If the selected function does not
	 * exist at runtime the token is returned unchanged. Results are memoised
	 * in $this->cache, keyed by language and then by token.
	 *
	 * When $lang is '*' (all languages) the sef code of the first language
	 * returned by JLanguageHelper::getLanguages() is used instead. That lookup
	 * is done once and then applied to every subsequent '*' call, so all
	 * such tokens are stemmed and cached under the same language.
	 *
	 * @param   string  $token  The token to stem.
	 * @param   string  $lang   The language of the token.
	 *
	 * @return  string  The root token.
	 *
	 * @since   2.5
	 */
	public function stem($token, $lang)
	{
		// Language to use if All is specified; resolved lazily and kept across calls.
		static $defaultLang = '';

		// Map of language codes to their stemmer function names.
		static $functions = array(
			'da' => 'stem_danish',
			'de' => 'stem_german',
			'en' => 'stem_english',
			'es' => 'stem_spanish',
			'fi' => 'stem_finnish',
			'fr' => 'stem_french',
			'hu' => 'stem_hungarian',
			'it' => 'stem_italian',
			'nb' => 'stem_norwegian',
			'nl' => 'stem_dutch',
			'pt' => 'stem_portuguese',
			'ro' => 'stem_romanian',
			'ru' => 'stem_russian_unicode',
			'sv' => 'stem_swedish',
			'tr' => 'stem_turkish_unicode',
		);

		// If language is All then use the site default language.
		if ($lang === '*')
		{
			// Only query the installed languages the first time; '*' is kept if none is found.
			if ($defaultLang === '')
			{
				$languages   = JLanguageHelper::getLanguages();
				$defaultLang = isset($languages[0]->sef) ? $languages[0]->sef : '*';
			}

			// Apply the resolved default on every call, not just the one that resolved it.
			$lang = $defaultLang;
		}

		// Stem the token if it is not in the cache.
		if (!isset($this->cache[$lang][$token]))
		{
			// Get the stem function from the language string, defaulting to English.
			$function = isset($functions[$lang]) ? $functions[$lang] : 'stem_english';

			// Stem the word if the stemmer function exists, otherwise keep the token as is.
			$this->cache[$lang][$token] = function_exists($function) ? $function($token) : $token;
		}

		return $this->cache[$lang][$token];
	}
}