/**
 * Regex fragment matching one consonant: a letter from the fixed
 * lowercase consonant set, or a "y" that either directly follows a
 * vowel or sits at the very start of the subject.
 *
 * @var string
 */
private static $regex_consonant = '(?:[bcdfghjklmnpqrstvwxz]|(?<=[aeiou])y|^y)';

/**
 * Regex fragment matching one vowel: a/e/i/o/u, or a "y" that is not
 * directly preceded by one of those vowels.
 *
 * @var string
 */
private static $regex_vowel = '(?:[aeiou]|(?<![aeiou])y)';

/**
 * Stems computed so far, keyed by word. Only consulted by Stem() when
 * its $cache argument is truthy.
 *
 * @var array
 */
private static $cache = array();
/**
 * Stems a word by running it through steps 1ab, 1c, 2, 3, 4 and 5 in order.
 *
 * Words of two characters or fewer are returned unchanged. A trailing
 * 've, n't or 'd is stripped before stemming. The letter classes and
 * suffix lists used by the steps are lowercase only, so input is
 * presumably expected to be lowercased by the caller.
 *
 * @param  string $word  The word to stem.
 * @param  bool   $cache When truthy, look the word up in (and store the
 *                       result into) the class-level stem cache.
 * @return string The stemmed word.
 */
public static function Stem($word, $cache = false)
{
    if (strlen($word) <= 2) {
        return $word;
    }

    // The cache is keyed by the word exactly as the caller passed it.
    // Previously the lookup used the raw word but the store used the
    // word *after* contraction stripping, so e.g. "haven't" never hit.
    $key = $word;

    // isset() rather than !empty(): a cached stem that happens to be a
    // falsy string must still count as a hit.
    if ($cache AND isset(self::$cache[$key])) {
        return self::$cache[$key];
    }

    // Remove: 've, n't, 'd
    $word = preg_replace("/('ve|n't|'d)$/", '', $word);

    $stem = self::step1ab($word);
    $stem = self::step1c($stem);
    $stem = self::step2($stem);
    $stem = self::step3($stem);
    $stem = self::step4($stem);
    $stem = self::step5($stem);

    if ($cache) {
        self::$cache[$key] = $stem;
    }

    return $stem;
}
/**
 * Step 1a/1b: strips plural endings, then "eed"/"ing"/"ed", and tidies
 * up the stem left behind by an "ing"/"ed" removal.
 *
 * @param  string $word Word to process.
 * @return string Word after step 1a and 1b.
 */
private static function step1ab($word)
{
    // --- Part a: plurals. Rules are tried longest-first; the first
    // suffix that matches ends the search (replace() reports a match
    // even for the no-op 'ss' => 'ss' rule, which protects "-ss" words
    // from the bare 's' rule).
    if (substr($word, -1) === 's') {
        $pluralRules = array(
            array('sses', 'ss'),
            array('ies', 'i'),
            array('ss', 'ss'),
            array('s', ''),
        );
        foreach ($pluralRules as $rule) {
            if (self::replace($word, $rule[0], $rule[1])) {
                break;
            }
        }
    }

    // --- Part b, first rule: a word ending in "eed" is handled here
    // (rewritten to "ee" when the measure allows) and goes no further.
    if (substr($word, -2, 1) === 'e' && self::replace($word, 'eed', 'ee', 0)) {
        return $word;
    }

    // "ing" / "ed" are only removed when the part in front of the
    // suffix contains a vowel.
    $hasVowel = '#' . self::$regex_vowel . '+#';
    $removed = (preg_match($hasVowel, substr($word, 0, -3)) && self::replace($word, 'ing', ''))
        || (preg_match($hasVowel, substr($word, 0, -2)) && self::replace($word, 'ed', ''));

    if (!$removed) {
        return $word;
    }

    // Clean-up after a successful removal: restore a trailing "e" for
    // at/bl/iz stems ...
    if (self::replace($word, 'at', 'ate')
        || self::replace($word, 'bl', 'ble')
        || self::replace($word, 'iz', 'ize')) {
        return $word;
    }

    // ... otherwise undouble a final double consonant (except ll/ss/zz),
    // or append "e" to a short consonant-vowel-consonant stem.
    $lastTwo = substr($word, -2);
    if (self::doubleConsonant($word) && !in_array($lastTwo, array('ll', 'ss', 'zz'), true)) {
        $word = substr($word, 0, -1);
    } elseif (self::m($word) == 1 && self::cvc($word)) {
        // Porter rule "(m=1 and *o) -> E".
        $word .= 'e';
    }

    return $word;
}
/**
 * Step 1c: turns a final "y" into "i" when the rest of the word
 * contains a vowel.
 *
 * @param  string $word Word to process.
 * @return string Word after step 1c.
 */
private static function step1c($word)
{
    if (substr($word, -1) != 'y') {
        return $word;
    }

    $head = substr($word, 0, -1);
    $hasVowel = '#' . self::$regex_vowel . '+#';

    if (preg_match($hasVowel, $head)) {
        // Unconditional rewrite of the trailing "y" (no measure check).
        self::replace($word, 'y', 'i');
    }

    return $word;
}
/**
 * Step 2: maps double suffixes onto single ones (e.g. "ational" to
 * "ate") when the stem before the suffix has measure > 0.
 *
 * @param  string $word Word to process.
 * @return string Word after step 2.
 */
private static function step2($word)
{
    // Rules grouped by the word's second-to-last letter, which is also
    // the second-to-last letter of every suffix in that group. Within a
    // group the first suffix that matches wins, whether or not the
    // measure check lets the rewrite happen.
    $rulesByLetter = array(
        'a' => array(
            array('ational', 'ate'),
            array('tional', 'tion'),
        ),
        'c' => array(
            array('enci', 'ence'),
            array('anci', 'ance'),
        ),
        'e' => array(
            array('izer', 'ize'),
        ),
        'g' => array(
            array('logi', 'log'),
        ),
        'l' => array(
            array('entli', 'ent'),
            array('ousli', 'ous'),
            array('alli', 'al'),
            array('bli', 'ble'),
            array('eli', 'e'),
        ),
        'o' => array(
            array('ization', 'ize'),
            array('ation', 'ate'),
            array('ator', 'ate'),
        ),
        's' => array(
            array('iveness', 'ive'),
            array('fulness', 'ful'),
            array('ousness', 'ous'),
            array('alism', 'al'),
        ),
        't' => array(
            array('biliti', 'ble'),
            array('aliti', 'al'),
            array('iviti', 'ive'),
        ),
    );

    $letter = (string) substr($word, -2, 1);

    if (isset($rulesByLetter[$letter])) {
        foreach ($rulesByLetter[$letter] as $rule) {
            if (self::replace($word, $rule[0], $rule[1], 0)) {
                break;
            }
        }
    }

    return $word;
}
/**
 * Step 3: shortens or drops a further set of suffixes ("ical", "ness",
 * "ful", ...) when the stem before the suffix has measure > 0.
 *
 * @param  string $word Word to process.
 * @return string Word after step 3.
 */
private static function step3($word)
{
    // Grouped by the word's second-to-last letter; first matching
    // suffix in a group ends the search.
    $rulesByLetter = array(
        'a' => array(
            array('ical', 'ic'),
        ),
        's' => array(
            array('alise', 'al'),
            array('ness', ''),
        ),
        't' => array(
            array('icate', 'ic'),
            array('iciti', 'ic'),
        ),
        'u' => array(
            array('ful', ''),
        ),
        'v' => array(
            array('ative', ''),
        ),
        'z' => array(
            array('alize', 'al'),
        ),
    );

    $letter = (string) substr($word, -2, 1);

    if (isset($rulesByLetter[$letter])) {
        foreach ($rulesByLetter[$letter] as $rule) {
            if (self::replace($word, $rule[0], $rule[1], 0)) {
                break;
            }
        }
    }

    return $word;
}
/**
 * Step 4: deletes a final suffix ("al", "ance", "ment", "ion", ...)
 * when the stem before the suffix has measure > 1.
 *
 * @param  string $word Word to process.
 * @return string Word after step 4.
 */
private static function step4($word)
{
    $letter = (string) substr($word, -2, 1);

    // 'o' is the one group with extra logic: "ion" is only considered
    // for words ending in "tion"/"sion"; every other word is tried
    // against "ou" instead.
    if ($letter === 'o') {
        $lastFour = substr($word, -4);
        $suffix = ($lastFour == 'tion' || $lastFour == 'sion') ? 'ion' : 'ou';
        self::replace($word, $suffix, '', 1);

        return $word;
    }

    // Remaining suffixes, grouped by the word's second-to-last letter.
    // Order inside a group matters: longer suffixes come first and the
    // first match ends the search.
    $suffixesByLetter = array(
        'a' => array('al'),
        'c' => array('ance', 'ence'),
        'e' => array('er'),
        'i' => array('ic'),
        'l' => array('able', 'ible'),
        'n' => array('ant', 'ement', 'ment', 'ent'),
        's' => array('ism'),
        't' => array('ate', 'iti'),
        'u' => array('ous'),
        'v' => array('ive'),
        'z' => array('ize'),
    );

    if (isset($suffixesByLetter[$letter])) {
        foreach ($suffixesByLetter[$letter] as $suffix) {
            if (self::replace($word, $suffix, '', 1)) {
                break;
            }
        }
    }

    return $word;
}
/**
 * Step 5: drops a final "e" and reduces a final "ll" to "l", subject
 * to the measure of the stem.
 *
 * @param  string $word Word to process.
 * @return string Word after step 5.
 */
private static function step5($word)
{
    // Part a: trailing "e" goes when the remainder has measure > 1, or
    // measure == 1 and the remainder does not end consonant-vowel-consonant.
    if (substr($word, -1) == 'e') {
        $withoutE = substr($word, 0, -1);
        $measure = self::m($withoutE);

        $drop = $measure > 1
            || ($measure == 1 && !self::cvc($withoutE));

        if ($drop) {
            self::replace($word, 'e', '');
        }
    }

    // Part b: "...ll" -> "...l" for words of measure > 1.
    $endsInDoubleL = self::m($word) > 1
        && self::doubleConsonant($word)
        && substr($word, -1) == 'l';

    if ($endsInDoubleL) {
        $word = substr($word, 0, -1);
    }

    return $word;
}
/**
 * Replaces a suffix of $str in place, optionally only when the stem in
 * front of the suffix has a large enough measure.
 *
 * The return value reports whether the suffix was *present*, not
 * whether the string was changed: callers chain calls with OR and rely
 * on a match stopping the chain even when the measure check vetoes the
 * rewrite.
 *
 * @param  string   $str   Word to modify (by reference).
 * @param  string   $check Suffix to look for.
 * @param  string   $repl  Replacement for the suffix.
 * @param  int|null $m     When not null, the stem's measure must be
 *                         strictly greater than this for the rewrite
 *                         to happen.
 * @return bool True if $str ends with $check, false otherwise.
 */
private static function replace(&$str, $check, $repl, $m = null)
{
    $offset = -strlen($check);

    if (substr($str, $offset) !== $check) {
        return false;
    }

    $stem = substr($str, 0, $offset);

    if ($m === null || self::m($stem) > $m) {
        $str = $stem . $repl;
    }

    return true;
}
/**
 * Computes the "measure" of a string: the number of vowel-run followed
 * by consonant-run pairs left once leading consonants and trailing
 * vowels have been stripped.
 *
 * @param  string $str String to measure.
 * @return int Number of vowel+consonant sequences found.
 */
private static function m($str)
{
    $consonant = self::$regex_consonant;
    $vowel = self::$regex_vowel;

    // Trim the optional leading consonant run and trailing vowel run so
    // that only complete vowel/consonant pairs remain to be counted.
    $trimmed = preg_replace('#^' . $consonant . '+#', '', $str);
    $trimmed = preg_replace('#' . $vowel . '+$#', '', $trimmed);

    preg_match_all('#(' . $vowel . '+' . $consonant . '+)#', $trimmed, $pairs);

    return count($pairs[1]);
}
/**
 * Tells whether a string ends in the same consonant written twice
 * (e.g. "tt", "ss").
 *
 * @param  string $str String to inspect.
 * @return bool True when the last two characters are consonants and
 *              are identical.
 */
private static function doubleConsonant($str)
{
    $c = self::$regex_consonant;

    if (!preg_match('#' . $c . '{2}$#', $str, $matches)) {
        return false;
    }

    // Square-bracket offsets: the former $s{0} curly-brace form is
    // deprecated since PHP 7.4 and a parse error from PHP 8.0.
    $pair = $matches[0];

    return $pair[0] === $pair[1];
}
/**
 * Tells whether a string ends consonant-vowel-consonant where the
 * final consonant is not "w", "x" or "y".
 *
 * @param  string $str String to inspect.
 * @return bool True when the ending matches the pattern described above.
 */
private static function cvc($str)
{
    $c = self::$regex_consonant;
    $v = self::$regex_vowel;

    if (!preg_match('#(' . $c . $v . $c . ')$#', $str, $matches)) {
        return false;
    }

    $ending = $matches[1];
    if (strlen($ending) !== 3) {
        return false;
    }

    // Square-bracket offset: the former $s{2} curly-brace form is
    // deprecated since PHP 7.4 and a parse error from PHP 8.0.
    return !in_array($ending[2], array('w', 'x', 'y'), true);
}