string.php
9.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
<?php
/**
* @package Joomla.Libraries
* @subpackage HTML
*
* @copyright Copyright (C) 2005 - 2019 Open Source Matters, Inc. All rights reserved.
* @license GNU General Public License version 2 or later; see LICENSE.txt
*/
defined('JPATH_PLATFORM') or die;
use Joomla\String\StringHelper;
/**
* HTML helper class for rendering manipulated strings.
*
* @since 1.6
*/
abstract class JHtmlString
{
/**
* Truncates text blocks over the specified character limit and closes
* all open HTML tags. The method will optionally not truncate an individual
* word, it will find the first space that is within the limit and
* truncate at that point. This method is UTF-8 safe.
*
* @param string $text The text to truncate.
* @param integer $length The maximum length of the text.
* @param boolean $noSplit Don't split a word if that is where the cutoff occurs (default: true).
* @param boolean $allowHtml Allow HTML tags in the output, and close any open tags (default: true).
*
* @return string The truncated text.
*
* @since 1.6
*/
public static function truncate($text, $length = 0, $noSplit = true, $allowHtml = true)
{
// Assume a lone open tag is invalid HTML.
if ($length === 1 && $text[0] === '<')
{
return '...';
}
// Check if HTML tags are allowed.
if (!$allowHtml)
{
// Deal with spacing issues in the input.
$text = str_replace('>', '> ', $text);
$text = str_replace(array(' ', ' '), ' ', $text);
$text = StringHelper::trim(preg_replace('#\s+#mui', ' ', $text));
// Strip the tags from the input and decode entities.
$text = strip_tags($text);
$text = html_entity_decode($text, ENT_QUOTES, 'UTF-8');
// Remove remaining extra spaces.
$text = str_replace(' ', ' ', $text);
$text = StringHelper::trim(preg_replace('#\s+#mui', ' ', $text));
}
// Whether or not allowing HTML, truncate the item text if it is too long.
if ($length > 0 && StringHelper::strlen($text) > $length)
{
$tmp = trim(StringHelper::substr($text, 0, $length));
if ($tmp[0] === '<' && strpos($tmp, '>') === false)
{
return '...';
}
// $noSplit true means that we do not allow splitting of words.
if ($noSplit)
{
// Find the position of the last space within the allowed length.
$offset = StringHelper::strrpos($tmp, ' ');
$tmp = StringHelper::substr($tmp, 0, $offset + 1);
// If there are no spaces and the string is longer than the maximum
// we need to just use the ellipsis. In that case we are done.
if ($offset === false && strlen($text) > $length)
{
return '...';
}
if (StringHelper::strlen($tmp) > $length - 3)
{
$tmp = trim(StringHelper::substr($tmp, 0, StringHelper::strrpos($tmp, ' ')));
}
}
if ($allowHtml)
{
// Put all opened tags into an array
preg_match_all("#<([a-z][a-z0-9]*)\b.*?(?!/)>#i", $tmp, $result);
$openedTags = $result[1];
// Some tags self close so they do not need a separate close tag.
$openedTags = array_diff($openedTags, array('img', 'hr', 'br'));
$openedTags = array_values($openedTags);
// Put all closed tags into an array
preg_match_all("#</([a-z][a-z0-9]*)\b(?:[^>]*?)>#iU", $tmp, $result);
$closedTags = $result[1];
$numOpened = count($openedTags);
// Not all tags are closed so trim the text and finish.
if (count($closedTags) !== $numOpened)
{
// Closing tags need to be in the reverse order of opening tags.
$openedTags = array_reverse($openedTags);
// Close tags
for ($i = 0; $i < $numOpened; $i++)
{
if (!in_array($openedTags[$i], $closedTags))
{
$tmp .= '</' . $openedTags[$i] . '>';
}
else
{
unset($closedTags[array_search($openedTags[$i], $closedTags)]);
}
}
}
// Check if we are within a tag
if (StringHelper::strrpos($tmp, '<') > StringHelper::strrpos($tmp, '>'))
{
$offset = StringHelper::strrpos($tmp, '<');
$tmp = StringHelper::trim(StringHelper::substr($tmp, 0, $offset));
}
}
if ($tmp === false || strlen($text) > strlen($tmp))
{
$text = trim($tmp) . '...';
}
}
// Clean up any internal spaces created by the processing.
$text = str_replace(' </', '</', $text);
$text = str_replace(' ...', '...', $text);
return $text;
}
/**
* Method to extend the truncate method to more complex situations
*
* The goal is to get the proper length plain text string with as much of
* the html intact as possible with all tags properly closed.
*
* @param string $html The content of the introtext to be truncated
* @param integer $maxLength The maximum number of characters to render
* @param boolean $noSplit Don't split a word if that is where the cutoff occurs (default: true).
*
* @return string The truncated string. If the string is truncated an ellipsis
* (...) will be appended.
*
* @note If a maximum length of 3 or less is selected and the text has more than
* that number of characters an ellipsis will be displayed.
* This method will not create valid HTML from malformed HTML.
*
* @since 3.1
*/
public static function truncateComplex($html, $maxLength = 0, $noSplit = true)
{
// Start with some basic rules.
$baseLength = strlen($html);
// If the original HTML string is shorter than the $maxLength do nothing and return that.
if ($baseLength <= $maxLength || $maxLength === 0)
{
return $html;
}
// Take care of short simple cases.
if ($maxLength <= 3 && $html[0] !== '<' && strpos(substr($html, 0, $maxLength - 1), '<') === false && $baseLength > $maxLength)
{
return '...';
}
// Deal with maximum length of 1 where the string starts with a tag.
if ($maxLength === 1 && $html[0] === '<')
{
$endTagPos = strlen(strstr($html, '>', true));
$tag = substr($html, 1, $endTagPos);
$l = $endTagPos + 1;
if ($noSplit)
{
return substr($html, 0, $l) . '</' . $tag . '...';
}
// TODO: $character doesn't seem to be used...
$character = substr(strip_tags($html), 0, 1);
return substr($html, 0, $l) . '</' . $tag . '...';
}
// First get the truncated plain text string. This is the rendered text we want to end up with.
$ptString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, $allowHtml = false);
// It's all HTML, just return it.
if ($ptString === '')
{
return $html;
}
// If the plain text is shorter than the max length the variable will not end in ...
// In that case we use the whole string.
if (substr($ptString, -3) !== '...')
{
return $html;
}
// Regular truncate gives us the ellipsis but we want to go back for text and tags.
if ($ptString === '...')
{
$stripped = substr(strip_tags($html), 0, $maxLength);
$ptString = JHtml::_('string.truncate', $stripped, $maxLength, $noSplit, $allowHtml = false);
}
// We need to trim the ellipsis that truncate adds.
$ptString = rtrim($ptString, '.');
// Now deal with more complex truncation.
while ($maxLength <= $baseLength)
{
// Get the truncated string assuming HTML is allowed.
$htmlString = JHtml::_('string.truncate', $html, $maxLength, $noSplit, $allowHtml = true);
if ($htmlString === '...' && strlen($ptString) + 3 > $maxLength)
{
return $htmlString;
}
$htmlString = rtrim($htmlString, '.');
// Now get the plain text from the HTML string and trim it.
$htmlStringToPtString = JHtml::_('string.truncate', $htmlString, $maxLength, $noSplit, $allowHtml = false);
$htmlStringToPtString = rtrim($htmlStringToPtString, '.');
// If the new plain text string matches the original plain text string we are done.
if ($ptString === $htmlStringToPtString)
{
return $htmlString . '...';
}
// Get the number of HTML tag characters in the first $maxLength characters
$diffLength = strlen($ptString) - strlen($htmlStringToPtString);
if ($diffLength <= 0)
{
return $htmlString . '...';
}
// Set new $maxlength that adjusts for the HTML tags
$maxLength += $diffLength;
}
}
/**
* Abridges text strings over the specified character limit. The
* behavior will insert an ellipsis into the text replacing a section
* of variable size to ensure the string does not exceed the defined
* maximum length. This method is UTF-8 safe.
*
* For example, it transforms "Really long title" to "Really...title".
*
* Note that this method does not scan for HTML tags so will potentially break them.
*
* @param string $text The text to abridge.
* @param integer $length The maximum length of the text (default is 50).
* @param integer $intro The maximum length of the intro text (default is 30).
*
* @return string The abridged text.
*
* @since 1.6
*/
public static function abridge($text, $length = 50, $intro = 30)
{
// Abridge the item text if it is too long.
if (StringHelper::strlen($text) > $length)
{
// Determine the remaining text length.
$remainder = $length - ($intro + 3);
// Extract the beginning and ending text sections.
$beg = StringHelper::substr($text, 0, $intro);
$end = StringHelper::substr($text, StringHelper::strlen($text) - $remainder);
// Build the resulting string.
$text = $beg . '...' . $end;
}
return $text;
}
}