wCMF  3.6
 All Classes Namespaces Files Functions Variables Groups Pages
class.StringUtil.php
Go to the documentation of this file.
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2014 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of any of the following licenses
7  * at your choice:
8  *
9  * - GNU Lesser General Public License (LGPL)
10  * http://www.gnu.org/licenses/lgpl.html
11  * - Eclipse Public License (EPL)
12  * http://www.eclipse.org/org/documents/epl-v10.php
13  *
14  * See the license.txt file distributed with this work for
15  * additional information.
16  *
17  * $Id: class.StringUtil.php 1462 2014-02-04 23:52:27Z iherwig $
18  */
19 
20 /**
21  * @class StringUtil
22  * @ingroup Util
23  * @brief StringUtil provides support for string manipulation.
24  *
25  * @author ingo herwig <ingo@wemove.com>
26  */
28 {
29  /**
30  * Get the dump of a variable as string.
31  * @param var The variable to dump.
32  * @return String
33  */
34  function getDump ($var)
35  {
36  ob_start();
37  var_dump($var);
38  $out = ob_get_contents();
39  ob_end_clean();
40  return $out;
41  }
42  /**
43  * Truncate a string up to a number of characters while preserving whole words and HTML tags
44  * code from: http://alanwhipple.com/2011/05/25/php-truncate-string-preserving-html-tags-words/
45  * @param text String to truncate.
46  * @param length Length of returned string, including ellipsis.
47  * @param ending Ending to be appended to the trimmed string.
48  * @param exact If false, $text will not be cut mid-word
49  * @param considerHtml If true, HTML tags would be handled correctly
50  * @return String
51  */
52  function cropString($text, $length=100, $ending='...', $exact=false, $considerHtml=true)
53  {
54  if ($considerHtml) {
55  // if the plain text is shorter than the maximum length, return the whole text
56  if (strlen(preg_replace('/<.*?>/', '', $text)) <= $length) {
57  return $text;
58  }
59  // splits all html-tags to scanable lines
60  preg_match_all('/(<.+?>)?([^<>]*)/s', $text, $lines, PREG_SET_ORDER);
61  $total_length = strlen($ending);
62  $open_tags = array();
63  $truncate = '';
64  foreach ($lines as $line_matchings) {
65  // if there is any html-tag in this line, handle it and add it (uncounted) to the output
66  if (!empty($line_matchings[1])) {
67  // if it's an "empty element" with or without xhtml-conform closing slash
68  if (preg_match('/^<(\s*.+?\/\s*|\s*(img|br|input|hr|area|base|basefont|col|frame|isindex|link|meta|param)(\s.+?)?)>$/is', $line_matchings[1])) {
69  // do nothing
70  // if tag is a closing tag
71  } else if (preg_match('/^<\s*\/([^\s]+?)\s*>$/s', $line_matchings[1], $tag_matchings)) {
72  // delete tag from $open_tags list
73  $pos = array_search($tag_matchings[1], $open_tags);
74  if ($pos !== false) {
75  unset($open_tags[$pos]);
76  }
77  // if tag is an opening tag
78  } else if (preg_match('/^<\s*([^\s>!]+).*?>$/s', $line_matchings[1], $tag_matchings)) {
79  // add tag to the beginning of $open_tags list
80  array_unshift($open_tags, strtolower($tag_matchings[1]));
81  }
82  // add html-tag to $truncate'd text
83  $truncate .= $line_matchings[1];
84  }
85  // calculate the length of the plain text part of the line; handle entities as one character
86  $content_length = strlen(preg_replace('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|[0-9a-f]{1,6};/i', ' ', $line_matchings[2]));
87  if ($total_length+$content_length> $length) {
88  // the number of characters which are left
89  $left = $length - $total_length;
90  $entities_length = 0;
91  // search for html entities
92  if (preg_match_all('/&[0-9a-z]{2,8};|&#[0-9]{1,7};|[0-9a-f]{1,6};/i', $line_matchings[2], $entities, PREG_OFFSET_CAPTURE)) {
93  // calculate the real length of all entities in the legal range
94  foreach ($entities[0] as $entity) {
95  if ($entity[1]+1-$entities_length <= $left) {
96  $left--;
97  $entities_length += strlen($entity[0]);
98  } else {
99  // no more characters left
100  break;
101  }
102  }
103  }
104  $truncate .= substr($line_matchings[2], 0, $left+$entities_length);
105  // maximum lenght is reached, so get off the loop
106  break;
107  } else {
108  $truncate .= $line_matchings[2];
109  $total_length += $content_length;
110  }
111  // if the maximum length is reached, get off the loop
112  if($total_length>= $length) {
113  break;
114  }
115  }
116  } else {
117  if (strlen($text) <= $length) {
118  return $text;
119  } else {
120  $truncate = substr($text, 0, $length - strlen($ending));
121  }
122  }
123  // if the words shouldn't be cut in the middle...
124  if (!$exact) {
125  // ...search the last occurance of a space...
126  $spacepos = strrpos($truncate, ' ');
127  if (isset($spacepos)) {
128  // ...and cut the text in this position
129  $truncate = substr($truncate, 0, $spacepos);
130  }
131  }
132  // add the defined ending to the text
133  $truncate .= $ending;
134  if($considerHtml) {
135  // close all unclosed html-tags
136  foreach ($open_tags as $tag) {
137  $truncate .= '</' . $tag . '>';
138  }
139  }
140  return $truncate;
141  }
142  /**
143  * Remove a trailing comma, if existing.
144  * @param string The string to crop
145  * @return The string
146  */
147  function removeTrailingComma($string)
148  {
149  return preg_replace('/, ?$/', '', $string);
150  }
151  /**
152  * Extract urls from a string.
153  * @param string The string to search in
154  * @return An array with urls
155  * @note This method searches for occurrences of <a..href="xxx"..>, <img..src="xxx"..>,
156  * <input..src="xxx"..> or <form..action="xxx"..> and extracts xxx.
157  */
158  function getUrls($string)
159  {
160  preg_match_all("/<a[^>]+href=\"([^\">]+)/i", $string, $links);
161 
162  // find urls in javascript popup links
163  for ($i=0; $i<sizeof($links[1]); $i++)
164  if (preg_match_all("/javascript:.*window.open[\(]*'([^']+)/i", $links[1][$i], $popups))
165  $links[1][$i] = $popups[1][0];
166 
167  // remove mailto links
168  for ($i=0; $i<sizeof($links[1]); $i++)
169  if (preg_match("/^mailto:/i", $links[1][$i]))
170  unset($links[1][$i]);
171 
172  preg_match_all("/<img[^>]+src=\"([^\">]+)/i", $string, $images);
173  preg_match_all("/<input[^>]+src=\"([^\">]+)/i", $string, $buttons);
174  preg_match_all("/<form[^>]+action=\"([^\">]+)/i", $string, $actions);
175  return array_merge($links[1], $images[1], $buttons[1], $actions[1]);
176  }
177  /**
178  * Split a quoted string
179  * code from: http://php3.de/manual/de/function.split.php
180  * @code
181  * $string = '"hello, world", "say \"hello\"", 123, unquotedtext';
182  * $result = quotesplit($string);
183  *
184  * // results in:
185  * // ['hello, world'] [say "hello"] [123] [unquotedtext]
186  *
187  * @endcode
188  *
189  * @param s The string to split
190  * @return An array of strings
191  */
192  function quotesplit($s)
193  {
194  $r = Array();
195  $p = 0;
196  $l = strlen($s);
197  while ($p < $l)
198  {
199  while (($p < $l) && (strpos(" \r\t\n",$s[$p]) !== false)) $p++;
200  if ($s[$p] == '"')
201  {
202  $p++;
203  $q = $p;
204  while (($p < $l) && ($s[$p] != '"'))
205  {
206  if ($s[$p] == '\\') { $p+=2; continue; }
207  $p++;
208  }
209  $r[] = stripslashes(substr($s, $q, $p-$q));
210  $p++;
211  while (($p < $l) && (strpos(" \r\t\n",$s[$p]) !== false)) $p++;
212  $p++;
213  }
214  else if ($s[$p] == "'")
215  {
216  $p++;
217  $q = $p;
218  while (($p < $l) && ($s[$p] != "'"))
219  {
220  if ($s[$p] == '\\') { $p+=2; continue; }
221  $p++;
222  }
223  $r[] = stripslashes(substr($s, $q, $p-$q));
224  $p++;
225  while (($p < $l) && (strpos(" \r\t\n",$s[$p]) !== false)) $p++;
226  $p++;
227  }
228  else
229  {
230  $q = $p;
231  while (($p < $l) && (strpos(",;",$s[$p]) === false))
232  {
233  $p++;
234  }
235  $r[] = stripslashes(trim(substr($s, $q, $p-$q)));
236  while (($p < $l) && (strpos(" \r\t\n",$s[$p]) !== false)) $p++;
237  $p++;
238  }
239  }
240  return $r;
241  }
242  /**
243  * Split string preserving quoted strings
244  * code based on: http://www.php.net/manual/en/function.explode.php#94024
245  * @param str String to split
246  * @param delim Regexp to use in preg_split
247  * @param quoteChr Quote character
248  * @param preserve Boolean whether to preserve the quote character or not
249  * @return Array
250  */
251  function splitQuoted($str, $delim='/ /', $quoteChr='"', $preserve=false) {
252  $resArr = array();
253  $n = 0;
254  $expEncArr = explode($quoteChr, $str);
255  foreach($expEncArr as $encItem) {
256  if ($n++%2) {
257  array_push($resArr, array_pop($resArr) . ($preserve?$quoteChr:'') . $encItem.($preserve?$quoteChr:''));
258  }
259  else {
260  $expDelArr = preg_split($delim, $encItem);
261  array_push($resArr, array_pop($resArr) . array_shift($expDelArr));
262  $resArr = array_merge($resArr, $expDelArr);
263  }
264  }
265  return $resArr;
266  }
267  /**
268  * Create an excerpt from the given text around the given phrase
269  * code based on: http://stackoverflow.com/questions/1292121/how-to-generate-the-snippet-like-generated-by-google-with-php-and-mysql
270  */
271  function excerpt($text, $phrase, $radius = 100) {
272  $phraseLen = strlen($phrase);
273  if ($radius < $phraseLen) {
274  $radius = $phraseLen;
275  }
276  $pos = strpos(strtolower($text), strtolower($phrase));
277 
278  $startPos = 0;
279  if ($pos > $radius) {
280  $startPos = $pos - $radius;
281  }
282  $textLen = strlen($text);
283 
284  $endPos = $pos + $phraseLen + $radius;
285  if ($endPos >= $textLen) {
286  $endPos = $textLen;
287  }
288 
289  // make sure to cut at spaces
290  $firstSpacePos = strpos($text, " ", $startPos);
291  $lastSpacePos = strrpos($text, " ", -(strlen($text)-$endPos));
292 
293  $excerpt1 = substr($text, $firstSpacePos, $lastSpacePos-$firstSpacePos);
294 
295  // remove open tags
296  $excerpt = preg_replace('/^[^<]*?>|<[^>]*?$/', '', $excerpt1);
297  return $excerpt;
298  }
299 }
300 ?>
cropString($text, $length=100, $ending='...', $exact=false, $considerHtml=true)
removeTrailingComma($string)
StringUtil provides support for string manipulation.
getUrls($string)
splitQuoted($str, $delim='/ /', $quoteChr='"', $preserve=false)
excerpt($text, $phrase, $radius=100)