wCMF  3.6
 All Classes Namespaces Files Functions Variables Groups Pages
class.EncodingUtil.php
Go to the documentation of this file.
1 <?php
2 /**
3  * wCMF - wemove Content Management Framework
4  * Copyright (C) 2005-2014 wemove digital solutions GmbH
5  *
6  * Licensed under the terms of any of the following licenses
7  * at your choice:
8  *
9  * - GNU Lesser General Public License (LGPL)
10  * http://www.gnu.org/licenses/lgpl.html
11  * - Eclipse Public License (EPL)
12  * http://www.eclipse.org/org/documents/epl-v10.php
13  *
14  * See the license.txt file distributed with this work for
15  * additional information.
16  *
17  * $Id: class.EncodingUtil.php 1462 2014-02-04 23:52:27Z iherwig $
18  */
19 
/**
 * Translation table for the code points where ISO-8859-1 and Windows-1252
 * disagree.
 *
 * Each key is the UTF-8 encoding of an ISO-8859-1 C1 control character
 * (U+0080 - U+009F); each value is the UTF-8 encoding of the printable
 * character that Windows-1252 places at the same byte value. The table has
 * 27 entries; there is no entry for the bytes 0x81, 0x8D, 0x8F, 0x90 and 0x9D.
 *
 * The table is registered through $GLOBALS so that it is a real global even
 * when this file is included from inside a function (e.g. by a class loader).
 * A plain assignment would then create a variable local to that function and
 * the "global $CP1252Map" statements used by the consumers would see nothing.
 *
 * code from: http://de3.php.net/manual/de/function.utf8-encode.php#45226
 */
$GLOBALS['CP1252Map'] = array(
    "\xc2\x80" => "\xe2\x82\xac", /* U+20AC EURO SIGN */
    "\xc2\x82" => "\xe2\x80\x9a", /* U+201A SINGLE LOW-9 QUOTATION MARK */
    "\xc2\x83" => "\xc6\x92",     /* U+0192 LATIN SMALL LETTER F WITH HOOK */
    "\xc2\x84" => "\xe2\x80\x9e", /* U+201E DOUBLE LOW-9 QUOTATION MARK */
    "\xc2\x85" => "\xe2\x80\xa6", /* U+2026 HORIZONTAL ELLIPSIS */
    "\xc2\x86" => "\xe2\x80\xa0", /* U+2020 DAGGER */
    "\xc2\x87" => "\xe2\x80\xa1", /* U+2021 DOUBLE DAGGER */
    "\xc2\x88" => "\xcb\x86",     /* U+02C6 MODIFIER LETTER CIRCUMFLEX ACCENT */
    "\xc2\x89" => "\xe2\x80\xb0", /* U+2030 PER MILLE SIGN */
    "\xc2\x8a" => "\xc5\xa0",     /* U+0160 LATIN CAPITAL LETTER S WITH CARON */
    "\xc2\x8b" => "\xe2\x80\xb9", /* U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION */
    "\xc2\x8c" => "\xc5\x92",     /* U+0152 LATIN CAPITAL LIGATURE OE */
    "\xc2\x8e" => "\xc5\xbd",     /* U+017D LATIN CAPITAL LETTER Z WITH CARON */
    "\xc2\x91" => "\xe2\x80\x98", /* U+2018 LEFT SINGLE QUOTATION MARK */
    "\xc2\x92" => "\xe2\x80\x99", /* U+2019 RIGHT SINGLE QUOTATION MARK */
    "\xc2\x93" => "\xe2\x80\x9c", /* U+201C LEFT DOUBLE QUOTATION MARK */
    "\xc2\x94" => "\xe2\x80\x9d", /* U+201D RIGHT DOUBLE QUOTATION MARK */
    "\xc2\x95" => "\xe2\x80\xa2", /* U+2022 BULLET */
    "\xc2\x96" => "\xe2\x80\x93", /* U+2013 EN DASH */
    "\xc2\x97" => "\xe2\x80\x94", /* U+2014 EM DASH */

    "\xc2\x98" => "\xcb\x9c",     /* U+02DC SMALL TILDE */
    "\xc2\x99" => "\xe2\x84\xa2", /* U+2122 TRADE MARK SIGN */
    "\xc2\x9a" => "\xc5\xa1",     /* U+0161 LATIN SMALL LETTER S WITH CARON */
    "\xc2\x9b" => "\xe2\x80\xba", /* U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION */
    "\xc2\x9c" => "\xc5\x93",     /* U+0153 LATIN SMALL LIGATURE OE */
    "\xc2\x9e" => "\xc5\xbe",     /* U+017E LATIN SMALL LETTER Z WITH CARON */
    "\xc2\x9f" => "\xc5\xb8"      /* U+0178 LATIN CAPITAL LETTER Y WITH DIAERESIS */
);
58 
59 /**
60  * @class EncodingUtil
61  * @ingroup Util
62  * @brief EncodingUtil provides helper functions for working with different encodings,
63  * mainly UTF-8.
64  *
65  * @author ingo herwig <ingo@wemove.com>
66  */
68 {
/**
 * Tells whether the given value is a well-formed UTF-8 string.
 *
 * The check is done by mbstring's validator in a single pass instead of
 * transcoding the value to UTF-32 and back and comparing the result.
 *
 * @param string The value to be tested
 * @return True for a string that is valid UTF-8 (the empty string included),
 *   false for malformed byte sequences and for every non-string value
 */
public static function isUtf8($string)
{
    // only strings can be UTF-8; integers, null, arrays etc. are rejected up front
    return is_string($string) && mb_check_encoding($string, 'UTF-8');
}
/**
 * Decodes a UTF-8 string that may contain Windows-1252 special characters
 * to a single-byte ISO-8859-1 / CP1252 string.
 *
 * Every character listed as a value in the global $CP1252Map is first
 * replaced by the corresponding C1 control code point, so that the final
 * conversion emits it as the single byte Windows-1252 uses for it.
 * Characters without an ISO-8859-1 equivalent are replaced by mbstring's
 * substitute character ('?' unless configured otherwise).
 *
 * @param str The string to be decoded
 * @return The decoded string
 * code from: http://www.php.net/manual/en/function.utf8-decode.php#47146
 */
public static function convertCp1252Utf8ToIso($str)
{
    global $CP1252Map;

    // reverse direction of the table: CP1252 character => C1 control code point
    $toControlChars = array_flip($CP1252Map);
    $withControlChars = strtr($str, $toControlChars);

    // mb_convert_encoding replaces utf8_decode(), which is deprecated as of PHP 8.2
    return mb_convert_encoding($withControlChars, 'ISO-8859-1', 'UTF-8');
}
/**
 * Encodes a single-byte ISO-8859-1 / CP1252 string to UTF-8.
 *
 * The input is converted byte by byte from ISO-8859-1 to UTF-8; afterwards
 * every C1 control character that appears as a key in the global $CP1252Map
 * is replaced by the printable character Windows-1252 defines for that byte.
 *
 * @param str The string to be encoded
 * @return The UTF-8 encoded string
 * code from: http://www.php.net/manual/en/function.utf8-decode.php#47146
 */
public static function convertIsoToCp1252Utf8($str)
{
    global $CP1252Map;

    // same result as utf8_encode(), which is deprecated as of PHP 8.2
    $utf8 = mb_convert_encoding($str, 'UTF-8', 'ISO-8859-1');

    return strtr($utf8, $CP1252Map);
}
/**
 * Encodes an ISO-8859-1 / CP1252 mixed variable to UTF-8.
 *
 * Arrays are processed recursively. A string is converted with
 * convertIsoToCp1252Utf8() only if isUtf8() reports that it is not valid
 * UTF-8 already, so input that is already encoded is not encoded twice.
 * Every other value (integer, float, boolean, null, object, resource) is
 * returned unchanged.
 *
 * @param input A string or an array (associative or simple, may be nested)
 * @param encodeKeys True if array keys should be encoded by the same rule
 *   as the values, false if keys should be left as they are [default: false]
 * @return The utf-8 encoded input
 * code from: http://de3.php.net/utf8_encode
 */
public static function utf8EncodeMix($input, $encodeKeys=false)
{
    if (is_array($input)) {
        $result = array();
        foreach ($input as $key => $value) {
            if ($encodeKeys) {
                // array keys are always integers or strings, so this call
                // takes the non-array path below and never recurses further
                $key = self::utf8EncodeMix($key);
            }
            $result[$key] = self::utf8EncodeMix($value, $encodeKeys);
        }
        return $result;
    }

    // convert strings only: passing null or objects to the converter would
    // silently turn them into strings
    if (is_string($input) && !self::isUtf8($input)) {
        return self::convertIsoToCp1252Utf8($input);
    }

    return $input;
}
130 }
131 ?>
static isUtf8($string)
static convertIsoToCp1252Utf8($str)
static utf8EncodeMix($input, $encodeKeys=false)
static convertCp1252Utf8ToIso($str)
EncodingUtil provides helper functions for working with different encodings mainly UTF-8...