ord.php
2.57 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
<?php
/**
* @package utf8
*/
//---------------------------------------------------------------
/**
 * UTF-8 aware alternative to ord
 * Returns the unicode ordinal for the first character of a UTF-8 string.
 *
 * Only the lead byte and the continuation bytes it calls for are read; any
 * bytes after the first character are ignored. Continuation bytes are NOT
 * validated (they are assumed to be in the 0x80-0xBF range), and the legacy
 * 5- and 6-byte forms are still decoded.
 *
 * Joomla modification - As of PHP 7.4, curly brace access has been deprecated. As a result this function has been
 * modified to use square brace syntax
 * See https://github.com/php/php-src/commit/d574df63dc375f5fc9202ce5afde23f866b6450a
 * for additional references
 *
 * @param string $chr UTF-8 encoded character
 * @return int|false unicode ordinal for the character (0 for an empty
 *                   string), or FALSE after a trigger_error() call when the
 *                   sequence is too short or starts with an invalid byte
 * @see http://www.php.net/ord
 * @see http://www.php.net/manual/en/function.ord.php#46267
 */
function utf8_ord($chr) {
    // Keep the loose scalar-to-string coercion ord() used to perform for us.
    $chr = (string) $chr;

    // An empty string has no lead byte; keep returning 0 as ord('') does.
    if ( !isset($chr[0]) ) {
        return 0;
    }

    // Hand ord() exactly one byte: the lead byte of the sequence.
    $ord0 = ord($chr[0]);

    // 0xxxxxxx - plain ASCII, the byte value is the code point.
    if ( $ord0 <= 127 ) {
        return $ord0;
    }

    if ( !isset($chr[1]) ) {
        trigger_error('Short sequence - at least 2 bytes expected, only 1 seen');
        return FALSE;
    }
    $ord1 = ord($chr[1]);

    // 110xxxxx 10xxxxxx - two byte sequence.
    if ( $ord0 >= 192 && $ord0 <= 223 ) {
        return ( $ord0 - 192 ) * 64
            + ( $ord1 - 128 );
    }

    if ( !isset($chr[2]) ) {
        trigger_error('Short sequence - at least 3 bytes expected, only 2 seen');
        return FALSE;
    }
    $ord2 = ord($chr[2]);

    // 1110xxxx + 2 continuation bytes - three byte sequence.
    if ( $ord0 >= 224 && $ord0 <= 239 ) {
        return ( $ord0 - 224 ) * 4096
            + ( $ord1 - 128 ) * 64
            + ( $ord2 - 128 );
    }

    if ( !isset($chr[3]) ) {
        trigger_error('Short sequence - at least 4 bytes expected, only 3 seen');
        return FALSE;
    }
    $ord3 = ord($chr[3]);

    // 11110xxx + 3 continuation bytes - four byte sequence.
    if ( $ord0 >= 240 && $ord0 <= 247 ) {
        return ( $ord0 - 240 ) * 262144
            + ( $ord1 - 128 ) * 4096
            + ( $ord2 - 128 ) * 64
            + ( $ord3 - 128 );
    }

    if ( !isset($chr[4]) ) {
        trigger_error('Short sequence - at least 5 bytes expected, only 4 seen');
        return FALSE;
    }
    $ord4 = ord($chr[4]);

    // 111110xx + 4 continuation bytes - legacy five byte form.
    if ( $ord0 >= 248 && $ord0 <= 251 ) {
        return ( $ord0 - 248 ) * 16777216
            + ( $ord1 - 128 ) * 262144
            + ( $ord2 - 128 ) * 4096
            + ( $ord3 - 128 ) * 64
            + ( $ord4 - 128 );
    }

    if ( !isset($chr[5]) ) {
        trigger_error('Short sequence - at least 6 bytes expected, only 5 seen');
        return FALSE;
    }

    // 1111110x + 5 continuation bytes - legacy six byte form.
    if ( $ord0 >= 252 && $ord0 <= 253 ) {
        return ( $ord0 - 252 ) * 1073741824
            + ( $ord1 - 128 ) * 16777216
            + ( $ord2 - 128 ) * 262144
            + ( $ord3 - 128 ) * 4096
            + ( $ord4 - 128 ) * 64
            + ( ord($chr[5]) - 128 );
    }

    // 0xFE / 0xFF can never start a sequence.
    if ( $ord0 >= 254 ) {
        trigger_error('Invalid UTF-8 with surrogate ordinal '.$ord0);
        return FALSE;
    }

    // Only 128-191 can reach this point: a continuation byte used as a lead
    // byte. Previously this fell off the end and returned NULL silently.
    trigger_error('Invalid UTF-8 lead byte (unexpected continuation byte) '.$ord0);
    return FALSE;
}