Update website
This commit is contained in:
parent
4413528994
commit
1d90fbf296
6865 changed files with 1091082 additions and 0 deletions
354
admin/phpMyAdmin/vendor/phpmyadmin/sql-parser/src/UtfString.php
vendored
Normal file
354
admin/phpMyAdmin/vendor/phpmyadmin/sql-parser/src/UtfString.php
vendored
Normal file
|
@ -0,0 +1,354 @@
|
|||
<?php
|
||||
|
||||
declare(strict_types=1);
|
||||
|
||||
namespace PhpMyAdmin\SqlParser;
|
||||
|
||||
use ArrayAccess;
|
||||
use Exception;
|
||||
use Stringable;
|
||||
|
||||
use function mb_check_encoding;
|
||||
use function mb_strlen;
|
||||
use function ord;
|
||||
|
||||
/**
|
||||
* Implementation for UTF-8 strings.
|
||||
*
|
||||
* The subscript operator in PHP, when used with string will return a byte and not a character. Because in UTF-8
|
||||
* strings a character may occupy more than one byte, the subscript operator may return an invalid character.
|
||||
*
|
||||
* Because the lexer relies on the subscript operator this class had to be implemented.
|
||||
*
|
||||
* Implements array-like access for UTF-8 strings.
|
||||
*
|
||||
* In this library, this class should be used to parse UTF-8 queries.
|
||||
*
|
||||
* @implements ArrayAccess<int, string>
|
||||
*/
|
||||
class UtfString implements ArrayAccess, Stringable
|
||||
{
|
||||
/**
|
||||
* The raw, multi-byte string.
|
||||
*
|
||||
* @var string
|
||||
*/
|
||||
public $str = '';
|
||||
|
||||
/**
|
||||
* The index of current byte.
|
||||
*
|
||||
* For ASCII strings, the byte index is equal to the character index.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $byteIdx = 0;
|
||||
|
||||
/**
|
||||
* The index of current character.
|
||||
*
|
||||
* For non-ASCII strings, some characters occupy more than one byte and
|
||||
* the character index will have a lower value than the byte index.
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $charIdx = 0;
|
||||
|
||||
/**
|
||||
* The length of the string (in bytes).
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $byteLen = 0;
|
||||
|
||||
/**
|
||||
* The length of the string (in characters).
|
||||
*
|
||||
* @var int
|
||||
*/
|
||||
public $charLen = 0;
|
||||
|
||||
/**
|
||||
* A map of ASCII binary values to their ASCII code
|
||||
* This is to improve performance and avoid calling ord($byte)
|
||||
*
|
||||
* Source: https://www.freecodecamp.org/news/ascii-table-hex-to-ascii-value-character-code-chart-2/
|
||||
*
|
||||
* @var array<int|string,int>
|
||||
*/
|
||||
protected static $asciiMap = [
|
||||
"\0" => 0, // (00000000) NUL Null
|
||||
"\t" => 9, // (00001001) HT Horizontal Tab
|
||||
"\n" => 10, // (00001010) LF Newline / Line Feed
|
||||
"\v" => 11, // (00001011) VT Vertical Tab
|
||||
"\f" => 12, // (00001100) FF Form Feed
|
||||
"\r" => 13, // (00001101) CR Carriage Return
|
||||
' ' => 32, // (00100000) SP Space
|
||||
'!' => 33, // (00100001) ! Exclamation mark
|
||||
'"' => 34, // (00100010) " Double quote
|
||||
'#' => 35, // (00100011) # Number
|
||||
'$' => 36, // (00100100) $ Dollar
|
||||
'%' => 37, // (00100101) % Percent
|
||||
'&' => 38, // (00100110) & Ampersand
|
||||
'\'' => 39, // (00100111) ' Single quote
|
||||
'(' => 40, // (00101000) ( Left parenthesis
|
||||
')' => 41, // (00101001) ) Right parenthesis
|
||||
'*' => 42, // (00101010) * Asterisk
|
||||
'+' => 43, // (00101011) + Plus
|
||||
',' => 44, // (00101100) , Comma
|
||||
'-' => 45, // (00101101) - Minus
|
||||
'.' => 46, // (00101110) . Period
|
||||
'/' => 47, // (00101111) / Slash
|
||||
'0' => 48, // (00110000) 0 Zero
|
||||
'1' => 49, // (00110001) 1 One
|
||||
'2' => 50, // (00110010) 2 Two
|
||||
'3' => 51, // (00110011) 3 Three
|
||||
'4' => 52, // (00110100) 4 Four
|
||||
'5' => 53, // (00110101) 5 Five
|
||||
'6' => 54, // (00110110) 6 Six
|
||||
'7' => 55, // (00110111) 7 Seven
|
||||
'8' => 56, // (00111000) 8 Eight
|
||||
'9' => 57, // (00111001) 9 Nine
|
||||
':' => 58, // (00111010) : Colon
|
||||
';' => 59, // (00111011) ; Semicolon
|
||||
'<' => 60, // (00111100) < Less than
|
||||
'=' => 61, // (00111101) = Equal sign
|
||||
'>' => 62, // (00111110) > Greater than
|
||||
'?' => 63, // (00111111) ? Question mark
|
||||
'@' => 64, // (01000000) @ At sign
|
||||
'A' => 65, // (01000001) A Uppercase A
|
||||
'B' => 66, // (01000010) B Uppercase B
|
||||
'C' => 67, // (01000011) C Uppercase C
|
||||
'D' => 68, // (01000100) D Uppercase D
|
||||
'E' => 69, // (01000101) E Uppercase E
|
||||
'F' => 70, // (01000110) F Uppercase F
|
||||
'G' => 71, // (01000111) G Uppercase G
|
||||
'H' => 72, // (01001000) H Uppercase H
|
||||
'I' => 73, // (01001001) I Uppercase I
|
||||
'J' => 74, // (01001010) J Uppercase J
|
||||
'K' => 75, // (01001011) K Uppercase K
|
||||
'L' => 76, // (01001100) L Uppercase L
|
||||
'M' => 77, // (01001101) M Uppercase M
|
||||
'N' => 78, // (01001110) N Uppercase N
|
||||
'O' => 79, // (01001111) O Uppercase O
|
||||
'P' => 80, // (01010000) P Uppercase P
|
||||
'Q' => 81, // (01010001) Q Uppercase Q
|
||||
'R' => 82, // (01010010) R Uppercase R
|
||||
'S' => 83, // (01010011) S Uppercase S
|
||||
'T' => 84, // (01010100) T Uppercase T
|
||||
'U' => 85, // (01010101) U Uppercase U
|
||||
'V' => 86, // (01010110) V Uppercase V
|
||||
'W' => 87, // (01010111) W Uppercase W
|
||||
'X' => 88, // (01011000) X Uppercase X
|
||||
'Y' => 89, // (01011001) Y Uppercase Y
|
||||
'Z' => 90, // (01011010) Z Uppercase Z
|
||||
'[' => 91, // (01011011) [ Left square bracket
|
||||
'\\' => 92, // (01011100) \ backslash
|
||||
']' => 93, // (01011101) ] Right square bracket
|
||||
'^' => 94, // (01011110) ^ Caret / circumflex
|
||||
'_' => 95, // (01011111) _ Underscore
|
||||
'`' => 96, // (01100000) ` Grave / accent
|
||||
'a' => 97, // (01100001) a Lowercase a
|
||||
'b' => 98, // (01100010) b Lowercase b
|
||||
'c' => 99, // (01100011) c Lowercase c
|
||||
'd' => 100, // (01100100) d Lowercase d
|
||||
'e' => 101, // (01100101) e Lowercase e
|
||||
'f' => 102, // (01100110) f Lowercase
|
||||
'g' => 103, // (01100111) g Lowercase g
|
||||
'h' => 104, // (01101000) h Lowercase h
|
||||
'i' => 105, // (01101001) i Lowercase i
|
||||
'j' => 106, // (01101010) j Lowercase j
|
||||
'k' => 107, // (01101011) k Lowercase k
|
||||
'l' => 108, // (01101100) l Lowercase l
|
||||
'm' => 109, // (01101101) m Lowercase m
|
||||
'n' => 110, // (01101110) n Lowercase n
|
||||
'o' => 111, // (01101111) o Lowercase o
|
||||
'p' => 112, // (01110000) p Lowercase p
|
||||
'q' => 113, // (01110001) q Lowercase q
|
||||
'r' => 114, // (01110010) r Lowercase r
|
||||
's' => 115, // (01110011) s Lowercase s
|
||||
't' => 116, // (01110100) t Lowercase t
|
||||
'u' => 117, // (01110101) u Lowercase u
|
||||
'v' => 118, // (01110110) v Lowercase v
|
||||
'w' => 119, // (01110111) w Lowercase w
|
||||
'x' => 120, // (01111000) x Lowercase x
|
||||
'y' => 121, // (01111001) y Lowercase y
|
||||
'z' => 122, // (01111010) z Lowercase z
|
||||
'{' => 123, // (01111011) { Left curly bracket
|
||||
'|' => 124, // (01111100) | Vertical bar
|
||||
'}' => 125, // (01111101) } Right curly bracket
|
||||
'~' => 126, // (01111110) ~ Tilde
|
||||
"\x7f" => 127, // (01111111) DEL Delete
|
||||
];
|
||||
|
||||
/**
|
||||
* @param string $str the string
|
||||
*/
|
||||
public function __construct($str)
|
||||
{
|
||||
$this->str = $str;
|
||||
$this->byteIdx = 0;
|
||||
$this->charIdx = 0;
|
||||
$this->byteLen = mb_strlen($str, '8bit');
|
||||
if (! mb_check_encoding($str, 'UTF-8')) {
|
||||
$this->charLen = 0;
|
||||
} else {
|
||||
$this->charLen = mb_strlen($str, 'UTF-8');
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if the given offset exists.
|
||||
*
|
||||
* @param int $offset the offset to be checked
|
||||
*
|
||||
* @return bool
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function offsetExists($offset)
|
||||
{
|
||||
return ($offset >= 0) && ($offset < $this->charLen);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the character at given offset.
|
||||
*
|
||||
* @param int $offset the offset to be returned
|
||||
*
|
||||
* @return string|null
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function offsetGet($offset)
|
||||
{
|
||||
if (($offset < 0) || ($offset >= $this->charLen)) {
|
||||
return null;
|
||||
}
|
||||
|
||||
$delta = $offset - $this->charIdx;
|
||||
|
||||
if ($delta > 0) {
|
||||
// Fast forwarding.
|
||||
while ($delta-- > 0) {
|
||||
$this->byteIdx += static::getCharLength($this->str[$this->byteIdx]);
|
||||
++$this->charIdx;
|
||||
}
|
||||
} elseif ($delta < 0) {
|
||||
// Rewinding.
|
||||
while ($delta++ < 0) {
|
||||
do {
|
||||
$byte = ord($this->str[--$this->byteIdx]);
|
||||
} while (($byte >= 128) && ($byte < 192));
|
||||
|
||||
--$this->charIdx;
|
||||
}
|
||||
}
|
||||
|
||||
$bytesCount = static::getCharLength($this->str[$this->byteIdx]);
|
||||
|
||||
$ret = '';
|
||||
for ($i = 0; $bytesCount-- > 0; ++$i) {
|
||||
$ret .= $this->str[$this->byteIdx + $i];
|
||||
}
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the value of a character.
|
||||
*
|
||||
* @param int $offset the offset to be set
|
||||
* @param string $value the value to be set
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @throws Exception not implemented.
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function offsetSet($offset, $value)
|
||||
{
|
||||
throw new Exception('Not implemented.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Unsets an index.
|
||||
*
|
||||
* @param int $offset the value to be unset
|
||||
*
|
||||
* @return void
|
||||
*
|
||||
* @throws Exception not implemented.
|
||||
*/
|
||||
#[\ReturnTypeWillChange]
|
||||
public function offsetUnset($offset)
|
||||
{
|
||||
throw new Exception('Not implemented.');
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the length of an UTF-8 character.
|
||||
*
|
||||
* According to RFC 3629, a UTF-8 character can have at most 4 bytes.
|
||||
* However, this implementation supports UTF-8 characters containing up to 6
|
||||
* bytes.
|
||||
*
|
||||
* @see https://tools.ietf.org/html/rfc3629
|
||||
*
|
||||
* @param string $byte the byte to be analyzed
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
public static function getCharLength($byte)
|
||||
{
|
||||
// Use the default ASCII map as queries are mostly ASCII chars
|
||||
// ord($byte) has a performance cost
|
||||
|
||||
if (! isset(static::$asciiMap[$byte])) {
|
||||
// Complete the cache with missing items
|
||||
static::$asciiMap[$byte] = ord($byte);
|
||||
}
|
||||
|
||||
$byte = static::$asciiMap[$byte];
|
||||
|
||||
if ($byte < 128) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
if ($byte < 224) {
|
||||
return 2;
|
||||
}
|
||||
|
||||
if ($byte < 240) {
|
||||
return 3;
|
||||
}
|
||||
|
||||
if ($byte < 248) {
|
||||
return 4;
|
||||
}
|
||||
|
||||
if ($byte < 252) {
|
||||
return 5; // unofficial
|
||||
}
|
||||
|
||||
return 6; // unofficial
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the length in characters of the string.
|
||||
*
|
||||
* @return int
|
||||
*/
|
||||
public function length()
|
||||
{
|
||||
return $this->charLen;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the contained string.
|
||||
*
|
||||
* @return string
|
||||
*/
|
||||
public function __toString()
|
||||
{
|
||||
return $this->str;
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue