UTF8 should be a top-level class because it is declared as final, from ab887e639d

This commit is contained in:
Woody Gilk 2009-06-18 18:15:57 -05:00
parent e440358c16
commit 4e269086a0
2 changed files with 715 additions and 718 deletions

View file

@ -1,717 +0,0 @@
<?php defined('SYSPATH') or die('No direct script access.');
/**
* A port of [phputf8][ref-p8] to a unified set of files. Provides multi-byte
* aware replacement string functions.
*
* For UTF-8 support to work correctly, the following requirements must be met:
*
* - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
* - Support for [Unicode properties][ref-uc] is highly recommended (--enable-unicode-properties)
* - UTF-8 conversion will be much more reliable if the [iconv extension][ref-ie] is loaded
* - The [mbstring extension][ref-mb] is highly recommended, but must not be overloading string functions
*
* *This file is licensed differently from the rest of Kohana. As a port of
* [phputf8][ref-p8], this file is released under the LGPL.*
*
* [ref-p8]: http://phputf8.sourceforge.net/
* [ref-uc]: http://php.net/manual/reference.pcre.pattern.modifiers.php
* [ref-ie]: http://php.net/iconv
* [ref-mb]: http://php.net/mbstring
*
* @package Kohana
* @author Kohana Team
* @copyright (c) 2007-2008 Kohana Team
* @copyright (c) 2005 Harry Fuecks
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
*/
final class UTF8 {
// Does the server support UTF-8 natively?
public static $server_utf8 = NULL;
// Called methods
public static $called = array();
/**
* Recursively cleans arrays, objects, and strings. Removes ASCII control
* codes and converts to the requested charset while silently discarding
* incompatible characters.
*
* @param mixed variable to clean
* @param string character set, defaults to UTF-8
* @return mixed
*/
public static function clean($var, $charset = 'UTF-8')
{
if (is_array($var) OR is_object($var))
{
foreach ($var as $key => $val)
{
// Recursion!
$var[self::clean($key)] = self::clean($val);
}
}
elseif (is_string($var) AND $var !== '')
{
// Remove control characters
$var = self::strip_ascii_ctrl($var);
if ( ! self::is_ascii($var))
{
// Disable notices
$ER = error_reporting(~E_NOTICE);
// iconv is expensive, so it is only used when needed
$var = iconv($charset, $charset.'//IGNORE', $var);
// Turn notices back on
error_reporting($ER);
}
}
return $var;
}
/**
* Tests whether a string contains only 7-bit ASCII bytes. This is used to
* determine when to use native functions or UTF-8 functions.
*
* @param string string to check
* @return bool
*/
public static function is_ascii($str)
{
return ! preg_match('/[^\x00-\x7F]/S', $str);
}
/**
* Strips out device control codes in the ASCII range.
*
* @param string string to clean
* @return string
*/
public static function strip_ascii_ctrl($str)
{
return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
}
/**
* Strips out all non-7bit ASCII bytes.
*
* @param string string to clean
* @return string
*/
public static function strip_non_ascii($str)
{
return preg_replace('/[^\x00-\x7F]+/S', '', $str);
}
/**
* Replaces special/accented UTF-8 characters by ASCII-7 'equivalents'.
*
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string string to transliterate
* @param integer -1 lowercase only, +1 uppercase only, 0 both cases
* @return string
*/
public static function transliterate_to_ascii($str, $case = 0)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _transliterate_to_ascii($str, $case);
}
/**
* Returns the length of the given string.
*
* @see http://php.net/strlen
*
* @param string string being measured for length
* @return integer
*/
public static function strlen($str)
{
if (UTF8::$server_utf8)
return mb_strlen($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strlen($str);
}
/**
* Finds position of first occurrence of a UTF-8 string.
*
* @see http://php.net/strlen
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string haystack
* @param string needle
* @param integer offset from which character in haystack to start searching
* @return integer position of needle
* @return boolean FALSE if the needle is not found
*/
public static function strpos($str, $search, $offset = 0)
{
if (UTF8::$server_utf8)
return mb_strpos($str, $search, $offset);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strpos($str, $search, $offset);
}
/**
* Finds position of last occurrence of a char in a UTF-8 string.
*
* @see http://php.net/strrpos
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string haystack
* @param string needle
* @param integer offset from which character in haystack to start searching
* @return integer position of needle
* @return boolean FALSE if the needle is not found
*/
public static function strrpos($str, $search, $offset = 0)
{
if (UTF8::$server_utf8)
return mb_strrpos($str, $search, $offset);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strrpos($str, $search, $offset);
}
/**
* Returns part of a UTF-8 string.
*
* @see http://php.net/substr
* @author Chris Smith <chris@jalakai.co.uk>
*
* @param string input string
* @param integer offset
* @param integer length limit
* @return string
*/
public static function substr($str, $offset, $length = NULL)
{
if (UTF8::$server_utf8)
return ($length === NULL) ? mb_substr($str, $offset) : mb_substr($str, $offset, $length);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _substr($str, $offset, $length);
}
/**
* Replaces text within a portion of a UTF-8 string.
*
* @see http://php.net/substr_replace
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string replacement string
* @param integer offset
* @return string
*/
public static function substr_replace($str, $replacement, $offset, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _substr_replace($str, $replacement, $offset, $length);
}
/**
* Makes a UTF-8 string lowercase.
*
* @see http://php.net/strtolower
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string mixed case string
* @return string
*/
public static function strtolower($str)
{
if (UTF8::$server_utf8)
return mb_strtolower($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strtolower($str);
}
/**
* Makes a UTF-8 string uppercase.
*
* @see http://php.net/strtoupper
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string mixed case string
* @return string
*/
public static function strtoupper($str)
{
if (UTF8::$server_utf8)
return mb_strtoupper($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strtoupper($str);
}
/**
* Makes a UTF-8 string's first character uppercase.
*
* @see http://php.net/ucfirst
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string mixed case string
* @return string
*/
public static function ucfirst($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ucfirst($str);
}
/**
* Makes the first character of every word in a UTF-8 string uppercase.
*
* @see http://php.net/ucwords
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string mixed case string
* @return string
*/
public static function ucwords($str)
{
if (UTF8::$server_utf8)
return mb_convert_case($str, MB_CASE_TITLE);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ucwords($str);
}
/**
* Case-insensitive UTF-8 string comparison.
*
* @see http://php.net/strcasecmp
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string string to compare
* @param string string to compare
* @return integer less than 0 if str1 is less than str2
* @return integer greater than 0 if str1 is greater than str2
* @return integer 0 if they are equal
*/
public static function strcasecmp($str1, $str2)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strcasecmp($str1, $str2);
}
/**
* Returns a string or an array with all occurrences of search in subject
* (ignoring case) and replaced with the given replace value.
*
* **Note:** This function is very slow compared to the native version.
*
* @see http://php.net/str_ireplace
* @author Harry Fuecks <hfuecks@gmail.com
*
* @param string|array text to replace
* @param string|array replacement text
* @param string|array subject text
* @param integer number of matched and replaced needles will be returned via this parameter which is passed by reference
* @return string if the input was a string
* @return array if the input was an array
*/
public static function str_ireplace($search, $replace, $str, & $count = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_ireplace($search, $replace, $str, $count);
}
/**
* Case-insenstive UTF-8 version of strstr. Returns all of input string
* from the first occurrence of needle to the end.
*
* @see http://php.net/stristr
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string needle
* @return string matched substring if found
* @return boolean FALSE if the substring was not found
*/
public static function stristr($str, $search)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _stristr($str, $search);
}
/**
* Finds the length of the initial segment matching mask.
*
* @see http://php.net/strspn
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string mask for search
* @param integer start position of the string to examine
* @param integer length of the string to examine
* @return integer length of the initial segment that contains characters in the mask
*/
public static function strspn($str, $mask, $offset = NULL, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strspn($str, $mask, $offset, $length);
}
/**
* Finds the length of the initial segment not matching mask.
*
* @see http://php.net/strcspn
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string mask for search
* @param integer start position of the string to examine
* @param integer length of the string to examine
* @return integer length of the initial segment that contains characters not in the mask
*/
public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strcspn($str, $mask, $offset, $length);
}
/**
* Pads a UTF-8 string to a certain length with another string.
*
* @see http://php.net/str_pad
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param integer desired string length after padding
* @param string string to use as padding
* @param string padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
* @return string
*/
public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_pad($str, $final_str_length, $pad_str, $pad_type);
}
/**
* Converts a UTF-8 string to an array.
*
* @see http://php.net/str_split
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param integer maximum length of each chunk
* @return array
*/
public static function str_split($str, $split_length = 1)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_split($str, $split_length);
}
/**
* Reverses a UTF-8 string.
*
* @see http://php.net/strrev
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string string to be reversed
* @return string
*/
public static function strrev($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strrev($str);
}
/**
* Strips whitespace (or other UTF-8 characters) from the beginning and
* end of a string.
*
* @see http://php.net/trim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function trim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _trim($str, $charlist);
}
/**
* Strips whitespace (or other UTF-8 characters) from the beginning of a string.
*
* @see http://php.net/ltrim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function ltrim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ltrim($str, $charlist);
}
/**
* Strips whitespace (or other UTF-8 characters) from the end of a string.
*
* @see http://php.net/rtrim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function rtrim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _rtrim($str, $charlist);
}
/**
* Returns the unicode ordinal for a character.
*
* @see http://php.net/ord
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string UTF-8 encoded character
* @return integer
*/
public static function ord($chr)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ord($chr);
}
/**
* Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
* Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
* Occurrances of the BOM are ignored. Surrogates are not allowed.
*
* The Original Code is Mozilla Communicator client code.
* The Initial Developer of the Original Code is Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
* Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
* Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
*
* @param string UTF-8 encoded string
* @return array unicode code points
* @return boolean FALSE if the string is invalid
*/
public static function to_unicode($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _to_unicode($str);
}
/**
* Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
* Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
* Occurrances of the BOM are ignored. Surrogates are not allowed.
*
* The Original Code is Mozilla Communicator client code.
* The Initial Developer of the Original Code is Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
* Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
* Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
*
* @param array unicode code points representing a string
* @return string utf8 string of characters
* @return boolean FALSE if a code point cannot be found
*/
public static function from_unicode($arr)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _from_unicode($arr);
}
private function __construct()
{
// This is a static class
}
} // End utf8
if (UTF8::$server_utf8 === NULL)
{
// Determine if this server supports UTF-8 natively
UTF8::$server_utf8 = extension_loaded('mbstring');
}

View file

@ -1,3 +1,717 @@
<?php defined('SYSPATH') or die('No direct script access.');
/**
* A port of [phputf8][ref-p8] to a unified set of files. Provides multi-byte
* aware replacement string functions.
*
* For UTF-8 support to work correctly, the following requirements must be met:
*
* - PCRE needs to be compiled with UTF-8 support (--enable-utf8)
* - Support for [Unicode properties][ref-uc] is highly recommended (--enable-unicode-properties)
* - UTF-8 conversion will be much more reliable if the [iconv extension][ref-ie] is loaded
* - The [mbstring extension][ref-mb] is highly recommended, but must not be overloading string functions
*
* *This file is licensed differently from the rest of Kohana. As a port of
* [phputf8][ref-p8], this file is released under the LGPL.*
*
* [ref-p8]: http://phputf8.sourceforge.net/
* [ref-uc]: http://php.net/manual/reference.pcre.pattern.modifiers.php
* [ref-ie]: http://php.net/iconv
* [ref-mb]: http://php.net/mbstring
*
* @package Kohana
* @author Kohana Team
* @copyright (c) 2007-2008 Kohana Team
* @copyright (c) 2005 Harry Fuecks
* @license http://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt
*/
final class UTF8 {
class UTF8 extends Kohana_UTF8 {}
// Does the server support UTF-8 natively?
public static $server_utf8 = NULL;
// Called methods
public static $called = array();
/**
* Recursively cleans arrays, objects, and strings. Removes ASCII control
* codes and converts to the requested charset while silently discarding
* incompatible characters.
*
* @param mixed variable to clean
* @param string character set, defaults to UTF-8
* @return mixed
*/
public static function clean($var, $charset = 'UTF-8')
{
if (is_array($var) OR is_object($var))
{
foreach ($var as $key => $val)
{
// Recursion!
$var[self::clean($key)] = self::clean($val);
}
}
elseif (is_string($var) AND $var !== '')
{
// Remove control characters
$var = self::strip_ascii_ctrl($var);
if ( ! self::is_ascii($var))
{
// Disable notices
$ER = error_reporting(~E_NOTICE);
// iconv is expensive, so it is only used when needed
$var = iconv($charset, $charset.'//IGNORE', $var);
// Turn notices back on
error_reporting($ER);
}
}
return $var;
}
/**
* Tests whether a string contains only 7-bit ASCII bytes. This is used to
* determine when to use native functions or UTF-8 functions.
*
* @param string string to check
* @return bool
*/
public static function is_ascii($str)
{
return ! preg_match('/[^\x00-\x7F]/S', $str);
}
/**
* Strips out device control codes in the ASCII range.
*
* @param string string to clean
* @return string
*/
public static function strip_ascii_ctrl($str)
{
return preg_replace('/[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]+/S', '', $str);
}
/**
* Strips out all non-7bit ASCII bytes.
*
* @param string string to clean
* @return string
*/
public static function strip_non_ascii($str)
{
return preg_replace('/[^\x00-\x7F]+/S', '', $str);
}
/**
* Replaces special/accented UTF-8 characters by ASCII-7 'equivalents'.
*
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string string to transliterate
* @param integer -1 lowercase only, +1 uppercase only, 0 both cases
* @return string
*/
public static function transliterate_to_ascii($str, $case = 0)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _transliterate_to_ascii($str, $case);
}
/**
* Returns the length of the given string.
*
* @see http://php.net/strlen
*
* @param string string being measured for length
* @return integer
*/
public static function strlen($str)
{
if (UTF8::$server_utf8)
return mb_strlen($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strlen($str);
}
/**
* Finds position of first occurrence of a UTF-8 string.
*
* @see http://php.net/strlen
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string haystack
* @param string needle
* @param integer offset from which character in haystack to start searching
* @return integer position of needle
* @return boolean FALSE if the needle is not found
*/
public static function strpos($str, $search, $offset = 0)
{
if (UTF8::$server_utf8)
return mb_strpos($str, $search, $offset);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strpos($str, $search, $offset);
}
/**
* Finds position of last occurrence of a char in a UTF-8 string.
*
* @see http://php.net/strrpos
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string haystack
* @param string needle
* @param integer offset from which character in haystack to start searching
* @return integer position of needle
* @return boolean FALSE if the needle is not found
*/
public static function strrpos($str, $search, $offset = 0)
{
if (UTF8::$server_utf8)
return mb_strrpos($str, $search, $offset);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strrpos($str, $search, $offset);
}
/**
* Returns part of a UTF-8 string.
*
* @see http://php.net/substr
* @author Chris Smith <chris@jalakai.co.uk>
*
* @param string input string
* @param integer offset
* @param integer length limit
* @return string
*/
public static function substr($str, $offset, $length = NULL)
{
if (UTF8::$server_utf8)
return ($length === NULL) ? mb_substr($str, $offset) : mb_substr($str, $offset, $length);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _substr($str, $offset, $length);
}
/**
* Replaces text within a portion of a UTF-8 string.
*
* @see http://php.net/substr_replace
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string replacement string
* @param integer offset
* @return string
*/
public static function substr_replace($str, $replacement, $offset, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _substr_replace($str, $replacement, $offset, $length);
}
/**
* Makes a UTF-8 string lowercase.
*
* @see http://php.net/strtolower
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string mixed case string
* @return string
*/
public static function strtolower($str)
{
if (UTF8::$server_utf8)
return mb_strtolower($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strtolower($str);
}
/**
* Makes a UTF-8 string uppercase.
*
* @see http://php.net/strtoupper
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string mixed case string
* @return string
*/
public static function strtoupper($str)
{
if (UTF8::$server_utf8)
return mb_strtoupper($str);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strtoupper($str);
}
/**
* Makes a UTF-8 string's first character uppercase.
*
* @see http://php.net/ucfirst
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string mixed case string
* @return string
*/
public static function ucfirst($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ucfirst($str);
}
/**
* Makes the first character of every word in a UTF-8 string uppercase.
*
* @see http://php.net/ucwords
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string mixed case string
* @return string
*/
public static function ucwords($str)
{
if (UTF8::$server_utf8)
return mb_convert_case($str, MB_CASE_TITLE);
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ucwords($str);
}
/**
* Case-insensitive UTF-8 string comparison.
*
* @see http://php.net/strcasecmp
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string string to compare
* @param string string to compare
* @return integer less than 0 if str1 is less than str2
* @return integer greater than 0 if str1 is greater than str2
* @return integer 0 if they are equal
*/
public static function strcasecmp($str1, $str2)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strcasecmp($str1, $str2);
}
/**
* Returns a string or an array with all occurrences of search in subject
* (ignoring case) and replaced with the given replace value.
*
* **Note:** This function is very slow compared to the native version.
*
* @see http://php.net/str_ireplace
* @author Harry Fuecks <hfuecks@gmail.com
*
* @param string|array text to replace
* @param string|array replacement text
* @param string|array subject text
* @param integer number of matched and replaced needles will be returned via this parameter which is passed by reference
* @return string if the input was a string
* @return array if the input was an array
*/
public static function str_ireplace($search, $replace, $str, & $count = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_ireplace($search, $replace, $str, $count);
}
/**
* Case-insenstive UTF-8 version of strstr. Returns all of input string
* from the first occurrence of needle to the end.
*
* @see http://php.net/stristr
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string needle
* @return string matched substring if found
* @return boolean FALSE if the substring was not found
*/
public static function stristr($str, $search)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _stristr($str, $search);
}
/**
* Finds the length of the initial segment matching mask.
*
* @see http://php.net/strspn
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string mask for search
* @param integer start position of the string to examine
* @param integer length of the string to examine
* @return integer length of the initial segment that contains characters in the mask
*/
public static function strspn($str, $mask, $offset = NULL, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strspn($str, $mask, $offset, $length);
}
/**
* Finds the length of the initial segment not matching mask.
*
* @see http://php.net/strcspn
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param string mask for search
* @param integer start position of the string to examine
* @param integer length of the string to examine
* @return integer length of the initial segment that contains characters not in the mask
*/
public static function strcspn($str, $mask, $offset = NULL, $length = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strcspn($str, $mask, $offset, $length);
}
/**
* Pads a UTF-8 string to a certain length with another string.
*
* @see http://php.net/str_pad
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param integer desired string length after padding
* @param string string to use as padding
* @param string padding type: STR_PAD_RIGHT, STR_PAD_LEFT, or STR_PAD_BOTH
* @return string
*/
public static function str_pad($str, $final_str_length, $pad_str = ' ', $pad_type = STR_PAD_RIGHT)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_pad($str, $final_str_length, $pad_str, $pad_type);
}
/**
* Converts a UTF-8 string to an array.
*
* @see http://php.net/str_split
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string input string
* @param integer maximum length of each chunk
* @return array
*/
public static function str_split($str, $split_length = 1)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _str_split($str, $split_length);
}
/**
* Reverses a UTF-8 string.
*
* @see http://php.net/strrev
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string string to be reversed
* @return string
*/
public static function strrev($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _strrev($str);
}
/**
* Strips whitespace (or other UTF-8 characters) from the beginning and
* end of a string.
*
* @see http://php.net/trim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function trim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _trim($str, $charlist);
}
/**
* Strips whitespace (or other UTF-8 characters) from the beginning of a string.
*
* @see http://php.net/ltrim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function ltrim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ltrim($str, $charlist);
}
/**
* Strips whitespace (or other UTF-8 characters) from the end of a string.
*
* @see http://php.net/rtrim
* @author Andreas Gohr <andi@splitbrain.org>
*
* @param string input string
* @param string string of characters to remove
* @return string
*/
public static function rtrim($str, $charlist = NULL)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _rtrim($str, $charlist);
}
/**
* Returns the unicode ordinal for a character.
*
* @see http://php.net/ord
* @author Harry Fuecks <hfuecks@gmail.com>
*
* @param string UTF-8 encoded character
* @return integer
*/
public static function ord($chr)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _ord($chr);
}
/**
* Takes an UTF-8 string and returns an array of ints representing the Unicode characters.
* Astral planes are supported i.e. the ints in the output can be > 0xFFFF.
* Occurrances of the BOM are ignored. Surrogates are not allowed.
*
* The Original Code is Mozilla Communicator client code.
* The Initial Developer of the Original Code is Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
* Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
* Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>
*
* @param string UTF-8 encoded string
* @return array unicode code points
* @return boolean FALSE if the string is invalid
*/
public static function to_unicode($str)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _to_unicode($str);
}
/**
* Takes an array of ints representing the Unicode characters and returns a UTF-8 string.
* Astral planes are supported i.e. the ints in the input can be > 0xFFFF.
* Occurrances of the BOM are ignored. Surrogates are not allowed.
*
* The Original Code is Mozilla Communicator client code.
* The Initial Developer of the Original Code is Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998 the Initial Developer.
* Ported to PHP by Henri Sivonen <hsivonen@iki.fi>, see http://hsivonen.iki.fi/php-utf8/
* Slight modifications to fit with phputf8 library by Harry Fuecks <hfuecks@gmail.com>.
*
* @param array unicode code points representing a string
* @return string utf8 string of characters
* @return boolean FALSE if a code point cannot be found
*/
public static function from_unicode($arr)
{
if ( ! isset(self::$called[__FUNCTION__]))
{
require SYSPATH.'utf8/'.__FUNCTION__.EXT;
// Function has been called
self::$called[__FUNCTION__] = TRUE;
}
return _from_unicode($arr);
}
private function __construct()
{
// This is a static class
}
} // End utf8
if (UTF8::$server_utf8 === NULL)
{
// Determine if this server supports UTF-8 natively
UTF8::$server_utf8 = extension_loaded('mbstring');
}