UNPKG

phpjs

Version:

php.js offers community built php functions in javascript

60 lines (56 loc) 2.7 kB
function strlen(string) { // From: http://phpjs.org/functions // + original by: Kevin van Zonneveld (http://kevin.vanzonneveld.net) // + improved by: Sakimori // + input by: Kirk Strobeck // + improved by: Kevin van Zonneveld (http://kevin.vanzonneveld.net) // + bugfixed by: Onno Marsman // + revised by: Brett Zamir (http://brett-zamir.me) // % note 1: May look like overkill, but in order to be truly faithful to handling all Unicode // % note 1: characters and to this function in PHP which does not count the number of bytes // % note 1: but counts the number of characters, something like this is really necessary. // * example 1: strlen('Kevin van Zonneveld'); // * returns 1: 19 // * example 2: ini_set('unicode.semantics', 'on'); // * example 2: strlen('A\ud87e\udc04Z'); // * returns 2: 3 var str = string + ''; var i = 0, chr = '', lgth = 0; if (!this.php_js || !this.php_js.ini || !this.php_js.ini['unicode.semantics'] || this.php_js.ini['unicode.semantics'].local_value.toLowerCase() !== 'on') { return string.length; } var getWholeChar = function(str, i) { var code = str.charCodeAt(i); var next = '', prev = ''; if (0xD800 <= code && code <= 0xDBFF) { // High surrogate (could change last hex to 0xDB7F to treat high private surrogates as single characters) if (str.length <= (i + 1)) { throw 'High surrogate without following low surrogate'; } next = str.charCodeAt(i + 1); if (0xDC00 > next || next > 0xDFFF) { throw 'High surrogate without following low surrogate'; } return str.charAt(i) + str.charAt(i + 1); } else if (0xDC00 <= code && code <= 0xDFFF) { // Low surrogate if (i === 0) { throw 'Low surrogate without preceding high surrogate'; } prev = str.charCodeAt(i - 1); if (0xD800 > prev || prev > 0xDBFF) { //(could change last hex to 0xDB7F to treat high private surrogates as single characters) throw 'Low surrogate without preceding high surrogate'; } return false; // We can pass over low surrogates now as the second component in a pair which we have already processed } return str.charAt(i); }; for (i = 0, lgth = 0; i < str.length; i++) { if ((chr = getWholeChar(str, i)) === false) { continue; } // Adapt this line at the top of any loop, passing in the whole string and the current iteration and returning a variable to represent the individual character; purpose is to treat the first part of a surrogate pair as the whole character and then ignore the second part lgth++; } return lgth; }