view goodreads/UTFUtil.php @ 24:341d8c681357

prepare for temporary closure
author Robert Boland <robert@markup.co.uk>
date Tue, 01 Jan 2019 07:29:44 -0500
parents 077b0a0a3e6d
children
line wrap: on
line source

<?PHP
/**
 * Returns $str as UTF-8, transcoding it only when it does not already look
 * like UTF-8 according to seems_utf8().
 *
 * @param string      $str           Byte string to normalise.
 * @param string|null $from_encoding Encoding to assume for input that fails
 *                                   the UTF-8 check, e.g. 'ISO-8859-1' or
 *                                   'Windows-1252'. When null (the default)
 *                                   mb_convert_encoding() is called without a
 *                                   source encoding and therefore falls back
 *                                   to mb_internal_encoding(), which is the
 *                                   behaviour this function had before the
 *                                   parameter was introduced.
 * @return string|false The unchanged input when it already passes the UTF-8
 *                      check, otherwise whatever mb_convert_encoding() returns.
 */
function convert_utf8($str, $from_encoding = null){
    // Already UTF-8: hand the input back untouched.
    if (seems_utf8($str)) {
        return $str;
    }

    if ($from_encoding === null) {
        // Legacy path, kept for existing callers. NOTE(review): when the
        // internal encoding is itself UTF-8, the offending bytes are replaced
        // by the mbstring substitute character instead of being transcoded;
        // pass $from_encoding to convert legacy single-byte text losslessly.
        return mb_convert_encoding($str, 'UTF-8');
    }

    return mb_convert_encoding($str, 'UTF-8', $from_encoding);
}

/**
 * Reports whether a byte string is well-formed UTF-8.
 *
 * Validation is delegated to PCRE: with the `u` modifier preg_match() checks
 * the whole subject for UTF-8 validity before it attempts the match. Compared
 * with a check of byte bit-patterns alone, this also rejects input that only
 * has the right "shape": 5- and 6-byte sequences, overlong encodings such as
 * C0 AF, UTF-16 surrogates (ED A0 80 .. ED BF BF) and code points above
 * U+10FFFF, none of which are legal UTF-8 under RFC 3629.
 *
 * @param string $Str Byte string to inspect.
 * @return bool True for the empty string, pure ASCII and any other valid
 *              UTF-8; false as soon as the input contains a malformed or
 *              truncated sequence.
 */
function seems_utf8($Str) {
    // The empty pattern matches any valid subject (result 1). For a subject
    // that is not valid UTF-8 preg_match() returns false rather than 0, so a
    // strict comparison against 1 distinguishes the two cases.
    return preg_match('//u', $Str) === 1;
}
?>