php 汉字转拼音类,完善的汉字转拼音php转换类

段干俊茂

2023-12-01

办法是利用矩阵：GBK 编码的汉字由两个字节组成，一个高位字节，一个低位字节，取值范围分别是 129-254 和 64-254

每个汉字拼音最长为8个字符，由此组成二维矩阵进行查询，弊端是无法解决多音字问题

/**
 * Converts double-byte Chinese text to pinyin using a fixed-width matrix file.
 *
 * The lookup treats every byte above 128 as the lead byte of a two-byte
 * character and uses (lead, trail) as coordinates into the matrix file:
 * each row holds (254 - 63) entries of 8 bytes followed by 2 extra bytes,
 * and rows start at lead byte 129, columns at trail byte 64.
 * NOTE(review): this layout matches GBK-style input; UTF-8 input is presumably
 * expected to be converted by the caller first - confirm against py.qdb.
 *
 * Polyphonic characters cannot be disambiguated: one entry per character.
 */
class pinyin
{
    /**
     * When truthy the whole matrix file is loaded into memory once and reused;
     * otherwise every uncached lookup seeks in the file.
     */
    public $ismemorycache = 1;

    /** When truthy only the first byte of each syllable is emitted. */
    public $isfrist = 1;

    /** Path of the pinyin matrix file. */
    public $path = "py.qdb";

    /** In-memory copy of the matrix file (filled lazily in memory mode). */
    public $memorycache;

    /** Open handle on the matrix file (filled lazily in file mode). */
    public $handle;

    /** List of recorded error messages, or null when nothing has failed. */
    public $errormsgbox;

    /** Result of the most recent conversion. */
    public $result;

    /** Cache of already resolved syllables, indexed as [lead byte][trail byte]. */
    public $array = array();

    /** Maps tone-marked vowels to their plain replacement ("ü" variants become "v"). */
    public $n_t = array("ā" => "a","á" => "a","ǎ" => "a","à" => "a","ɑ" => "a",
        "ō" => "o","ó" => "o","ǒ" => "o","ò" => "o",
        "ē" => "e","é" => "e","ě" => "e","è" => "e","ê" => "e",
        "ī" => "i","í" => "i","ǐ" => "i","ì" => "i",
        "ū" => "u","ú" => "u","ǔ" => "u","ù" => "u",
        "ǖ" => "v","ǘ" => "v","ǚ" => "v","ǜ" => "v","ü" => "v"
    );

    /**
     * Conversion entry point.
     *
     * @param string|array $str        Text to convert, or an array of texts.
     * @param mixed        $istonemark Truthy keeps tone marks; falsy replaces them via $n_t.
     * @param string       $suffix     Appended after every converted character (default: empty string).
     * @return string|array Converted text; for array input, an array with the same keys.
     */
    function chinesetopinyin($str, $istonemark = 0, $suffix = "")
    {
        $this->py($str, $istonemark, $suffix);

        return $this->result;
    }

    /**
     * Returns the result of the most recent conversion.
     *
     * @return string|array|null
     */
    function get()
    {
        return $this->result;
    }

    /**
     * Performs the conversion and stores the outcome in $result.
     *
     * @param string|array $str Text to convert, or an array of texts.
     * @param mixed        $n   Truthy keeps tone marks; falsy replaces them via $n_t.
     * @param string       $s   Suffix appended after every converted character.
     * @return string|array|false The converted value, or false when the matrix
     *                            file cannot be used (an error message is recorded).
     */
    function py($str, $n = 0, $s = "")
    {
        // Arrays must be handled before any string function touches $str.
        if (is_array($str)) {
            $converted = array();
            foreach ($str as $key => $val) {
                $this->py($val, $n, $s);
                $converted[$key] = $this->result;
            }
            $this->result = $converted;

            return $this->result;
        }

        // Reset first so an empty input never exposes the previous result.
        $this->result = "";
        $str = (string) $str;
        $strlength = strlen($str);
        if ($strlength == 0) {
            return "";
        }

        if (!$this->opensource()) {
            return false;
        }

        $strtrlen = $this->isfrist ? 1 : 8;
        for ($i = 0; $i < $strlength; $i++) {
            $byte = $str[$i];
            $ord1 = ord($byte);
            $ord2 = ($i + 1 < $strlength) ? ord($str[$i + 1]) : 0;

            // A lead byte needs a trail byte inside the matrix columns; a
            // dangling or out-of-range pair is copied through unchanged
            // instead of producing a negative file offset.
            if ($ord1 > 128 && $ord2 >= 64) {
                $i++; // the trail byte has been consumed
                $syllable = $this->lookup($ord1, $ord2);
                if (!$n) {
                    // Strip tone marks before truncating so a multi-byte
                    // vowel is never cut in half in first-letter mode.
                    $syllable = strtr($syllable, $this->n_t);
                }
                $this->result .= substr($syllable, 0, $strtrlen) . $s;
            } else {
                $this->result .= $byte;
            }
        }

        return $this->result;
    }

    /**
     * Makes sure the matrix data is available for the currently selected mode.
     *
     * @return bool False when the file is missing or unreadable.
     */
    private function opensource()
    {
        if ($this->ismemorycache) {
            if (is_string($this->memorycache) && $this->memorycache !== "") {
                return true;
            }
        } elseif (is_resource($this->handle)) {
            return true;
        }

        if (!file_exists($this->path)) {
            $this->addoneerrormsg(1, "拼音文件路径不存在");

            return false;
        }

        if ($this->ismemorycache) {
            $data = file_get_contents($this->path);
            if ($data === false) {
                $this->addoneerrormsg(2, "拼音文件无法读取");

                return false;
            }
            $this->memorycache = $data;

            return true;
        }

        $handle = fopen($this->path, "r");
        if ($handle === false) {
            $this->addoneerrormsg(2, "拼音文件无法读取");

            return false;
        }
        $this->handle = $handle;

        return true;
    }

    /**
     * Resolves one (lead, trail) byte pair to its trimmed matrix entry.
     *
     * @param int $ord1 Lead byte value.
     * @param int $ord2 Trail byte value.
     * @return string Entry text, or an empty string when the offset is outside the file.
     */
    private function lookup($ord1, $ord2)
    {
        if (!isset($this->array[$ord1][$ord2])) {
            // Row width is 191 entries * 8 bytes + 2 extra bytes per row.
            $leng = ($ord1 - 129) * ((254 - 63) * 8 + 2) + ($ord2 - 64) * 8;

            if ($this->ismemorycache) {
                $raw = substr($this->memorycache, $leng, 8);
            } else {
                fseek($this->handle, $leng);
                // fread returns the full 8 bytes; fgets($h, 8) would stop at 7.
                $raw = fread($this->handle, 8);
            }

            $this->array[$ord1][$ord2] = trim((string) $raw);
        }

        return $this->array[$ord1][$ord2];
    }

    /**
     * Records one error message.
     *
     * @param int    $no     Error number.
     * @param string $reason Human readable reason.
     */
    function addoneerrormsg($no, $reason)
    {
        $this->errormsgbox[] = "error:" . $no . "," . $reason;
    }

    /**
     * Echoes every recorded error message, each followed by two CRLF pairs.
     */
    function showerrormsg()
    {
        if (!is_array($this->errormsgbox)) {
            return;
        }

        foreach ($this->errormsgbox as $val) {
            echo $val . "\r\n\r\n";
        }
    }

    /**
     * Prints recorded errors and releases the file handle, if one was opened.
     */
    function __destruct()
    {
        if (is_array($this->errormsgbox)) {
            $this->showerrormsg();
        }

        if (is_resource($this->handle)) {
            fclose($this->handle);
        }
    }
}

之前遇见过这个难题，发现流传的代码都不怎么完善，汉字库总共有20k 的汉字，大多数的是拿几百个常用汉字打算糊弄过去，在火星文流传的今天，是不行的。

还有种读取词典然后转换的，每行一个汉字|拼音，这种弊端非常大，速度慢，耗费巨大内存，仅仅explode一下读入数组，再循环一次，就能耗费上百m的内存，如果一个单页面耗费上百m，负载稍微大点只能泪奔了。

php 汉字转拼音类,完善的汉字转拼音php转换类

相关阅读

相关文章

相关问答

相关文档