国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
Du kan inte välja fler än 25 ämnen Ämnen måste starta med en bokstav eller siffra, kan innehålla bindestreck ('-') och vara max 35 tecken långa.

410 lines
11KB

  1. <?php if(!defined('DEDEINC')) exit('dedecms');
  2. /**
  3. * DedeCMS中用到的字符编码转换的小助手函数
  4. *
  5. * @version $Id: charset.helper.php 1 2010-07-05 11:43:09Z tianya $
  6. * @package DedeCMS.Helpers
  7. * @copyright Copyright (c) 2020, DedeBIZ.COM
  8. * @license https://www.dedebiz.com/license
  9. * @link https://www.dedebiz.com
  10. */
  // Lazily-filled lookup caches used by the non-iconv fallback paths of the
  // converters in this file.
  // $UC2GBTABLE and $CODETABLE are keyed arrays. They must start as arrays:
  // since PHP 7.1 an index assignment on an empty string pads the string
  // instead of silently turning it into an array, which corrupts the tables.
  $UC2GBTABLE = $CODETABLE = array();
  // $BIG5_DATA and $GB_DATA hold raw table file contents, so they stay strings.
  $BIG5_DATA = $GB_DATA = '';
  // Unicode-hex => GBK-bytes dictionary, loaded on first use by UnicodeUrl2Gbk().
  $GbkUniDic = null;
  /**
   * Convert a UTF-8 string to GB (GBK) encoding.
   *
   * When iconv is available the conversion is delegated to it. Otherwise a
   * table-driven fallback is used, backed by DEDEINC/data/gb2312-utf8.dat.
   * In the fallback, characters missing from the table are emitted as decimal
   * numeric entities ("&#NNNN;"), malformed UTF-8 bytes are dropped, and the
   * result is trimmed.
   *
   * @access    public
   * @param     string  $utfstr  UTF-8 encoded input
   * @return    string  GB encoded output (iconv() may return false on failure)
   */
  if ( ! function_exists('utf82gb'))
  {
      function utf82gb($utfstr)
      {
          if (function_exists('iconv'))
          {
              return iconv('utf-8', 'gbk//ignore', $utfstr);
          }
          global $UC2GBTABLE;
          if (trim($utfstr) == "")
          {
              return $utfstr;
          }
          // The file-level initialiser may leave this global as '' ; on
          // PHP >= 7.1 indexing into an empty string pads the string instead
          // of creating an array, so force an array before filling it.
          if (!is_array($UC2GBTABLE))
          {
              $UC2GBTABLE = array();
          }
          if (empty($UC2GBTABLE))
          {
              $fp = fopen(DEDEINC."/data/gb2312-utf8.dat", "r");
              if ($fp)
              {
                  // Each line holds two 6-character hex fields (offsets 0 and 7);
                  // the second is used as the key, the first as the value.
                  while ($l = fgets($fp, 15))
                  {
                      $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
                  }
                  fclose($fp);
              }
          }
          $okstr = "";
          $ulen = strlen($utfstr);
          $i = 0;
          while ($i < $ulen)
          {
              $byte = ord($utfstr[$i]);
              if ($byte < 0x80)
              {
                  // Plain ASCII is copied through unchanged.
                  $okstr .= $utfstr[$i];
                  $i++;
                  continue;
              }
              // Derive the sequence length from the lead byte. The original
              // code always assumed 3 bytes, which mis-decoded 2- and 4-byte
              // characters and swallowed the bytes that followed them.
              if (($byte & 0xE0) == 0xC0)
              {
                  $seqlen = 2;
              }
              else if (($byte & 0xF0) == 0xE0)
              {
                  $seqlen = 3;
              }
              else if (($byte & 0xF8) == 0xF0)
              {
                  $seqlen = 4;
              }
              else
              {
                  // Stray continuation byte or invalid lead byte: drop it.
                  $i++;
                  continue;
              }
              if ($i + $seqlen > $ulen)
              {
                  // Truncated sequence at the end of the input: nothing more to decode.
                  break;
              }
              $code = utf82u(substr($utfstr, $i, $seqlen));
              $i += $seqlen;
              if (isset($UC2GBTABLE[$code]))
              {
                  // Table values are offset by 0x8080 to obtain the two GB bytes.
                  $gb = $UC2GBTABLE[$code] + 0x8080;
                  $okstr .= chr(($gb >> 8) & 0xFF).chr($gb & 0xFF);
              }
              else
              {
                  $okstr .= "&#".$code.";";
              }
          }
          return trim($okstr);
      }
  }
  /**
   * Convert a GB (GBK) string to UTF-8.
   *
   * When iconv is available the conversion is delegated to it. Otherwise a
   * table-driven fallback is used, backed by DEDEINC/data/gb2312-utf8.dat.
   * In the fallback, double-byte characters that are missing from the table
   * (and a dangling lead byte at the end of the input) are skipped.
   *
   * @access    public
   * @param     string  $gbstr  GBK encoded input
   * @return    string  UTF-8 encoded output (iconv() may return false on failure)
   */
  if ( ! function_exists('gb2utf8'))
  {
      function gb2utf8($gbstr)
      {
          if (function_exists('iconv'))
          {
              return iconv('gbk', 'utf-8//ignore', $gbstr);
          }
          global $CODETABLE;
          if (trim($gbstr) == "")
          {
              return $gbstr;
          }
          // The file-level initialiser may leave this global as '' ; on
          // PHP >= 7.1 indexing into an empty string pads the string instead
          // of creating an array, so force an array before filling it.
          if (!is_array($CODETABLE))
          {
              $CODETABLE = array();
          }
          if (empty($CODETABLE))
          {
              $fp = fopen(DEDEINC."/data/gb2312-utf8.dat", "r");
              if ($fp)
              {
                  // Each line holds two 6-character hex fields (offsets 0 and 7);
                  // the first is used as the key, the second is stored as text.
                  while ($l = fgets($fp, 15))
                  {
                      $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
                  }
                  fclose($fp);
              }
          }
          $ret = "";
          $len = strlen($gbstr);
          $i = 0;
          // Walk the input by index instead of repeatedly re-slicing the string.
          while ($i < $len)
          {
              $hi = ord($gbstr[$i]);
              if ($hi <= 0x80)
              {
                  // Single-byte character: copied through unchanged.
                  $ret .= $gbstr[$i];
                  $i++;
                  continue;
              }
              if ($i + 1 >= $len)
              {
                  // Lead byte with no trail byte: nothing valid left to decode.
                  break;
              }
              $key = (($hi << 8) | ord($gbstr[$i + 1])) - 0x8080;
              $i += 2;
              if (!isset($CODETABLE[$key]))
              {
                  // Previously an unmapped pair was looked up anyway (with the
                  // notice suppressed) and ended up as a NUL byte in the output.
                  continue;
              }
              // u2utf8() returns the UTF-8 bytes as concatenated decimal values
              // (three digits per byte for multi-byte characters).
              $decimals = u2utf8(hexdec($CODETABLE[$key]));
              if ($decimals != "")
              {
                  foreach (str_split($decimals, 3) as $dec)
                  {
                      $ret .= chr((int) $dec);
                  }
              }
          }
          return $ret;
      }
  }
  /**
   * Encode a Unicode code point as UTF-8, returned as decimal byte values.
   *
   * NOTE: the result is NOT a binary UTF-8 string. It is the decimal value of
   * each UTF-8 byte concatenated into one string (e.g. 0x4E2D => "228184173").
   * For code points below 0x80 the code point itself is returned in decimal
   * (e.g. 0x41 => "65"). gb2utf8() in this file decodes that format three
   * digits at a time, so the format is kept as-is.
   *
   * @access    public
   * @param     int     $c  Unicode code point
   * @return    string  concatenated decimal byte values; "" when $c >= 0x200000
   */
  if ( ! function_exists('u2utf8'))
  {
      function u2utf8($c)
      {
          // The original initialised $str inside "for (...; $i < count($c); ...)".
          // count() on an integer warns on PHP 7.2+ and throws a TypeError on
          // PHP 8, so the accumulator is now initialised directly.
          $str = "";
          if ($c < 0x80)
          {
              $str .= $c;
          }
          else if ($c < 0x800)
          {
              $str .= (0xC0 | $c >> 6);
              $str .= (0x80 | $c & 0x3F);
          }
          else if ($c < 0x10000)
          {
              $str .= (0xE0 | $c >> 12);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          }
          else if ($c < 0x200000)
          {
              $str .= (0xF0 | $c >> 18);
              $str .= (0x80 | $c >> 12 & 0x3F);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          }
          return $str;
      }
  }
  /**
   * Decode one UTF-8 encoded character into its Unicode code point.
   *
   * The sequence length is taken from the byte length of $c; the bytes are
   * not validated.
   *
   * @access    public
   * @param     string  $c  a single character as 1 to 4 UTF-8 bytes
   * @return    int|null  the code point, or null when $c is not 1-4 bytes long
   */
  if ( ! function_exists('utf82u'))
  {
      function utf82u($c)
      {
          $width = strlen($c);
          if ($width == 1)
          {
              return ord($c);
          }
          if ($width < 2 || $width > 4)
          {
              return null;
          }
          // Payload mask applied to the lead byte, indexed by sequence length.
          $leadMask = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
          $codepoint = ord($c[0]) & $leadMask[$width];
          // Every following byte contributes its low six bits.
          for ($pos = 1; $pos < $width; $pos++)
          {
              $codepoint = ($codepoint << 6) + (ord($c[$pos]) & 0x3f);
          }
          return $codepoint;
      }
  }
  /**
   * Convert a Big5 string to GB (GBK) encoding.
   *
   * When iconv is available the conversion is delegated to it. Otherwise each
   * double-byte character is replaced in place using the lookup table read
   * from DEDEINC/data/big5-gb.dat. Pairs whose table offset falls outside the
   * table are left unchanged.
   *
   * @access    public
   * @param     string  $Text  Big5 encoded input
   * @return    string  GB encoded output (iconv() may return false on failure)
   */
  if ( ! function_exists('big52gb'))
  {
      function big52gb($Text)
      {
          if (function_exists('iconv'))
          {
              return iconv('big5', 'gbk//ignore', $Text);
          }
          global $BIG5_DATA;
          if (empty($BIG5_DATA))
          {
              $filename = DEDEINC."/data/big5-gb.dat";
              // Only read the table when it is actually readable, instead of
              // passing a failed handle to fread()/fclose().
              if (is_readable($filename))
              {
                  $BIG5_DATA = (string) file_get_contents($filename);
              }
          }
          $tableLen = strlen((string) $BIG5_DATA);
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++)
          {
              $h = ord($Text[$i]);
              if ($h < 0x80)
              {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              if ($h == 161 && $l == 64)
              {
                  // Two bytes are written back below, so the replacement must
                  // be two characters long; a one-character string makes the
                  // second assignment fail.
                  $gbstr = "  ";
              }
              else
              {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  if ($p < 0 || $p + 1 >= $tableLen)
                  {
                      // Offset outside the table (negative offsets would read
                      // from the END of the string on PHP >= 7.1): keep the
                      // original pair and move past it.
                      $i++;
                      continue;
                  }
                  $gbstr = $BIG5_DATA[$p].$BIG5_DATA[$p + 1];
              }
              $Text[$i] = $gbstr[0];
              $Text[$i + 1] = $gbstr[1];
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert a GB (GBK) string to Big5 encoding.
   *
   * When iconv is available the conversion is delegated to it. Otherwise each
   * double-byte character is replaced in place using the lookup table read
   * from DEDEINC/data/gb-big5.dat. Pairs whose table offset falls outside the
   * table are left unchanged.
   *
   * @access    public
   * @param     string  $Text  GBK encoded input
   * @return    string  Big5 encoded output (iconv() may return false on failure)
   */
  if ( ! function_exists('gb2big5'))
  {
      function gb2big5($Text)
      {
          if (function_exists('iconv'))
          {
              return iconv('gbk', 'big5//ignore', $Text);
          }
          global $GB_DATA;
          if (empty($GB_DATA))
          {
              $filename = DEDEINC."/data/gb-big5.dat";
              // The original read the file into a local variable ($gb) that
              // was never used, so $GB_DATA stayed empty and every lookup
              // failed. Store the table in the global that is actually read.
              if (is_readable($filename))
              {
                  $GB_DATA = (string) file_get_contents($filename);
              }
          }
          $tableLen = strlen((string) $GB_DATA);
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++)
          {
              $h = ord($Text[$i]);
              if ($h < 0x80)
              {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              // NOTE(review): this 161/64 special case is identical to the one
              // in big52gb(); confirm it is the intended byte pair for GBK input.
              if ($h == 161 && $l == 64)
              {
                  // Two bytes are written back below, so the replacement must
                  // be two characters long.
                  $big = "  ";
              }
              else
              {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  if ($p < 0 || $p + 1 >= $tableLen)
                  {
                      // Offset outside the table (negative offsets would read
                      // from the END of the string on PHP >= 7.1): keep the
                      // original pair and move past it.
                      $i++;
                      continue;
                  }
                  $big = $GB_DATA[$p].$GB_DATA[$p + 1];
              }
              $Text[$i] = $big[0];
              $Text[$i + 1] = $big[1];
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert "%uXXXX" escapes in a string to GBK bytes.
   *
   * Every "%u" followed by exactly four hex digits is looked up (lower-cased)
   * in $GLOBALS['GbkUniDic']; a hit is replaced by the stored bytes, a miss by
   * a decimal numeric entity ("&#NNNN;"). The literal marker "$#$" is turned
   * into "+" first. All other text, including "%u" that is not followed by
   * four hex digits, is copied through unchanged.
   *
   * The dictionary is loaded on first use from DEDEINC/data/gbk-unicode.dat,
   * read as consecutive 4-byte records (2-byte key, 2-byte value).
   *
   * @access    public
   * @param     string  $str  text containing %uXXXX escapes
   * @return    string
   */
  if ( ! function_exists('UnicodeUrl2Gbk'))
  {
      function UnicodeUrl2Gbk($str)
      {
          // Load the lookup dictionary once.
          if (!isset($GLOBALS['GbkUniDic']))
          {
              $fp = fopen(DEDEINC.'/data/gbk-unicode.dat', 'rb');
              // A failed fopen() must not reach the feof() loop: on PHP < 8
              // feof(false) never becomes true and the loop spins forever.
              if ($fp)
              {
                  $loaded = array();
                  while (!feof($fp))
                  {
                      $uniBytes = (string) fread($fp, 2);
                      $gbkBytes = (string) fread($fp, 2);
                      // Skip the short/empty read that happens at end of file.
                      if (strlen($uniBytes) == 2 && strlen($gbkBytes) == 2)
                      {
                          $loaded[bin2hex($uniBytes)] = $gbkBytes;
                      }
                  }
                  fclose($fp);
                  $GLOBALS['GbkUniDic'] = $loaded;
              }
          }
          $dic = isset($GLOBALS['GbkUniDic']) ? $GLOBALS['GbkUniDic'] : array();

          // Process the string.
          $str = str_replace('$#$', '+', $str);
          $glen = strlen($str);
          $okstr = "";
          for ($i = 0; $i < $glen; $i++)
          {
              // A complete escape needs six characters: '%', 'u' and 4 hex digits.
              if ($str[$i] == '%' && $i + 5 < $glen && $str[$i + 1] == 'u')
              {
                  $uni = strtolower(substr($str, $i + 2, 4));
                  if (strspn($uni, '0123456789abcdef') == 4)
                  {
                      if (isset($dic[$uni]))
                      {
                          $okstr .= $dic[$uni];
                      }
                      else
                      {
                          $okstr .= "&#".hexdec($uni).";";
                      }
                      // Skip the rest of the escape; the loop's $i++ covers the 6th char.
                      $i += 5;
                      continue;
                  }
              }
              $okstr .= $str[$i];
          }
          return $okstr;
      }
  }
  354. /**
  355. * 自动转换字符集 支持数组转换
  356. *
  357. * @access public
  358. * @param string $str 转换的内容
  359. * @return string
  360. */
  361. if ( ! function_exists('AutoCharset'))
  362. {
  /**
   * Convert a string, or every key and value of an array (recursively), from
   * one character set to another.
   *
   * "utf8" (any letter case) is accepted as an alias of "utf-8". The input is
   * returned untouched when both charsets are the same, when it is empty,
   * when it is a non-string scalar, or when it is neither a string nor an
   * array. Strings are converted with mb_convert_encoding() if present, else
   * iconv(), else returned as-is.
   *
   * @access    public
   * @param     mixed   $fContents  string or array to convert
   * @param     string  $from       source charset
   * @param     string  $to         target charset
   * @return    mixed
   */
  function AutoCharset($fContents, $from='gbk', $to='utf-8')
  {
      if (strtoupper($from) == 'UTF8')
      {
          $from = 'utf-8';
      }
      if (strtoupper($to) == 'UTF8')
      {
          $to = 'utf-8';
      }

      // Nothing to do: same charset, empty input, or a non-string scalar.
      $sameCharset = (strtoupper($from) === strtoupper($to));
      $plainScalar = (is_scalar($fContents) && !is_string($fContents));
      if ($sameCharset || empty($fContents) || $plainScalar)
      {
          return $fContents;
      }

      if (is_array($fContents))
      {
          foreach ($fContents as $oldKey => $item)
          {
              $newKey = AutoCharset($oldKey, $from, $to);
              $fContents[$newKey] = AutoCharset($item, $from, $to);
              // Drop the entry under the old key once it has been re-keyed.
              if ($oldKey != $newKey)
              {
                  unset($fContents[$oldKey]);
              }
          }
          return $fContents;
      }

      if (!is_string($fContents))
      {
          return $fContents;
      }

      if (function_exists('mb_convert_encoding'))
      {
          return mb_convert_encoding($fContents, $to, $from);
      }
      if (function_exists('iconv'))
      {
          return iconv($from, $to, $fContents);
      }
      return $fContents;
  }
  398. }