国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

341 lines
10KB

  1. <?php
  2. if (!defined('DEDEINC')) exit('dedebiz');
  3. /**
  4. * DedeBIZV6中用到的字符编码转换的小助手函数
  5. *
  6. * @version $Id: charset.helper.php 1 2010-07-05 11:43:09Z tianya $
  7. * @package DedeBIZ.Helpers
  8. * @copyright Copyright (c) 2022, DedeBIZ.COM
  9. * @license https://www.dedebiz.com/license
  10. * @link https://www.dedebiz.com
  11. */
  // Lookup tables shared by the converters in this file. Each one is filled
  // lazily, on first use, by the function that needs it. They are written
  // through $GLOBALS because the functions read them from the global symbol
  // table (`global $...` / `$GLOBALS[...]`).
  //
  // The two code-point maps are filled with `$table[$n] = ...`, so they must
  // start out as arrays: since PHP 7.1 that assignment on an empty string
  // writes into the string instead of converting it to an array.
  $GLOBALS['UC2GBTABLE'] = $GLOBALS['CODETABLE'] = array();
  // Raw table bytes read from the .dat files; these really are strings.
  $GLOBALS['BIG5_DATA'] = $GLOBALS['GB_DATA'] = '';
  // Dictionary used by UnicodeUrl2Gbk(); null means "not loaded yet".
  $GLOBALS['GbkUniDic'] = null;
  /**
   * Convert a UTF-8 string to GB (GBK) encoding.
   *
   * iconv() is used when it is available. Otherwise the string is converted
   * with a lookup table loaded from DEDEINC/data/gb2312-utf8.dat; characters
   * that are not in the table are written as decimal numeric entities
   * (&#NNNN;), and malformed UTF-8 bytes are dropped.
   *
   * @access public
   * @param  string $utfstr UTF-8 encoded input
   * @return string converted string; the iconv path returns iconv()'s result as is
   */
  if (!function_exists('utf82gb')) {
      function utf82gb($utfstr)
      {
          if (function_exists('iconv')) {
              return iconv('utf-8', 'gbk//ignore', $utfstr);
          }
          global $UC2GBTABLE;
          if (trim($utfstr) == "") {
              return $utfstr;
          }
          // The table is indexed by code point, so it has to be an array. Reset
          // anything else: on PHP 7.1+ `$s[$n] = ...` on an empty string writes
          // into the string rather than turning it into an array.
          if (empty($UC2GBTABLE) || !is_array($UC2GBTABLE)) {
              $UC2GBTABLE = array();
              $fp = fopen(DEDEINC."/data/gb2312-utf8.dat", "r");
              if ($fp !== false) {
                  while ($l = fgets($fp, 15)) {
                      $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
                  }
                  fclose($fp);
              }
          }
          $okstr = "";
          $ulen = strlen($utfstr);
          for ($i = 0; $i < $ulen; $i++) {
              $lead = ord($utfstr[$i]);
              if ($lead < 0x80) {
                  // 7-bit bytes are copied through unchanged.
                  $okstr .= $utfstr[$i];
                  continue;
              }
              // Number of continuation bytes, taken from the lead byte itself
              // (the previous code always assumed two).
              if (($lead & 0xE0) == 0xC0) {
                  $extra = 1;
              } elseif (($lead & 0xF0) == 0xE0) {
                  $extra = 2;
              } elseif (($lead & 0xF8) == 0xF0) {
                  $extra = 3;
              } else {
                  // Stray continuation byte or invalid lead byte: drop it.
                  continue;
              }
              if ($i + $extra >= $ulen) {
                  // Sequence is cut off by the end of the input.
                  break;
              }
              $code = utf82u(substr($utfstr, $i, $extra + 1));
              $i += $extra;
              if (isset($UC2GBTABLE[$code])) {
                  // Table values are offset by 0x8080 from the two output bytes.
                  $gb = $UC2GBTABLE[$code] + 0x8080;
                  $okstr .= chr(($gb >> 8) & 0xFF).chr($gb & 0xFF);
              } else {
                  $okstr .= "&#".$code.";";
              }
          }
          // NOTE: only this fallback trims its result; kept from the original.
          return trim($okstr);
      }
  }
  /**
   * Convert a GB (GBK) string to UTF-8.
   *
   * iconv() is used when it is available. Otherwise double-byte characters are
   * looked up in a table loaded from DEDEINC/data/gb2312-utf8.dat and encoded
   * with u2utf8(); characters that are not in the table are skipped.
   *
   * @access public
   * @param  string $gbstr GBK encoded input
   * @return string converted string; the iconv path returns iconv()'s result as is
   */
  if (!function_exists('gb2utf8')) {
      function gb2utf8($gbstr)
      {
          if (function_exists('iconv')) {
              return iconv('gbk', 'utf-8//ignore', $gbstr);
          }
          global $CODETABLE;
          if (trim($gbstr) == "") {
              return $gbstr;
          }
          // The table is indexed by GB code, so it has to be an array. Reset
          // anything else: on PHP 7.1+ `$s[$n] = ...` on an empty string writes
          // into the string rather than turning it into an array.
          if (empty($CODETABLE) || !is_array($CODETABLE)) {
              $CODETABLE = array();
              $fp = fopen(DEDEINC."/data/gb2312-utf8.dat", "r");
              if ($fp !== false) {
                  while ($l = fgets($fp, 15)) {
                      $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
                  }
                  fclose($fp);
              }
          }
          $ret = "";
          $len = strlen($gbstr);
          $pos = 0;
          // Walk the input by offset instead of re-slicing it on every step.
          while ($pos < $len) {
              if (ord($gbstr[$pos]) > 0x80) {
                  $pair = substr($gbstr, $pos, 2);
                  $pos += 2;
                  $key = hexdec(bin2hex($pair)) - 0x8080;
                  if (!isset($CODETABLE[$key])) {
                      // Unmapped (or truncated) character: skip it. The old
                      // code decoded the missing entry as 0 and emitted chr(0).
                      continue;
                  }
                  // u2utf8() returns the UTF-8 bytes as decimal numbers glued
                  // together, three digits per byte.
                  $utf8 = u2utf8(hexdec($CODETABLE[$key]));
                  if ($utf8 != "") {
                      foreach (str_split($utf8, 3) as $dec) {
                          $ret .= chr((int)$dec);
                      }
                  }
              } else {
                  $ret .= $gbstr[$pos];
                  $pos++;
              }
          }
          return $ret;
      }
  }
  /**
   * Encode a Unicode code point as UTF-8, returned as decimal byte values.
   *
   * The result is NOT a binary string: each UTF-8 byte is appended as its
   * decimal number, e.g. 0x4E2D gives "228184173" (228, 184, 173). Code points
   * below 0x80 give the decimal of the code point itself, and code points of
   * 0x200000 or more give an empty string.
   *
   * @access public
   * @param  int $c Unicode code point
   * @return string concatenated decimal byte values
   */
  if (!function_exists('u2utf8')) {
      function u2utf8($c)
      {
          // Plain initialisation. The previous `for (...; $i < count($c); ...)`
          // loop existed only to do this, and count() on an integer is a
          // TypeError on PHP 8.
          $str = "";
          if ($c < 0x80) {
              $str .= $c;
          } else if ($c < 0x800) {
              $str .= (0xC0 | $c >> 6);
              $str .= (0x80 | $c & 0x3F);
          } else if ($c < 0x10000) {
              $str .= (0xE0 | $c >> 12);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          } else if ($c < 0x200000) {
              $str .= (0xF0 | $c >> 18);
              $str .= (0x80 | $c >> 12 & 0x3F);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          }
          return $str;
      }
  }
  /**
   * Decode one UTF-8 encoded character into its Unicode code point.
   *
   * Only inputs of 1 to 4 bytes are handled; any other length returns null.
   * The bytes are not validated.
   *
   * @access public
   * @param  string $c the bytes of a single UTF-8 character
   * @return int|null code point, or null when the length is not 1..4
   */
  if (!function_exists('utf82u')) {
      function utf82u($c)
      {
          $size = strlen($c);
          if ($size == 1) {
              return ord($c);
          }
          // Mask applied to the lead byte, keyed by sequence length.
          $leadMask = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
          if (!isset($leadMask[$size])) {
              return null;
          }
          $point = ord($c[0]) & $leadMask[$size];
          // Every following byte contributes its low six bits.
          for ($k = 1; $k < $size; $k++) {
              $point = ($point << 6) | (ord($c[$k]) & 0x3f);
          }
          return $point;
      }
  }
  /**
   * Convert a Big5 string to GB (GBK) encoding.
   *
   * iconv() is used when it is available. Otherwise each double-byte character
   * is replaced in place using the table in DEDEINC/data/big5-gb.dat. If the
   * table cannot be read the input is returned unchanged, and characters whose
   * table offset is out of range are left as they are.
   *
   * @access public
   * @param  string $Text Big5 encoded input
   * @return string converted string; the iconv path returns iconv()'s result as is
   */
  if (!function_exists('big52gb')) {
      function big52gb($Text)
      {
          if (function_exists('iconv')) {
              return iconv('big5', 'gbk//ignore', $Text);
          }
          global $BIG5_DATA;
          if (empty($BIG5_DATA)) {
              $data = file_get_contents(DEDEINC."/data/big5-gb.dat");
              if ($data === false || $data === '') {
                  // No table, nothing to convert with.
                  return $Text;
              }
              $BIG5_DATA = $data;
          }
          $tableLen = strlen($BIG5_DATA);
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++) {
              $h = ord($Text[$i]);
              if ($h < 0x80) {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              if ($h == 161 && $l == 64) {
                  // Big5 0xA140 is the full-width space. The GBK full-width
                  // space (0xA1A1) is written as explicit bytes so the output
                  // does not depend on the encoding of this source file.
                  $Text[$i] = "\xA1";
                  $Text[$i + 1] = "\xA1";
              } else {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  // Only replace when both table bytes exist; assigning an
                  // empty string to a string offset is an Error on PHP 8.
                  if ($p >= 0 && $p + 1 < $tableLen) {
                      $Text[$i] = $BIG5_DATA[$p];
                      $Text[$i + 1] = $BIG5_DATA[$p + 1];
                  }
              }
              // The trail byte has been consumed together with the lead byte.
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert a GB (GBK) string to Big5 encoding.
   *
   * iconv() is used when it is available. Otherwise each double-byte character
   * is replaced in place using the table in DEDEINC/data/gb-big5.dat. If the
   * table cannot be read the input is returned unchanged, and characters whose
   * table offset is out of range are left as they are.
   *
   * @access public
   * @param  string $Text GBK encoded input
   * @return string converted string; the iconv path returns iconv()'s result as is
   */
  if (!function_exists('gb2big5')) {
      function gb2big5($Text)
      {
          if (function_exists('iconv')) {
              return iconv('gbk', 'big5//ignore', $Text);
          }
          global $GB_DATA;
          if (empty($GB_DATA)) {
              $data = file_get_contents(DEDEINC."/data/gb-big5.dat");
              if ($data === false || $data === '') {
                  // No table, nothing to convert with.
                  return $Text;
              }
              // Store the table in the global that the loop below reads. The
              // previous code read it into a local variable, so $GB_DATA
              // stayed empty and every lookup failed.
              $GB_DATA = $data;
          }
          $tableLen = strlen($GB_DATA);
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++) {
              $h = ord($Text[$i]);
              if ($h < 0x80) {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              if ($h == 161 && $l == 64) {
                  // Special case kept from the original (input bytes 0xA1 0x40).
                  // The output is the Big5 full-width space, written as
                  // explicit bytes so it does not depend on this file's encoding.
                  // NOTE(review): the GBK full-width space is 0xA1 0xA1, so this
                  // test may have been copied from big52gb() - confirm.
                  $Text[$i] = "\xA1";
                  $Text[$i + 1] = "\x40";
              } else {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  // Only replace when both table bytes exist; assigning an
                  // empty string to a string offset is an Error on PHP 8.
                  if ($p >= 0 && $p + 1 < $tableLen) {
                      $Text[$i] = $GB_DATA[$p];
                      $Text[$i + 1] = $GB_DATA[$p + 1];
                  }
              }
              // The trail byte has been consumed together with the lead byte.
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert "%uXXXX" escapes in a string to GBK bytes.
   *
   * Each complete escape (percent sign, "u", four hex digits) is replaced by
   * the entry of the dictionary in $GLOBALS['GbkUniDic'], or by a decimal
   * numeric entity (&#NNNN;) when the dictionary has no entry. Everything
   * else, including incomplete or non-hex escapes, is copied through. The
   * marker '$#$' is turned into '+' first. The dictionary is loaded from
   * DEDEINC/data/gbk-unicode.dat on first use.
   *
   * @access public
   * @param  string $str text containing %uXXXX escapes
   * @return string converted text
   */
  if (!function_exists('UnicodeUrl2Gbk')) {
      function UnicodeUrl2Gbk($str)
      {
          // Load the lookup dictionary once.
          if (!isset($GLOBALS['GbkUniDic'])) {
              $dic = array();
              $fp = fopen(DEDEINC.'/data/gbk-unicode.dat', 'rb');
              // Without this check a failed fopen() makes feof(false) loop
              // forever on PHP 7 and throw on PHP 8.
              if ($fp !== false) {
                  while (!feof($fp)) {
                      $uniBytes = (string)fread($fp, 2);
                      $gbkBytes = (string)fread($fp, 2);
                      if (strlen($uniBytes) !== 2 || strlen($gbkBytes) !== 2) {
                          // Short read at end of file: not a full record.
                          break;
                      }
                      $dic[bin2hex($uniBytes)] = $gbkBytes;
                  }
                  fclose($fp);
              }
              $GLOBALS['GbkUniDic'] = $dic;
          }
          // Convert the string.
          $str = str_replace('$#$', '+', $str);
          $glen = strlen($str);
          $okstr = "";
          for ($i = 0; $i < $glen; $i++) {
              // A full escape needs six characters: '%', 'u' and four digits.
              if ($glen - $i >= 6 && $str[$i] == '%' && $str[$i + 1] == 'u') {
                  $uni = strtolower(substr($str, $i + 2, 4));
                  if (strspn($uni, '0123456789abcdef') === 4) {
                      if (isset($GLOBALS['GbkUniDic'][$uni])) {
                          $okstr .= $GLOBALS['GbkUniDic'][$uni];
                      } else {
                          $okstr .= "&#".hexdec($uni).";";
                      }
                      // Skip the rest of the escape; the loop adds the sixth step.
                      $i += 5;
                      continue;
                  }
              }
              $okstr .= $str[$i];
          }
          return $okstr;
      }
  }
  /**
   * Convert a string, or every key and string value of an array, from one
   * character set to another.
   *
   * Arrays are converted recursively and keep their element order. Empty
   * values, non-string scalars and all other types are returned unchanged, as
   * is everything when $from and $to name the same charset. Strings are
   * converted with mb_convert_encoding() when available, else iconv(); with
   * neither extension the string is returned unchanged.
   *
   * @access public
   * @param  mixed  $fContents string or array to convert
   * @param  string $from      source charset ('utf8' is accepted for 'utf-8')
   * @param  string $to        target charset ('utf8' is accepted for 'utf-8')
   * @return mixed  converted value, same shape as the input
   */
  if (!function_exists('AutoCharset')) {
      function AutoCharset($fContents, $from = 'gbk', $to = 'utf-8')
      {
          $from = strtoupper($from) == 'UTF8' ? 'utf-8' : $from;
          $to = strtoupper($to) == 'UTF8' ? 'utf-8' : $to;
          if (strtoupper($from) === strtoupper($to) || empty($fContents) || (is_scalar($fContents) && !is_string($fContents))) {
              // Same charset, empty value or non-string scalar: nothing to do.
              return $fContents;
          }
          if (is_string($fContents)) {
              if (function_exists('mb_convert_encoding')) {
                  return mb_convert_encoding($fContents, $to, $from);
              }
              if (function_exists('iconv')) {
                  return iconv($from, $to, $fContents);
              }
              return $fContents;
          }
          if (is_array($fContents)) {
              // Build a fresh array so converted keys stay in their original
              // position instead of being appended after the untouched ones.
              $converted = array();
              foreach ($fContents as $key => $val) {
                  $newKey = AutoCharset($key, $from, $to);
                  if (!is_string($newKey) && !is_int($newKey)) {
                      // Key conversion failed (e.g. returned false): keep the
                      // original key rather than moving the value to key 0.
                      $newKey = $key;
                  }
                  $converted[$newKey] = AutoCharset($val, $from, $to);
              }
              return $converted;
          }
          return $fContents;
      }
  }
  329. }