国内流行的内容管理系统(CMS)多端全媒体解决方案 https://www.dedebiz.com
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

340 lines
10KB

  1. <?php if (!defined('DEDEINC')) exit('dedebiz');
  2. /**
  3. * DedeBIZV6中用到的字符编码转换的小助手函数
  4. *
  5. * @version $Id: charset.helper.php 1 2010-07-05 11:43:09Z tianya $
  6. * @package DedeBIZ.Helpers
  7. * @copyright Copyright (c) 2021, DedeBIZ.COM
  8. * @license https://www.dedebiz.com/license
  9. * @link https://www.dedebiz.com
  10. */
  // Lazily filled caches shared by the fallback (non-iconv) converters in this file.
  // $UC2GBTABLE and $CODETABLE are filled by integer index ($TABLE[$code] = ...), so they
  // must start out as arrays: since PHP 7.1 an index write on an empty string is a
  // string-offset write and no longer turns the variable into an array.
  $UC2GBTABLE = $CODETABLE = array();
  // Raw byte contents of the Big5 <-> GB mapping files; '' means "not loaded yet".
  $BIG5_DATA = $GB_DATA = '';
  // Unicode => GBK dictionary used by UnicodeUrl2Gbk(); null means "not loaded yet".
  $GbkUniDic = null;
  /**
   * Convert a UTF-8 string to GBK.
   *
   * When the iconv extension is loaded the conversion is delegated to iconv().
   * Otherwise a lookup table is built once from DEDEINC/data/gb2312-utf8.dat and
   * each multi-byte character is translated through it; characters missing from
   * the table are written as decimal numeric entities ("&#NNNN;").
   *
   * @access public
   * @param string $utfstr UTF-8 string to convert
   * @return string converted string; the fallback path also trims surrounding
   *                whitespace from the result
   */
  if (!function_exists('utf82gb')) {
      function utf82gb($utfstr)
      {
          if (function_exists('iconv')) {
              return iconv('utf-8', 'gbk//ignore', $utfstr);
          }
          global $UC2GBTABLE;
          if (trim($utfstr) == "") {
              return $utfstr;
          }
          if (empty($UC2GBTABLE)) {
              // Force an array: if the global still holds '' an index write would be
              // a string-offset write on PHP 7.1+ and the table would never be built.
              $UC2GBTABLE = array();
              $filename = DEDEINC . "/data/gb2312-utf8.dat";
              $fp = fopen($filename, "r");
              if ($fp !== false) {
                  // Each line holds two 6-character hex fields; the field at offset 7
                  // is used as the key and the field at offset 0 as the value.
                  while ($l = fgets($fp, 15)) {
                      $UC2GBTABLE[hexdec(substr($l, 7, 6))] = hexdec(substr($l, 0, 6));
                  }
                  fclose($fp);
              }
          }
          $okstr = "";
          $ulen = strlen($utfstr);
          for ($i = 0; $i < $ulen; $i++) {
              $byte = ord($utfstr[$i]);
              if ($byte < 0x80) {
                  // Single-byte (ASCII) character: copy through unchanged.
                  $okstr .= $utfstr[$i];
                  continue;
              }
              // The number of leading 1 bits of the lead byte is the sequence length.
              // (The previous code derived this from ord() of the binary string and
              // therefore always assumed a 3-byte sequence.)
              $csize = strpos(decbin($byte), "0");
              if ($csize === false || $csize < 2 || $csize > 4 || $i + $csize > $ulen) {
                  // Stray continuation byte, invalid lead byte or truncated sequence:
                  // drop the byte instead of reading past the end of the string.
                  continue;
              }
              $c = utf82u(substr($utfstr, $i, $csize));
              $i += $csize - 1;
              if (isset($UC2GBTABLE[$c])) {
                  // Table values lack the high bits; adding 0x8080 sets them on both bytes.
                  $gb = $UC2GBTABLE[$c] + 0x8080;
                  $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
              } else {
                  $okstr .= "&#" . $c . ";";
              }
          }
          return trim($okstr);
      }
  }
  /**
   * Convert a GBK string to UTF-8.
   *
   * When the iconv extension is loaded the conversion is delegated to iconv().
   * Otherwise a lookup table is built once from DEDEINC/data/gb2312-utf8.dat and
   * every double-byte character is translated through it with u2utf8().
   * Double-byte characters that are not in the table are dropped.
   *
   * @access public
   * @param string $gbstr GBK encoded string
   * @return string UTF-8 encoded string
   */
  if (!function_exists('gb2utf8')) {
      function gb2utf8($gbstr)
      {
          if (function_exists('iconv')) {
              return iconv('gbk', 'utf-8//ignore', $gbstr);
          }
          global $CODETABLE;
          if (trim($gbstr) == "") {
              return $gbstr;
          }
          if (empty($CODETABLE)) {
              // Force an array: if the global still holds '' an index write would be
              // a string-offset write on PHP 7.1+ and the table would never be built.
              $CODETABLE = array();
              $filename = DEDEINC . "/data/gb2312-utf8.dat";
              $fp = fopen($filename, "r");
              if ($fp !== false) {
                  // Key: first hex field of the line; value: second hex field (kept as text).
                  while ($l = fgets($fp, 15)) {
                      $CODETABLE[hexdec(substr($l, 0, 6))] = substr($l, 7, 6);
                  }
                  fclose($fp);
              }
          }
          $ret = "";
          $len = strlen($gbstr);
          // Walk the string by index instead of repeatedly copying the remainder.
          for ($i = 0; $i < $len; $i++) {
              if (ord($gbstr[$i]) <= 0x80) {
                  // Single-byte character: copy through unchanged.
                  $ret .= $gbstr[$i];
                  continue;
              }
              // Lead byte of a double-byte character: consume it together with the next byte.
              $thisW = substr($gbstr, $i, 2);
              $i++;
              $key = hexdec(bin2hex($thisW)) - 0x8080;
              if (!isset($CODETABLE[$key])) {
                  // Unmapped pair. Previously this went through @-suppressed lookups and
                  // ended up appending a NUL byte; now the pair is simply skipped.
                  continue;
              }
              // u2utf8() returns the UTF-8 byte values as concatenated decimal numbers;
              // multi-byte sequences are always made of 3-digit values (128..255).
              $utf8 = u2utf8(hexdec($CODETABLE[$key]));
              $digits = strlen($utf8);
              for ($k = 0; $k < $digits; $k += 3) {
                  $ret .= chr((int) substr($utf8, $k, 3));
              }
          }
          return $ret;
      }
  }
  /**
   * Encode a Unicode code point as UTF-8 byte values.
   *
   * The result is NOT a binary string: each UTF-8 byte is appended as its decimal
   * number, e.g. U+4E2D (bytes E4 B8 AD) yields "228184173". Callers in this file
   * split the result into 3-digit groups and pass each group to chr().
   *
   * @access public
   * @param int $c Unicode code point
   * @return string concatenated decimal byte values; "" for code points >= 0x200000
   */
  if (!function_exists('u2utf8')) {
      function u2utf8($c)
      {
          // Initialise directly. The old code did this inside a loop over count($c),
          // which throws a TypeError on PHP 8 because $c is an integer.
          $str = "";
          if ($c < 0x80) {
              // One byte: the code point itself.
              $str .= $c;
          } else if ($c < 0x800) {
              // Two bytes: 110xxxxx 10xxxxxx
              $str .= (0xC0 | $c >> 6);
              $str .= (0x80 | $c & 0x3F);
          } else if ($c < 0x10000) {
              // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
              $str .= (0xE0 | $c >> 12);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          } else if ($c < 0x200000) {
              // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
              $str .= (0xF0 | $c >> 18);
              $str .= (0x80 | $c >> 12 & 0x3F);
              $str .= (0x80 | $c >> 6 & 0x3F);
              $str .= (0x80 | $c & 0x3F);
          }
          return $str;
      }
  }
  /**
   * Decode one UTF-8 encoded character into its Unicode code point.
   *
   * Only the byte length of $c selects the decoding rule; the bytes themselves
   * are not validated.
   *
   * @access public
   * @param string $c a single character as a 1 to 4 byte UTF-8 sequence
   * @return int|null the code point, or null when $c is not 1 to 4 bytes long
   */
  if (!function_exists('utf82u')) {
      function utf82u($c)
      {
          $size = strlen($c);
          if ($size === 1) {
              return ord($c);
          }
          if ($size < 2 || $size > 4) {
              return null;
          }
          // Mask applied to the lead byte, indexed by sequence length.
          $leadMasks = array(2 => 0x3f, 3 => 0x1f, 4 => 0x0f);
          $code = ord($c[0]) & $leadMasks[$size];
          // Every following byte contributes its low six bits.
          for ($k = 1; $k < $size; $k++) {
              $code = ($code << 6) + (ord($c[$k]) & 0x3f);
          }
          return $code;
      }
  }
  /**
   * Convert a Big5 string to GBK.
   *
   * When the iconv extension is loaded the conversion is delegated to iconv().
   * Otherwise the mapping file DEDEINC/data/big5-gb.dat is loaded once and every
   * double-byte character is replaced in place by the two bytes found at its
   * computed offset in that file.
   *
   * @access public
   * @param string $Text Big5 encoded string
   * @return string GBK encoded string
   */
  if (!function_exists('big52gb')) {
      function big52gb($Text)
      {
          if (function_exists('iconv')) {
              return iconv('big5', 'gbk//ignore', $Text);
          }
          global $BIG5_DATA;
          if (empty($BIG5_DATA)) {
              $filename = DEDEINC . "/data/big5-gb.dat";
              // Read the whole table; a missing or unreadable file leaves it empty
              // instead of passing an invalid handle to fread()/fclose().
              $data = is_readable($filename) ? file_get_contents($filename) : false;
              $BIG5_DATA = ($data === false) ? '' : $data;
          }
          $tableSize = strlen($BIG5_DATA);
          // Stop one byte early: a lead byte always needs a following byte.
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++) {
              $h = ord($Text[$i]);
              if ($h < 0x80) {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              if ($h == 161 && $l == 64) {
                  // Big5 0xA140 is the full-width space. Both bytes of the pair are
                  // overwritten below, so the replacement has to be two bytes long:
                  // 0xA1A1 is the full-width space in GBK.
                  // NOTE(review): the previous literal was a plain string whose exact
                  // bytes depend on the source file encoding - confirm.
                  $gbstr = "\xA1\xA1";
              } else {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  if ($p < 0 || $p + 1 >= $tableSize) {
                      // Offset outside the table (invalid lead byte or table not
                      // loaded): leave the pair untouched. Writing an empty string to
                      // a string offset would be a fatal error.
                      $i++;
                      continue;
                  }
                  $gbstr = $BIG5_DATA[$p] . $BIG5_DATA[$p + 1];
              }
              $Text[$i] = $gbstr[0];
              $Text[$i + 1] = $gbstr[1];
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert a GBK string to Big5.
   *
   * When the iconv extension is loaded the conversion is delegated to iconv().
   * Otherwise the mapping file DEDEINC/data/gb-big5.dat is loaded once and every
   * double-byte character is replaced in place by the two bytes found at its
   * computed offset in that file.
   *
   * @access public
   * @param string $Text GBK encoded string
   * @return string Big5 encoded string
   */
  if (!function_exists('gb2big5')) {
      function gb2big5($Text)
      {
          if (function_exists('iconv')) {
              return iconv('gbk', 'big5//ignore', $Text);
          }
          global $GB_DATA;
          if (empty($GB_DATA)) {
              $filename = DEDEINC . "/data/gb-big5.dat";
              // Store the table in $GB_DATA, which is what the lookups below read.
              // The old code read the file into an unused local variable, so the
              // table was always empty. A missing or unreadable file leaves it empty.
              $data = is_readable($filename) ? file_get_contents($filename) : false;
              $GB_DATA = ($data === false) ? '' : $data;
          }
          $tableSize = strlen($GB_DATA);
          // Stop one byte early: a lead byte always needs a following byte.
          $max = strlen($Text) - 1;
          for ($i = 0; $i < $max; $i++) {
              $h = ord($Text[$i]);
              if ($h < 0x80) {
                  continue;
              }
              $l = ord($Text[$i + 1]);
              if ($h == 161 && $l == 64) {
                  // Both bytes of the pair are overwritten below, so the replacement
                  // has to be two bytes long; 0xA140 is the full-width space in Big5.
                  // NOTE(review): this special case tests the same byte pair as
                  // big52gb() does - confirm it is intended for GBK input.
                  $big = "\xA1\x40";
              } else {
                  $p = ($h - 160) * 510 + ($l - 1) * 2;
                  if ($p < 0 || $p + 1 >= $tableSize) {
                      // Offset outside the table (invalid lead byte or table not
                      // loaded): leave the pair untouched. Writing an empty string to
                      // a string offset would be a fatal error.
                      $i++;
                      continue;
                  }
                  $big = $GB_DATA[$p] . $GB_DATA[$p + 1];
              }
              $Text[$i] = $big[0];
              $Text[$i + 1] = $big[1];
              $i++;
          }
          return $Text;
      }
  }
  /**
   * Convert "%uXXXX" escapes in a string to GBK.
   *
   * Every "%u" followed by four hex digits is looked up in a dictionary that is
   * loaded once from DEDEINC/data/gbk-unicode.dat into $GLOBALS['GbkUniDic'].
   * Code points found there are replaced by the stored bytes; others become
   * decimal numeric entities ("&#NNNN;"). Before scanning, every '$#$' in the
   * input is replaced by '+'. All other text is copied through unchanged.
   *
   * @access public
   * @param string $str text containing %uXXXX escapes
   * @return string converted text
   */
  if (!function_exists('UnicodeUrl2Gbk')) {
      function UnicodeUrl2Gbk($str)
      {
          // Load the lookup dictionary on first use.
          if (!isset($GLOBALS['GbkUniDic'])) {
              $GLOBALS['GbkUniDic'] = array();
              $fp = fopen(DEDEINC . '/data/gbk-unicode.dat', 'rb');
              // Without this check a failed fopen() makes the feof() loop below
              // spin forever (or throw a TypeError on PHP 8).
              if ($fp !== false) {
                  while (!feof($fp)) {
                      // Records are 4 bytes: 2-byte key followed by 2-byte value.
                      $key = (string) fread($fp, 2);
                      $val = (string) fread($fp, 2);
                      // Skip the short read that happens at end of file.
                      if (strlen($key) === 2 && strlen($val) === 2) {
                          $GLOBALS['GbkUniDic'][bin2hex($key)] = $val;
                      }
                  }
                  fclose($fp);
              }
          }
          // Process the string.
          $str = str_replace('$#$', '+', $str);
          $glen = strlen($str);
          $okstr = "";
          for ($i = 0; $i < $glen; $i++) {
              // A complete escape needs 6 bytes: '%', 'u' and four hex digits.
              if ($str[$i] == '%' && $glen - $i >= 6 && $str[$i + 1] == 'u') {
                  $uni = strtolower(substr($str, $i + 2, 4));
                  if (isset($GLOBALS['GbkUniDic'][$uni])) {
                      $okstr .= $GLOBALS['GbkUniDic'][$uni];
                      $i += 5;
                      continue;
                  }
                  if (preg_match('/^[0-9a-f]{4}$/D', $uni)) {
                      // Unknown code point: emit a numeric entity. hexdec() is given
                      // the bare digits; a "0x" prefix is an invalid character for it.
                      $okstr .= "&#" . hexdec($uni) . ";";
                      $i += 5;
                      continue;
                  }
                  // Not four hex digits: fall through and copy the '%' literally.
              }
              $okstr .= $str[$i];
          }
          return $okstr;
      }
  }
  /**
   * Convert a string, or every key and string value of an array, between charsets.
   *
   * Strings are converted with mb_convert_encoding() when available, otherwise
   * with iconv(); when neither exists the input is returned unchanged. Arrays are
   * converted recursively, keys included, and keep their original element order.
   * Empty values, non-string scalars and any other type are returned as they are.
   *
   * @access public
   * @param mixed  $fContents string or array to convert
   * @param string $from      source charset ('utf8' is accepted as 'utf-8')
   * @param string $to        target charset ('utf8' is accepted as 'utf-8')
   * @return mixed converted value of the same shape as $fContents
   */
  if (!function_exists('AutoCharset')) {
      function AutoCharset($fContents, $from = 'gbk', $to = 'utf-8')
      {
          $from = strtoupper($from) == 'UTF8' ? 'utf-8' : $from;
          $to = strtoupper($to) == 'UTF8' ? 'utf-8' : $to;
          if (strtoupper($from) === strtoupper($to) || empty($fContents) || (is_scalar($fContents) && !is_string($fContents))) {
              // Same charset on both sides, or nothing convertible: return untouched.
              return $fContents;
          }
          if (is_string($fContents)) {
              if (function_exists('mb_convert_encoding')) {
                  return mb_convert_encoding($fContents, $to, $from);
              }
              if (function_exists('iconv')) {
                  return iconv($from, $to, $fContents);
              }
              return $fContents;
          }
          if (is_array($fContents)) {
              // Build a new array so that entries whose key changes stay in place.
              // The old code appended them at the end and then unset the old key.
              $converted = array();
              foreach ($fContents as $key => $val) {
                  $_key = AutoCharset($key, $from, $to);
                  if (!is_string($_key) && !is_int($_key)) {
                      // Key conversion failed (e.g. returned false): keep the original key.
                      $_key = $key;
                  }
                  $converted[$_key] = AutoCharset($val, $from, $to);
              }
              return $converted;
          }
          return $fContents;
      }
  }
  328. }