CodeSwitch.class.php
8.12 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
<?php
// +----------------------------------------------------------------------
// | ThinkPHP [ WE CAN DO IT JUST THINK IT ]
// +----------------------------------------------------------------------
// | Copyright (c) 2009 http://thinkphp.cn All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: liu21st <liu21st@gmail.com>
// +----------------------------------------------------------------------
namespace Org\Util;
/**
 * Detects the encoding of source files (UTF-8 with or without BOM, GB2312)
 * and converts them in place to a requested charset.
 *
 * Errors and informational messages are accumulated in static lists for the
 * lifetime of the request; read them back with getError() and getInfo().
 */
class CodeSwitch
{
    // Accumulated error messages
    private static $error = array();
    // Accumulated informational messages
    private static $info = array();

    // Record an error message
    private static function error($msg)
    {
        self::$error[] = $msg;
    }

    // Record an informational message
    private static function info($info)
    {
        self::$info[] = $info;
    }

    /**
     * Classify the encoding of a file's raw bytes.
     *
     * @param string $content raw file content
     * @return string one of "UTF-8 WITH BOM", "UNICODE LE", "UNICODE BE",
     *                "UTF-8", "GB2312", or "" when the content holds only
     *                7-bit ASCII bytes (including the empty string)
     */
    private static function detectEncoding($content)
    {
        if (strncmp($content, "\xEF\xBB\xBF", 3) === 0) {
            return "UTF-8 WITH BOM";
        }
        if (strncmp($content, "\xFF\xFE", 2) === 0) {
            return "UNICODE LE";
        }
        if (strncmp($content, "\xFE\xFF", 2) === 0) {
            return "UNICODE BE";
        }
        // Pure ASCII is identical in UTF-8 and GB2312, so nothing can be told apart.
        if (preg_match('/[\x80-\xFF]/', $content) !== 1) {
            return "";
        }
        // With the "u" modifier PCRE validates the whole subject as UTF-8 and
        // fails on malformed input. Anything with high bytes that is not
        // well-formed UTF-8 is treated as GB2312, the only other supported source.
        return preg_match('//u', $content) === 1 ? "UTF-8" : "GB2312";
    }

    /**
     * Detect the encoding of a single file and convert it in place.
     *
     * Supported conversions:
     *  - GB2312 or UTF-8 WITH BOM to UTF-8
     *  - UTF-8 or UTF-8 WITH BOM to GB2312
     *
     * The file is only rewritten after a successful conversion; every failure
     * is recorded through error() and leaves the file untouched.
     *
     * @access public
     * @param string $filename    path of the file to check and convert
     * @param string $out_charset target charset name (compared case-insensitively)
     * @return void
     */
    public static function DetectAndSwitch($filename, $out_charset)
    {
        if (! is_file($filename) || ! is_readable($filename)) {
            self::error("文件" . $filename . "不存在或无法读取");
            return;
        }
        $raw = file_get_contents($filename);
        if ($raw === false) {
            self::error("文件" . $filename . "不存在或无法读取");
            return;
        }

        $originEncoding = self::detectEncoding($raw);
        if ($originEncoding === "UNICODE LE" || $originEncoding === "UNICODE BE") {
            self::error("不支持从" . $originEncoding . "转换到UTF-8或GB编码");
            return;
        }

        $target = strtoupper($out_charset);
        if ($target === $originEncoding) {
            self::info("文件" . $filename . "转码检查完成,原始文件编码" . $originEncoding);
            return;
        }
        if ($originEncoding === "") {
            // ASCII-only (or empty) content is already valid in every supported
            // target charset, so the file is deliberately left as it is.
            self::info("文件" . $filename . "中没有出现中文,但是可以断定不是带BOM的UTF-8编码,没有进行编码转换,不影响使用");
            return;
        }

        if (! is_writable($filename)) {
            self::error("文件" . $filename . "写入失败");
            return;
        }

        if ($originEncoding === "UTF-8 WITH BOM") {
            // Drop the three BOM bytes; the remainder is plain UTF-8.
            $sourceCharset = "UTF-8";
            $body = (string) substr($raw, 3);
        } else {
            $sourceCharset = $originEncoding;
            $body = $raw;
        }

        $converted = iconv($sourceCharset, $target, $body);
        if ($converted === false) {
            // Never write a failed conversion back: that would truncate the file.
            self::error("文件" . $filename . "从" . $sourceCharset . "转换到" . $target . "失败,文件未被修改");
            return;
        }
        if (file_put_contents($filename, $converted) === false) {
            self::error("文件" . $filename . "写入失败");
            return;
        }
        self::info("对文件" . $filename . "转码完成,原始文件编码" . $originEncoding . ",转换后文件编码" . $target);
    }

    /**
     * Recursively list directories and/or files below a path.
     *
     * @access public
     * @param string $path       directory to scan; a trailing "/" is appended when missing
     * @param string $mode       "FULL" (directories and files), "DIRS" (directories only)
     *                           or "FILES" (files only)
     * @param array  $file_types lower-case extensions, including the leading dot, to keep
     * @param int    $maxdepth   maximum recursion depth; a negative value means unlimited
     * @param int    $d          current recursion depth (used by the recursive calls)
     * @return array matching paths; sorted with natcasesort() on the top-level call,
     *               which keeps the original keys
     */
    public static function searchdir($path, $mode = "FULL", $file_types = array(".html", ".php"), $maxdepth = -1, $d = 0)
    {
        if (substr($path, -1) !== '/') {
            $path .= '/';
        }
        $dirlist = array();
        if ($mode != "FILES") {
            $dirlist[] = $path;
        }
        // Best effort: an unreadable directory simply contributes no entries.
        $handle = @opendir($path);
        if ($handle) {
            $mayDescend = $d >= 0 && ($d < $maxdepth || $maxdepth < 0);
            while (false !== ($entry = readdir($handle))) {
                if ($entry === '.' || $entry === '..') {
                    continue;
                }
                $full = $path . $entry;
                if (is_dir($full)) {
                    if ($mayDescend) {
                        $dirlist = array_merge(
                            $dirlist,
                            self::searchdir($full . '/', $mode, $file_types, $maxdepth, $d + 1)
                        );
                    }
                    continue;
                }
                if ($mode == "DIRS") {
                    continue;
                }
                // Take the extension from the entry name only, so a dot in a
                // parent directory name can never be mistaken for one.
                $extension = strtolower((string) strrchr($entry, '.'));
                if (in_array($extension, $file_types, true)) {
                    $dirlist[] = $full;
                }
            }
            closedir($handle);
        }
        if ($d == 0) {
            natcasesort($dirlist);
        }
        return ($dirlist);
    }

    /**
     * Convert the encoding of every matching file below a project directory.
     *
     * @access public
     * @param string $app        root directory to scan
     * @param string $charset    target charset handed to DetectAndSwitch()
     * @param string $mode       listing mode handed to searchdir()
     * @param array  $file_types extensions (with leading dot) of the files to convert
     * @return void
     */
    public static function CodingSwitch($app = "./", $charset = 'UTF-8', $mode = "FILES", $file_types = array(
        ".html",
        ".php",
    ))
    {
        self::info("注意: 程序使用的文件编码检测算法可能对某些特殊字符不适用");
        $filearr = self::searchdir($app, $mode, $file_types);
        foreach ($filearr as $file) {
            // searchdir() also returns directories unless $mode is "FILES";
            // only regular files can be converted.
            if (is_dir($file)) {
                continue;
            }
            self::DetectAndSwitch($file, $charset);
        }
    }

    /**
     * @return array every error message recorded so far
     */
    public static function getError()
    {
        return self::$error;
    }

    /**
     * @return array every informational message recorded so far
     */
    public static function getInfo()
    {
        return self::$info;
    }
}