CodeSwitch.class.php 8.12 KB
<?php
// +----------------------------------------------------------------------
// | ThinkPHP [ WE CAN DO IT JUST THINK IT ]
// +----------------------------------------------------------------------
// | Copyright (c) 2009 http://thinkphp.cn All rights reserved.
// +----------------------------------------------------------------------
// | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 )
// +----------------------------------------------------------------------
// | Author: liu21st <liu21st@gmail.com>
// +----------------------------------------------------------------------
namespace Org\Util;

class CodeSwitch
{
    // 错误信息
    private static $error = array();
    // 提示信息
    private static $info = array();
    
    // 记录错误
    static private function error($msg)
    {
        self::$error[] = $msg;
    }
    
    // 记录信息
    static private function info($info)
    {
        self::$info[] = $info;
    }

    /**
     * 编码转换函数,对整个文件进行编码转换
     * 支持以下转换
     * GB2312、UTF-8 WITH BOM转换为UTF-8
     * UTF-8、UTF-8 WITH BOM转换为GB2312
     * 
     * @access public
     * @param string $filename            
     * @param string $out_charset            
     * @return void
     */
    static function DetectAndSwitch($filename, $out_charset)
    {
        $fpr = fopen($filename, "r");
        $char1 = fread($fpr, 1);
        $char2 = fread($fpr, 1);
        $char3 = fread($fpr, 1);
        $originEncoding = "";
        if ($char1 == chr(239) && $char2 == chr(187) && $char3 == chr(191)) // UTF-8 WITH BOM
{
            $originEncoding = "UTF-8 WITH BOM";
        } elseif ($char1 == chr(255) && $char2 == chr(254)) // UNICODE LE
{
            self::error("不支持从UNICODE LE转换到UTF-8或GB编码");
            fclose($fpr);
            
            return;
        } elseif ($char1 == chr(254) && $char2 == chr(255)) { // UNICODE BE
            self::error("不支持从UNICODE BE转换到UTF-8或GB编码");
            fclose($fpr);
            
            return;
        } else { // 没有文件头,可能是GB或UTF-8
            if (rewind($fpr) === false) { // 回到文件开始部分,准备逐字节读取判断编码
                self::error($filename . "文件指针后移失败");
                fclose($fpr);
                
                return;
            }
            while (! feof($fpr)) {
                $char = fread($fpr, 1);
                // 对于英文,GB和UTF-8都是单字节的ASCII码小于128的值
                if (ord($char) < 128) {
                    continue;
                }
                // 对于汉字GB编码第一个字节是110*****第二个字节是10******(有特例,比如联字)
                // UTF-8编码第一个字节是1110****第二个字节是10******第三个字节是10******
                // 按位与出来结果要跟上面非星号相同,所以应该先判断UTF-8
                // 因为使用GB的掩码按位与,UTF-8的111得出来的也是110,所以要先判断UTF-8
                if ((ord($char) & 224) == 224) {
                    // 第一个字节判断通过
                    $char = fread($fpr, 1);
                    if ((ord($char) & 128) == 128) {
                        // 第二个字节判断通过
                        $char = fread($fpr, 1);
                        if ((ord($char) & 128) == 128) {
                            $originEncoding = "UTF-8";
                            break;
                        }
                    }
                }
                if ((ord($char) & 192) == 192) {
                    // 第一个字节判断通过
                    $char = fread($fpr, 1);
                    if ((ord($char) & 128) == 128) {
                        // 第二个字节判断通过
                        $originEncoding = "GB2312";
                        break;
                    }
                }
            }
        }
        if (strtoupper($out_charset) == $originEncoding) {
            self::info("文件" . $filename . "转码检查完成,原始文件编码" . $originEncoding);
            fclose($fpr);
        } else {
            // 文件需要转码
            $originContent = "";
            if ($originEncoding == "UTF-8 WITH BOM") {
                // 跳过三个字节,把后面的内容复制一遍得到utf-8的内容
                fseek($fpr, 3);
                $originContent = fread($fpr, filesize($filename) - 3);
                fclose($fpr);
            } elseif (rewind($fpr) != false) { // 不管是UTF-8还是GB2312,回到文件开始部分,读取内容
                $originContent = fread($fpr, filesize($filename));
                fclose($fpr);
            } else {
                self::error("文件编码不正确或指针后移失败");
                fclose($fpr);
                
                return;
            }
            // 转码并保存文件
            $content = iconv(str_replace(" WITH BOM", "", $originEncoding), strtoupper($out_charset), $originContent);
            $fpw = fopen($filename, "w");
            fwrite($fpw, $content);
            fclose($fpw);
            if ($originEncoding != "") {
                self::info("对文件" . $filename . "转码完成,原始文件编码" . $originEncoding . ",转换后文件编码" . strtoupper($out_charset));
            } elseif ($originEncoding == "") {
                self::info("文件" . $filename . "中没有出现中文,但是可以断定不是带BOM的UTF-8编码,没有进行编码转换,不影响使用");
            }
        }
    }

    /**
     * 目录遍历函数
     * 
     * @access public
     * @param string $path            
     * @param string $mode            
     * @param array $file_types            
     * @param int $maxdepth            
     * @return void
     */
    static function searchdir($path, $mode = "FULL", $file_types = array(".html", ".php"), $maxdepth = - 1, $d = 0)
    {
        if (substr($path, strlen($path) - 1) != '/') {
            $path .= '/';
        }
        $dirlist = array();
        if ($mode != "FILES") {
            $dirlist[] = $path;
        }
        $handle = @opendir($path);
        if ($handle) {
            while (false !== ($file = readdir($handle))) {
                if ($file != '.' && $file != '..') {
                    $file = $path . $file;
                    if (! is_dir($file)) {
                        if ($mode != "DIRS") {
                            $extension = "";
                            $extpos = strrpos($file, '.');
                            if ($extpos !== false) {
                                $extension = substr($file, $extpos, strlen($file) - $extpos);
                            }
                            $extension = strtolower($extension);
                            if (in_array($extension, $file_types)) {
                                $dirlist[] = $file;
                            }
                        }
                    } elseif ($d >= 0 && ($d < $maxdepth || $maxdepth < 0)) {
                        $result = self::searchdir($file . '/', $mode, $file_types, $maxdepth, $d + 1);
                        $dirlist = array_merge($dirlist, $result);
                    }
                }
            }
            closedir($handle);
        }
        if ($d == 0) {
            natcasesort($dirlist);
        }
        
        return ($dirlist);
    }

    /**
     * 对整个项目目录中的PHP和HTML文件行进编码转换
     * 
     * @access public
     * @param string $app            
     * @param string $mode            
     * @param array $file_types            
     * @return void
     */
    static function CodingSwitch($app = "./", $charset = 'UTF-8', $mode = "FILES", $file_types = array(
		".html",
		".php",
	))
    {
        self::info("注意: 程序使用的文件编码检测算法可能对某些特殊字符不适用");
        $filearr = self::searchdir($app, $mode, $file_types);
        foreach ($filearr as $file) {
            self::DetectAndSwitch($file, $charset);
        }
    }

    static public function getError()
    {
        return self::$error;
    }

    static public function getInfo()
    {
        return self::$info;
    }
}