php解析mht文件转换成html的实例

6年以前  |  阅读数:410 次  |  编程语言:PHP 

用编辑器打开mht文件,可以看到其中的内容是base64编码的,所以mht文件是可以用PHP解析并转换成html的。


    <?php

    /**
     * 针对Mht格式的文件进行解析
    * 使用例子:
    * 
    * function mhtmlParseBody($filename) {

        if (file_exists ( $filename )) {
            if (is_dir ( $filename )) return false;

            $filename = strtolower ( $filename );
            if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;


            $o_mhtml = new mhtparse ();
            $o_mhtml->set_file ( $filename );
            $o_mhtml->extract ();
            return $o_mhtml->get_part_to_file(0);

        }
        return null;
    }

    function mhtmlParseAll($filename) {

        if (file_exists ( $filename )) {
            if (is_dir ( $filename )) return false;

            $filename = strtolower ( $filename );
            if (strpos ( $filename, '.mht', 1 ) == FALSE) return false;


            $o_mhtml = new mhtparse ();
            $o_mhtml->set_file ( $filename );
            $o_mhtml->extract ();
            return $o_mhtml->get_all_part_file();

        }
        return null;
    }
    */

    /**
     * Minimal parser for MHT (MHTML) web-archive files.
     *
     * Typical use: set_file() -> extract() -> get_part_to_file($i) or
     * get_all_part_file(). The file is split on its MIME multipart boundary and
     * each part body is decoded according to its Content-Transfer-Encoding.
     */
    class mhtparse {

        /** Path of the MHT file to parse (assigned through set_file()). */
        public $file = '';

        /** Part delimiter: "--" followed by the boundary parameter; '' when none was found. */
        public $boundary = '';

        /** Raw file contents (string) after read_filedata(); list of raw part strings after file_parts(). */
        public $filedata = '';

        /**
         * Number of chunks found after the first delimiter. This includes the
         * chunk that follows the closing delimiter. Stays 1 when there is no boundary.
         */
        public $countparts = 1;

        /** Accumulated diagnostic messages, one per line. */
        public $log = '';

        /**
         * Reads the configured file and splits it into parts.
         *
         * @return int 1 on success, 0 when the file could not be read (see get_log()).
         */
        public function extract() {
            // Reset state so the same instance can be reused for another file.
            $this->boundary = '';
            $this->countparts = 1;

            if (! $this->read_filedata ()) {
                return 0;
            }
            $this->file_parts ();

            return 1;
        }

        /**
         * Sets the path of the file that extract() will read.
         *
         * @param string $p File path.
         */
        public function set_file($p) {
            $this->file = $p;
        }

        /**
         * Returns the diagnostic messages collected so far.
         *
         * @return string
         */
        public function get_log() {
            return $this->log;
        }

        /**
         * Locates the multipart boundary and splits $filedata into raw parts.
         *
         * Only the first 8192 bytes are scanned for the boundary parameter. Both
         * the quoted form (boundary="x") and the unquoted form (boundary=x) are
         * accepted, and the parameter name is matched case-insensitively. When no
         * boundary is found, the whole content becomes a single part.
         */
        public function file_parts() {
            if (! is_string ( $this->filedata )) {
                // Already split (or never loaded); nothing to do.
                return;
            }

            $head_lines = explode ( "\n", substr ( $this->filedata, 0, 8192 ) );
            foreach ( $head_lines as $line ) {
                $line = trim ( $line );
                // Anchor on the "boundary" parameter itself so that another quoted
                // parameter on the same line (e.g. type="text/html") is not mistaken for it.
                if (! preg_match ( '/boundary\s*=\s*(?:"([^"]*)"|([^\s;]+))/i', $line, $m )) {
                    continue;
                }
                $value = ($m [1] !== '') ? $m [1] : (isset ( $m [2] ) ? $m [2] : '');
                if ($value === '') {
                    continue;
                }
                $this->boundary = '--' . $value;
                // Drop the declaring header line so it can never be treated as a delimiter.
                $this->filedata = str_replace ( $line, '', $this->filedata );
                break;
            }

            if ($this->boundary != '') {
                $chunks = explode ( $this->boundary, $this->filedata );
                // Everything before the first delimiter is the top-level header block.
                unset ( $chunks [0] );
                $this->filedata = array_values ( $chunks );
                $this->countparts = count ( $this->filedata );
            } else {
                $this->filedata = array (
                        $this->filedata
                );
            }
        }

        /**
         * Returns every raw (still encoded, headers included) part.
         *
         * @return array|string List of part strings once file_parts() has run;
         *                      otherwise whatever $filedata currently holds.
         */
        public function get_all_part_file() {
            return $this->filedata;
        }

        /**
         * Returns the decoded body of part $i. Despite the name, nothing is
         * written to disk.
         *
         * base64 and quoted-printable bodies are decoded (the encoding name is
         * matched case-insensitively); any other body is returned as-is with its
         * line breaks preserved.
         *
         * @param int $i Zero-based part index.
         * @return string|int Decoded body; '' when the part does not exist; the
         *                    integer 1 when the chunk is the closing delimiter
         *                    remainder ("--").
         */
        public function get_part_to_file($i) {
            if (! is_array ( $this->filedata ) || ! isset ( $this->filedata [$i] )) {
                $this->log .= 'Part not found: ' . $i . "\n";
                return '';
            }

            $header_end = 0;
            $encoding = '';
            $part_lines = explode ( "\n", ltrim ( $this->filedata [$i] ) );

            foreach ( $part_lines as $line_id => $line ) {
                $line = trim ( $line );
                if ($line === '') {
                    // The chunk after the closing delimiter starts with "--": not a real part.
                    if (trim ( $part_lines [0] ) === '--') {
                        return 1;
                    }
                    // First blank line separates the headers from the body.
                    $header_end = $line_id;
                    break;
                }
                $pos = strpos ( $line, ':' );
                if ($pos === false) {
                    continue;
                }
                $name = strtolower ( trim ( substr ( $line, 0, $pos ) ) );
                if ($name === 'content-transfer-encoding') {
                    // Encoding names are case-insensitive ("Base64", "Quoted-Printable", ...).
                    $encoding = strtolower ( trim ( substr ( $line, $pos + 1 ) ) );
                }
            }

            $body_lines = array_slice ( $part_lines, $header_end + 1 );

            if ($encoding === 'base64') {
                // Line breaks carry no meaning in base64; the decoder skips stray "\r".
                return base64_decode ( implode ( '', $body_lines ) );
            }

            // Keep the line breaks: they are content for plain bodies and are
            // needed to recognise soft line breaks ("=" at end of line) in quoted-printable.
            $body = implode ( "\n", $body_lines );
            if ($encoding === 'quoted-printable') {
                // Core function; does not require the optional IMAP extension.
                return quoted_printable_decode ( $body );
            }

            return $body;
        }

        /**
         * Loads the whole file into $filedata.
         *
         * On failure $filedata is set to '' and a message is appended to the log.
         *
         * @return bool True when the file was read, false otherwise.
         */
        public function read_filedata() {
            $data = false;
            if (is_file ( $this->file ) && is_readable ( $this->file )) {
                // Also handles empty files, where fread() with length 0 would fail.
                $data = file_get_contents ( $this->file );
            }
            if ($data === false) {
                $this->log .= 'Unable to read file: ' . $this->file . "\n";
                $this->filedata = '';
                return false;
            }
            $this->filedata = $data;
            return true;
        }

        /**
         * Extracts the text between a begin mark and its matching end mark,
         * taking nested begin/end pairs into account.
         *
         * Example: for 'a{b{c}d}e' with marks '{' and '}' the result is
         * array('range' => 'b{c}d', 'begin' => 2, 'end' => 8).
         *
         * Original algorithm: v1.1 2004-2006, Uku-Kaarel Joesaar.
         *
         * @param string $subject       Text to search (taken by reference for
         *                              historical reasons; it is not modified).
         * @param string $Beginmark_str Opening mark; an empty value falls back to '{'.
         * @param string $Endmark_str   Closing mark; an empty value falls back to '}'.
         * @param int    $Start_pos     Offset at which the search starts.
         * @return array|false array('range' => text between the marks,
         *                     'begin' => offset just after the opening mark,
         *                     'end' => offset just after the closing mark),
         *                     or false when no complete range exists.
         */
        public function getrange(&$subject, $Beginmark_str = '{', $Endmark_str = '}', $Start_pos = 0) {
            if (empty ( $Beginmark_str ))
                $Beginmark_str = '{';
            $begin_len = strlen ( $Beginmark_str );

            if (empty ( $Endmark_str ))
                $Endmark_str = '}';
            $end_len = strlen ( $Endmark_str );

            $text = ( string ) $subject;
            $text_len = strlen ( $text );
            // An out-of-range offset can never match; report "not found" instead of letting strpos() fail.
            if ($Start_pos > $text_len || $Start_pos < - $text_len)
                return false;

            $begin = null; // offset just after the accepted opening mark
            $cursor = $Start_pos; // where the next opening-mark search starts
            $end_pos = null; // offset of the most recent closing mark
            $end_sequel = null; // offset just after the most recent closing mark
            $range_end = null; // closing mark currently assumed to end the range
            $range_len = null; // length of the range for that closing mark
            $clean = 0; // becomes 1 once no opening mark lies inside the range unmatched

            do {
                // Until an opening mark has been accepted, search from the caller's offset.
                if ($begin === null)
                    $cursor = $Start_pos;

                $cursor = strpos ( $text, $Beginmark_str, $cursor );

                if (is_int ( $cursor )) {
                    // Step over the mark that was just found.
                    $cursor += $begin_len;
                    if ($begin === null) {
                        // First opening mark: this is where the range starts.
                        $begin = $cursor;
                    } elseif ($cursor < $range_end) {
                        // Another opening mark inside the current range: it consumes the
                        // closing mark found so far, so a later one is needed.
                        $clean = 0;
                    } elseif ($cursor > $range_end) {
                        // Next opening mark lies beyond the closing mark: the range is complete.
                        $clean = 1;
                    }
                } else {
                    // No (further) opening mark.
                    if ($begin === null)
                        return false; // there was never a start
                    // Once a start exists, a closing mark has already been recorded
                    // (otherwise we would have returned), so the range is complete.
                    $clean = 1;
                }

                if ($clean != 1) {
                    if (! is_int ( $end_pos ))
                        $end_sequel = $begin;

                    $end_pos = strpos ( $text, $Endmark_str, $end_sequel );

                    // Closing mark not found: the range is unbalanced.
                    if (! is_int ( $end_pos ))
                        return false;

                    $range_len = $end_pos - $begin;
                    $end_sequel = $end_pos + $end_len;
                    $range_end = $end_pos;
                }
            } while ( $clean < 1 );

            if (! is_int ( $begin ) || ! is_int ( $range_len ))
                return false;

            return array (
                    'range' => substr ( $text, $begin, $range_len ),
                    'begin' => $begin,
                    'end' => $end_sequel
            );
        } // end getrange()
    } // class
    ?>

以上这篇php解析mht文件转换成html的实例就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持脚本之家。

 相关文章:
PHP分页显示制作详细讲解
SSH 登录失败:Host key verification failed
获取IMSI
将二进制数据转为16进制以便显示
文件下载
获取IMEI
贪吃蛇
双位运算符
发送邮件
PHP自定义函数获取搜索引擎来源关键字的方法
Java生成UUID
提取后缀名
年的日历图
在Zeus Web Server中安装PHP语言支持
让你成为最历害的git提交人
Yii2汉字转拼音类的实例代码
再谈PHP中单双引号的区别详解
指定应用ID以获取对应的应用名称
Python 2与Python 3版本和编码的对比
php封装的page分页类完整实例