PHP
·
发表于 6年以前
·
阅读量:8458
本文实例讲述了php实现将HTML页面转换成word并且保存的方法。分享给大家供大家参考,具体如下:
这里要用到一个PHP工具,叫做PHPWord。
生成Word的原理是:将一堆按规定格式写好的xml压缩成一个zip包,并且把后缀名改成doc或者docx即可。
所以使用PHPWord,需要你的PHP环境安装zip.dll压缩扩展。我写了一个demo。
功能说明:
20150507 ― HTML中的
标签和
require_once 'PHPWord.php';
require_once 'SimpleHtmlDom.class.php';
class Word{
// URL of the page being converted; assigned from the constructor argument.
// NOTE(review): the constructor also assigns $this->host, which is not declared
// among the properties visible here — confirm it is declared elsewhere in the class.
private $url;
// Parsed content: entries appended by AnalysisHtmlDom(), plus a "table" key
// that the constructor initialises to an empty array.
private $LinetextArr = array();
// Working directory at construction time (result of getcwd()).
public $CurrentDir;
public $error = array(); // list of errors (filled from error_get_last() in the constructor)
// Presumably the output file name; not used in the visible part of the class — TODO confirm.
public $filename = null;
// Comma-separated selector handed to simple_html_dom::find() to pick the elements to convert.
public $Allowtag = "p,ol,ul,table";
/** Statistics **/
// Presumably the number of downloaded images; not updated in the visible code — TODO confirm.
public $DownImg = 0;
// Elapsed parsing time as computed in the constructor (difference of two _Time() readings, rounded to 2 decimals).
public $expendTime = 0;
// Incremented once per HTTP request in the constructor (a counter, despite the name).
public $HttpRequestTime = 0;
// Presumably the length of the fetched content; not updated in the visible code — TODO confirm.
public $ContentLen = 0;
// URLs that have been requested.
public $HttpRequestArr = array();
// Memory growth during parsing: difference of two memory_get_usage() readings divided by 1000.
public $expendmemory = 0;
/**
 * Loads the page at $url, parses every element matched by $Allowtag and
 * then builds the Word document.
 *
 * Tables are handed to ParseTable(); every other matched element goes
 * through AnalysisHtmlDom(). Timing and memory statistics cover the
 * parsing phase only (they are taken before CreateWordDom() runs).
 *
 * @param string $url Address of the HTML page to convert.
 */
public function __construct($url)
{
    $startTime = $this->_Time();
    $startMemory = $this->_memory();

    $this->url = $url;

    // Build the "scheme://host[:port]" prefix that is later prepended to
    // relative image paths. parse_url() may return false or omit components,
    // so fall back to an empty prefix instead of producing "://".
    // NOTE(review): $host is not among the property declarations visible in
    // this part of the class — confirm it is declared to avoid a dynamic property.
    $UrlArr = parse_url($this->url);
    $this->host = '';
    if (is_array($UrlArr) && isset($UrlArr['scheme'], $UrlArr['host'])) {
        $this->host = $UrlArr['scheme'] . "://" . $UrlArr['host'];
        if (isset($UrlArr['port'])) {
            // Keep a non-default port, otherwise images would be requested from the wrong endpoint.
            $this->host .= ':' . $UrlArr['port'];
        }
    }

    $this->CurrentDir = getcwd();
    $this->LinetextArr["table"] = array();

    $html = new simple_html_dom($this->url);
    $this->HttpRequestArr[] = $this->url;
    $this->HttpRequestTime++;

    foreach ($html->find($this->Allowtag) as $key => $value) {
        if ($value->tag == "table") {
            $this->ParseTable($value, 0, $this->LinetextArr["table"]);
        } else {
            $this->AnalysisHtmlDom($value);
        }

        // error_get_last() returns null when nothing went wrong and keeps
        // returning the same error until a new one occurs, so record only
        // real errors and do not record the same one twice in a row.
        $lastError = error_get_last();
        if ($lastError !== null && end($this->error) !== $lastError) {
            $this->error[] = $lastError;
        }
    }

    $endTime = $this->_Time();
    $endMemory = $this->_memory();
    $this->expendTime = round(($endTime - $startTime), 2); // seconds
    $this->expendmemory = round(($endMemory - $startMemory) / 1000, 2); // units of 1000 bytes

    $this->CreateWordDom();
}
/**
 * Returns the current Unix timestamp as a float with sub-second precision.
 *
 * microtime() yields the string "<fraction> <seconds>"; both parts are added together.
 *
 * @return float
 */
private function _Time()
{
    list($fraction, $seconds) = explode(" ", microtime());

    return (float) $fraction + (float) $seconds;
}
/**
 * Returns the amount of memory currently allocated to the script,
 * as reported by memory_get_usage().
 *
 * @return int
 */
private function _memory()
{
    $bytesInUse = memory_get_usage();

    return $bytesInUse;
}
/**
 * Collects the cells of an HTML table into $Arr.
 *
 * While the first child of $value is a structural table element (table,
 * tbody, thead, tfoot, tr) the method recurses into every child. Otherwise
 * the children of $value are treated as cells and appended to $Arr[$i] as
 * array("tag" => ..., "text" => ...). A cell whose first child is itself a
 * table is skipped.
 *
 * $Arr is taken by reference: with a by-value parameter every collected
 * cell was thrown away when the call returned and the caller's array
 * stayed empty.
 *
 * @param object $value HTML DOM node (must provide firstChild(), children, tag, plaintext)
 * @param int    $i     Index of the bucket in $Arr that receives the cells. It is
 *                      post-incremented per sibling during recursion, so siblings
 *                      (e.g. consecutive rows) land in consecutive buckets.
 * @param array  $Arr   Output array, filled in place.
 * **/
private function ParseTable($value, $i, &$Arr)
{
    $first = $value->firstChild();
    $structural = array("table", "tbody", "thead", "tfoot", "tr");

    if ($first && in_array($first->tag, $structural)) {
        foreach ($value->children as $k => $v) {
            // Post-increment on purpose: this child gets the current index, the next sibling the following one.
            $this->ParseTable($v, $i++, $Arr);
        }
        return;
    }

    foreach ($value->children as $k => $v) {
        $cellFirst = $v->firstChild();
        // Keep leaf cells and cells whose first child is not a nested table.
        if (!$cellFirst || $cellFirst->tag != "table") {
            $Arr[$i][] = array("tag" => $v->tag, "text" => trim($v->plaintext));
        }
    }
}
/**
 * Flattens an HTML element into entries appended to $this->LinetextArr.
 *
 * Elements that have children are not recorded themselves; the method
 * recurses until it reaches leaf elements. Each leaf becomes one entry:
 *  - "a":   array("tag", "href", "text")
 *  - "img": array("tag", "src", "text", "width", "height"); recorded only
 *           when getImageFromNet() returns a truthy source
 *  - other: array("tag", "text") with markup stripped from the text
 *
 * @param object $value HTML DOM node
 * **/
private function AnalysisHtmlDom($value)
{
    if ($value->has_child()) {
        foreach ($value->children as $k => $v) {
            $this->AnalysisHtmlDom($v);
        }
        return;
    }

    $tmp = array();
    if ($value->tag == "a") {
        $tmp = array("tag" => $value->tag, "href" => $value->href, "text" => $value->innertext);
    } else if ($value->tag == "img") {
        $src = $this->unescape($value->src);
        $UrlArr = parse_url($src);
        if (!isset($UrlArr['host'])) {
            // No host in the image URL: build an absolute URL from the page's host.
            $relative = $value->src;
            if (strpos($relative, '/') !== 0) {
                // Document-relative path: resolve it against the directory of the page URL.
                // Gluing it straight onto the host would yield e.g. "http://example.comimg/a.png".
                $pagePath = (string) parse_url($this->url, PHP_URL_PATH);
                $lastSlash = strrpos($pagePath, '/');
                $baseDir = ($lastSlash === false) ? '/' : substr($pagePath, 0, $lastSlash + 1);
                $relative = $baseDir . $relative;
            }
            $src = $this->host . $relative;
            $UrlArr = parse_url($src);
        }
        // Presumably downloads the remote image and returns a local path, falsy on failure —
        // getImageFromNet() is defined outside the visible part of the class; TODO confirm.
        $src = $this->getImageFromNet($src, $UrlArr);
        if ($src) {
            $imgsArr = $this->GD($src);
            $tmp = array(
                "tag" => $value->tag,
                "src" => $src,
                "text" => $value->alt,
                "width" => $imgsArr['width'],
                "height" => $imgsArr['height'],
            );
        }
    } else {
        $tmp = array("tag" => $value->tag, "text" => strip_tags($value->innertext));
    }

    // An image that could not be fetched leaves $tmp empty; do not record a
    // blank entry that has no "tag" key for later processing.
    if ($tmp) {
        $this->LinetextArr[] = $tmp;
    }
}
/**
 * Reads the dimensions of an image and halves both of them when either
 * side is larger than 800 pixels.
 *
 * @param string $src Path or URL of the image, as accepted by getimagesize()
 * @return array array("width" => ..., "height" => ...)
 * **/
private function GD($src)
{
    // getimagesize() returns width at index 0 and height at index 1.
    list($w, $h) = getimagesize($src);

    $fitsLimit = $w <= 800 && $h <= 800;
    if (!$fitsLimit) {
        $w /= 2;
        $h /= 2;
    }

    return array("width" => $w, "height" => $h);
}
/**
* Decodes Unicode escape sequences back into the original characters
* **/
public function unescape($str) {
$str = rawurldecode($str);
preg_match_all("/(?:%u.{4})|&#