php英文单词统计器

6年以前  |  阅读数:502 次  |  编程语言:PHP 

本文实例为大家分享了英文单词统计器的 PHP 实现,供大家参考,具体内容如下:

程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮, 即可得到按字母顺序列出的所有单词,及其出现的次数
用于测试的数据文档: data.txt
驱动程序:word.php
output.php 和 StringTokenizer.php 必须与 word.php 放在同一个文件夹中(word.php 通过 require 加载它们)
1. words_statistics_PHP.png

2. word.php


    <!DOCTYPE html>
    <html>
    <head>
    <!-- Declare the encoding in <head>: this page contains non-ASCII (Chinese) text. -->
    <meta charset="utf-8" />
    <title>英文文档单词统计 Statistics on English words</title>
    <style>
    /* Styling for the table cells that show one word and its count. */
    td{
      background-color:#CF6;
      width:100px;
      margin:5px;
    }
    </style>
    </head>
    <body>
    <?php
    /**
     * 程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计"钮,
     * 即可得到按字母顺序列出的所有单词,及其出现的次数
     * 
     * 作者: 许同春 author Tongchun Xu 
     * @开源中国 Open Source, China community
     * 完成日期:2016年6月10日 completion date: 10 June, 2016
     */

    require("StringTokenizer.php");
    require("output.php");
      // Run the statistics only when the upload form was submitted; a plain GET
      // request carries no 'submit' field and just renders the form below.
      if (!empty($_POST['submit'])) {
        // When the request has no "file" part at all, report it the same way PHP
        // reports "no file was uploaded" instead of reading an undefined index.
        $uploadError = isset($_FILES["file"]["error"]) ? $_FILES["file"]["error"] : UPLOAD_ERR_NO_FILE;
        if ($uploadError > 0) {
          echo "Error: " . $uploadError . "<br />";
        } else {
          $tmpName = $_FILES["file"]["tmp_name"];
          // file_get_contents() yields "" for an empty file, whereas the previous
          // fread($fh, filesize(...)) was called with length 0, which is an error
          // on PHP 8. It also needs no explicit fclose().
          $str = is_uploaded_file($tmpName) ? file_get_contents($tmpName) : false;
          if ($str === false) {
            die("Unable to open file!");
          }

          // Characters that separate words in the uploaded document.
          $delim = "?\\,. /:!\"()\t\n\r\f%";
          $st = new StringTokenizer($str, $delim);
          echo '找到字符串: '.$st->countTokens();

          // Feed every token into the ordered list; duplicates only bump a counter.
          $list = new LinkedList();
          while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
          }
          $list->words_count();
          $list->traversal();
        }
      }
    ?>
    <h2>英文文档单词统计 Statistics on English words </h2>
    <p>程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮,
     即可得到按字母顺序列出的所有单词,及其出现的次数 </p>

    <!-- Upload form. It posts back to word.php itself; the PHP code earlier in this
         file reads the upload as $_FILES["file"] and checks $_POST['submit'], so the
         "name" attributes below must stay "file" and "submit".
         enctype="multipart/form-data" is required for the file content to be sent. -->
    <form action="word.php" method="post"
    enctype="multipart/form-data">
    <label for="file">英文文档名 File Name:</label>
    <input type="file" name="file" id="file" /> 
    <input type="submit" name="submit" value="统计 Statistics" />
    </form>
    </body>
    </html>

3. output.php


    <meta charset="utf-8" />
    <?php
    /**
     * The class LinkedList allows an application to store strings in
     * alphabetical order by calling orderInsert(), and records how many
     * times each English word occurs.
     *
     * This file opens with the full "<?php" tag: the short "<?" tag is only
     * parsed when the short_open_tag ini setting is enabled, and otherwise the
     * code would be sent to the browser as plain text.
     *
     * Author: 许同春 (Tongchun Xu)
     * @开源中国 Open Source, China community
     * Completion date: 10 June 2016
     */
    /**
     * One node of the singly linked word list.
     *
     * Carries a string, the number of times that string has occurred, and a
     * reference to the node that follows it (null for the last node).
     */
    class Node
    {
      /** The stored English string. */
      public $data;

      /** Number of occurrences of the string. */
      public $frequency;

      /** The successor node, or null when there is none. */
      public $next;

      /**
       * @param mixed     $data      value to store in the node
       * @param Node|null $next      node that follows this one
       * @param int       $frequency initial occurrence count
       */
      public function __construct($data, $next = null, $frequency = 1)
      {
        $this->frequency = $frequency;
        $this->next      = $next;
        $this->data      = $data;
      }
    }

    /**
     * Singly linked list of distinct strings, each with an occurrence count.
     *
     * The list starts with a sentinel head node that holds no real data; the
     * first real element is $head->next. orderInsert() keeps the elements in
     * ascending strcmp() order and counts repeated strings instead of storing
     * them twice.
     */
    class LinkedList{
      /** Sentinel head node; it stores no real data. */
      private $head;

      /**
       * Not read anywhere in this class. The constructor has always assigned
       * it, so it stays available, but it is now declared because creating
       * properties dynamically is deprecated as of PHP 8.2.
       */
      public $first = null;

      /** Creates an empty list consisting only of the sentinel node. */
      function __construct(){
        // The sentinel's data is a placeholder and represents no word.
        $this->head = new Node("dummy 傀儡");
        $this->first = null;
      }

      /**
       * @return bool true when the list holds no elements
       */
      function isEmpty(){
        return $this->head->next === null;
      }

      /**
       * Records one occurrence of $data.
       *
       * If an equal string is already stored its frequency is incremented;
       * otherwise a new node is linked in before the first element that does
       * not compare lower than $data, so a list built only with this method
       * stays sorted.
       *
       * @param string $data the string to record
       */
      function orderInsert($data){
        $existing = $this->find($data);
        if ($existing !== null) {
          $existing->frequency++;
          return;
        }

        // Walk from the sentinel so that insertion at the front (and into an
        // empty list) needs no special case.
        $q = $this->head;
        while ($q->next !== null && strcmp($data, $q->next->data) > 0) {
          $q = $q->next;
        }
        $q->next = new Node($data, $q->next);
      }

      /**
       * Appends $data at the end of the list without any ordering or
       * duplicate check.
       *
       * @param string $data the string to append
       */
      function insertLast($data){
        $q = $this->head;
        while ($q->next !== null) {
          $q = $q->next;
        }
        $q->next = new Node($data);
      }

      /**
       * Looks up the node that stores $value.
       *
       * Every node, including the last one, is compared with strcmp(). A loose
       * "==" comparison would treat numeric strings such as "1000" and "1e3"
       * as equal and merge their counts.
       *
       * @param string $value the string to look for
       * @return Node|null the matching node, or null when there is none
       *                   (including when the list is empty)
       */
      function find($value){
        for ($q = $this->head->next; $q !== null; $q = $q->next) {
          if (strcmp($q->data, $value) === 0) {
            return $q;
          }
        }
        return null;
      }

      /**
       * Prints the list as an HTML table, 11 cells per row, each cell showing
       * a string and its frequency. Prints a notice when the list is empty.
       */
      function traversal(){
        if ($this->isEmpty()) {
          echo "链表为空!";
          return;
        }

        echo "输出结果:<table><tr>";
        $n = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
          // Start a new row before every 12th, 23rd, ... cell; doing it before
          // the cell avoids an empty trailing row.
          if ($n > 0 && $n % 11 === 0) {
            echo "</tr><tr>";
          }
          // The data comes from an uploaded document, so escape it for HTML.
          $cell = htmlspecialchars((string) $p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
          echo "<td>".$cell."<br>出现次数:".$p->frequency."</td>";
          $n++;
        }
        echo "</tr></table>";
      }

      /**
       * Prints how many distinct strings the list stores, or a notice when it
       * stores none.
       */
      function words_count(){
        if ($this->isEmpty()) {
          echo "<br>没有储存字符串 <br>";
          return;
        }

        // Count every node after the sentinel, the first one included.
        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
          $counter++;
        }
        echo "***共有单词 ".$counter." 个***";
      }
    }
    ?>

4. StringTokenizer.php


    <?php

    /**
     * The string tokenizer class allows an application to break a string into
     * tokens separated by any of a set of delimiter characters.
     *
     * Example:
     *   $st = new StringTokenizer("this is:@\t\n a test!", " !@:'\t\n\0");
     *   echo 'Total tokens: ' . $st->countTokens();   // Total tokens: 4
     *   while ($st->hasMoreTokens()) {
     *     echo $st->nextToken();                      // this, is, a, test
     *   }
     *
     * The string is split once, in the constructor, and each instance keeps its
     * own cursor. The class does not use strtok(), whose single process-wide
     * cursor would be shared by every tokenizer and by any other strtok() call.
     * There is no destructor: the former one called unset($this), which PHP 7.1
     * and later reject as a fatal error, and no manual cleanup is needed.
     *
     * @author Azeem Michael
     */
    class StringTokenizer {

      /** @var string the string being tokenized */
      private $string;

      /** @var string the set of delimiter characters */
      private $delim;

      /** @var array all tokens of $string, in order */
      private $tokens = array();

      /** @var int index into $tokens of the token nextToken() returns next */
      private $position = 0;

      /**
       * Constructs a string tokenizer for the specified string.
       * @param string $str String to tokenize
       * @param string $delim The set of delimiters (the characters that separate tokens)
       * specified at creation time, default to " \n\r\t\0"
       */
      public function __construct($str, $delim=" \n\r\t\0") {
        $this->string = (string) $str;
        $this->delim = (string) $delim;
        $this->tokens = self::split($this->string, $this->delim);
      }

      /**
       * Calculates the number of times that this tokenizer's nextToken method
       * can still be called before it runs out of tokens. The current position
       * is left untouched.
       * @return int - number of remaining tokens
       */
      public function countTokens() {
        return count($this->tokens) - $this->position;
      }

      /**
       * Tests if there are more tokens available from this tokenizer's string. It
       * does not move the internal pointer in any way. To move the internal pointer
       * to the next element call nextToken()
       * @return boolean - true if has more tokens, false otherwise
       */
      public function hasMoreTokens() {
        return $this->position < count($this->tokens);
      }

      /**
       * Returns the next token from this string tokenizer and advances the internal
       * pointer by one.
       * @return string|false - next element in the tokenized string, or false
       * when no tokens are left
       */
      public function nextToken() {
        if (!$this->hasMoreTokens()) {
          return false;
        }
        return $this->tokens[$this->position++];
      }

      /**
       * Splits $str at every run of characters contained in $delim. Runs of
       * adjacent delimiters produce no empty tokens.
       * @param string $str string to split
       * @param string $delim delimiter characters; when empty the whole string is one token
       * @return array list of tokens, possibly empty
       */
      private static function split($str, $delim) {
        if ($str === '') {
          return array();
        }
        if ($delim === '') {
          return array($str);
        }

        $tokens = array();
        $length = strlen($str);
        $pos = 0;
        while ($pos < $length) {
          // Skip the delimiters in front of the next token.
          $pos += strspn($str, $delim, $pos);
          if ($pos >= $length) {
            break;
          }
          // The token runs up to the next delimiter or the end of the string.
          $tokenLength = strcspn($str, $delim, $pos);
          $tokens[] = substr($str, $pos, $tokenLength);
          $pos += $tokenLength;
        }
        return $tokens;
      }
    }
    ?> 

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持脚本之家。

 相关文章:
PHP分页显示制作详细讲解
SSH 登录失败:Host key verification failed
获取IMSI
将二进制数据转为16进制以便显示
文件下载
获取IMEI
贪吃蛇
双位运算符
发送邮件
PHP自定义函数获取搜索引擎来源关键字的方法
Java生成UUID
提取后缀名
年的日历图
在Zeus Web Server中安装PHP语言支持
让你成为最历害的git提交人
Yii2汉字转拼音类的实例代码
再谈PHP中单双引号的区别详解
指定应用ID以获取对应的应用名称
Python 2与Python 3版本和编码的对比
php封装的page分页类完整实例