<?php
/**
 * 文档关键词生成
 *
 * @version        $Id: article_keywords_make.php 1 8:26 2010年7月12日Z tianya $
 * @package        DedeBIZ.Administrator
 * @copyright      Copyright (c) 2022, DedeBIZ.COM
 * @license        https://www.dedebiz.com/license
 * @link           https://www.dedebiz.com
 */
//Start output buffering and raise the execution time limit to 3600 seconds;
//both calls are silenced with @ so a failure does not stop the script
@ob_start();
@set_time_limit(3600);
//Load the administrator configuration that sits next to this script
require_once(__DIR__.'/config.php');
//Permission gate for the 'sys_Keyword' privilege
//NOTE(review): CheckPurview is defined outside this file - presumably it aborts the request when the privilege is missing
CheckPurview('sys_Keyword');
//Default the requested action to an empty string when none was supplied
$dopost = empty($dopost) ? '' : $dopost;
//Analyse the keywords already stored on archives (applies to the default article model):
//every keyword that is not yet in the keywords table is counted and then inserted,
//most frequently used first
if ($dopost == 'analyse') {
    echo "正在读取关键字数据库<br>\r\n";
    flush();
    //Lookup tables keyed by keyword. They must start out as arrays: since PHP 7.1 an
    //empty string is no longer promoted to an array when written with a string offset
    $ws = array();    //keywords whose sta column equals 1
    $wserr = array(); //keywords with any other sta value
    $wsnew = array(); //keyword => number of occurrences, for keywords not yet in the table
    $dsql->SetQuery("SELECT * FROM `#@__keywords`");
    $dsql->Execute();
    while ($row = $dsql->GetObject()) {
        if ($row->sta == 1) {
            $ws[$row->keyword] = 1;
        } else {
            $wserr[$row->keyword] = 1;
        }
    }
    echo "完成关键字数据库的载入<br>\r\n";
    flush();
    echo "读取档案数据库,并对禁用的关键字和生字进行处理<br>\r\n";
    flush();
    $dsql->SetQuery("SELECT id,keywords FROM `#@__archives`");
    $dsql->Execute();
    while ($row = $dsql->GetObject()) {
        //Cast first so a NULL column does not reach trim()
        foreach (explode(',', trim((string)$row->keywords)) as $v) {
            $v = trim($v);
            //Blank entries and keywords already present in the table are not counted
            if ($v == '' || isset($ws[$v]) || isset($wserr[$v])) {
                continue;
            }
            $wsnew[$v] = isset($wsnew[$v]) ? $wsnew[$v] + 1 : 1;
        }
    }
    echo "完成档案数据库的处理<br>\r\n";
    flush();
    //An emptiness test is required here: $wsnew is always an array now
    if (!empty($wsnew)) {
        echo "对关键字进行排序<br>\r\n";
        flush();
        //Highest occurrence count first; the count is stored in the rank column
        arsort($wsnew);
        echo "把关键字保存到数据库<br>\r\n";
        flush();
        foreach ($wsnew as $k => $v) {
            //Keywords longer than 20 bytes are not imported
            if (strlen($k) > 20) {
                continue;
            }
            $dsql->SetQuery("INSERT INTO `#@__keywords`(keyword,`rank`,sta,rpurl) VALUES('".addslashes($k)."','$v','1','')");
            $dsql->Execute();
        }
        echo "完成关键字的导入<br>\r\n";
        flush();
        sleep(1);
    } else {
        echo "没发现任何新的关键字<br>\r\n";
        flush();
        sleep(1);
    }
    ShowMsg('完成所有操作,现在转到关键字列表页', 'article_keywords_main.php');
    exit();
}
//Automatically generate keywords for articles that have none (applies to the default article model).
//Works in pages of $pagesize rows; after each page the browser is redirected to the next one
else if ($dopost == 'fetch') {
    require_once(DEDEINC."/splitword.class.php");
    //The paging values are round-tripped through the redirect URL built at the end of this
    //branch and are interpolated into SQL and HTML, so coerce them to safe integers first
    $startdd = empty($startdd) ? 0 : max(0, intval($startdd));
    $pagesize = empty($pagesize) ? 0 : intval($pagesize);
    if ($pagesize <= 0) {
        $pagesize = 20;
    }
    $totalnum = empty($totalnum) ? 0 : max(0, intval($totalnum));
    //Count the records once; follow-up requests receive the total through the URL
    if ($totalnum == 0) {
        $row = $dsql->GetOne("SELECT COUNT(*) AS dd FROM `#@__archives` WHERE channel='1' ");
        $totalnum = is_array($row) ? intval($row['dd']) : 0;
    }
    //Work out the slice of records for this request
    if ($totalnum > $startdd + $pagesize) {
        $limitSql = " LIMIT $startdd,$pagesize";
    } else if (($totalnum - $startdd) > 0) {
        $limitSql = " LIMIT $startdd,".($totalnum - $startdd);
    } else {
        $limitSql = '';
    }
    //Position reached so far; drives the progress bar and the "finished" test below
    $tjnum = $startdd;
    if ($limitSql != '') {
        //ORDER BY makes the LIMIT windows deterministic so pages neither overlap nor skip rows
        $fquery = "SELECT arc.id,arc.title,arc.keywords,addon.body FROM `#@__archives` arc
              LEFT JOIN `#@__addonarticle` addon ON addon.aid=arc.id WHERE arc.channel='1' ORDER BY arc.id $limitSql ";
        $dsql->SetQuery($fquery);
        $dsql->Execute();
        //Use the remote DedeBiz word-splitting client when credentials are configured,
        //otherwise fall back to the bundled SplitWord class
        $useBizCore = !empty($cfg_bizcore_appid) && !empty($cfg_bizcore_key);
        if ($useBizCore) {
            $client = new DedeBizClient($cfg_bizcore_hostname, $cfg_bizcore_port);
            $client->appid = $cfg_bizcore_appid;
            $client->key = $cfg_bizcore_key;
        } else {
            $sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
        }
        while ($row = $dsql->GetObject()) {
            //Count every visited row, including skipped ones, so progress reflects the real position
            $tjnum++;
            //Articles that already have keywords are left untouched
            if ($row->keywords != '') {
                continue;
            }
            $id = intval($row->id);
            $keywords = '';
            if ($useBizCore) {
                $data = $client->Spliteword($row->title.Html2Text($row->body));
                //Guard against a failed call that yields no usable response object
                if (is_object($data) && isset($data->data)) {
                    $keywords = (string)$data->data;
                }
            } else {
                $sp->SetSource($row->title, $cfg_soft_lang, $cfg_soft_lang);
                $sp->SetResultType(2);
                $sp->StartAnalysis(TRUE);
                $titleindexs = $sp->GetFinallyIndex();
                $sp->SetSource(Html2Text($row->body), $cfg_soft_lang, $cfg_soft_lang);
                $sp->SetResultType(2);
                $sp->StartAnalysis(TRUE);
                $allindexs = $sp->GetFinallyIndex();
                if (is_array($allindexs) && is_array($titleindexs)) {
                    //Title words come first; stop once the list reaches 30 bytes
                    foreach ($titleindexs as $k => $v) {
                        if (strlen($keywords) >= 30) {
                            break;
                        }
                        //Words of 2 bytes or fewer are ignored
                        if (strlen($k) <= 2) {
                            continue;
                        }
                        $keywords .= $k.",";
                    }
                    //Then fill up with body words that were not already taken from the title.
                    //The words are the array keys, so membership is tested with isset on the key
                    foreach ($allindexs as $k => $v) {
                        if (strlen($keywords) >= 30) {
                            break;
                        }
                        if (isset($titleindexs[$k]) || strlen($k) <= 2) {
                            continue;
                        }
                        $keywords .= $k.",";
                    }
                }
            }
            $keywords = addslashes($keywords);
            //Store a lone comma when nothing was found so the article is not picked up again
            if ($keywords == '') {
                $keywords = ',';
            }
            $dsql->ExecuteNoneQuery("UPDATE `#@__archives` SET keywords='$keywords' WHERE id='$id'");
        }
        if ($useBizCore) {
            $client->Close();
        } else {
            unset($sp);
        }
    } //end if limit
    //Build the progress message; the percentage is capped so the bar never overflows
    if ($totalnum > 0) $tjlen = min(100, ceil(($tjnum / $totalnum) * 100));
    else $tjlen = 100;
    $dvlen = $tjlen * 2;
    $tjsta = "<div style='width:200;height:15;border:1px solid #898989;text-align:left'><div style='width:$dvlen;height:15;background-color:#829D83'></div></div>";
    $tjsta .= "<br>完成处理文档总数的:$tjlen %,位置:{$startdd},继续执行任务";
    if ($tjnum < $totalnum) {
        //More rows remain: redirect to the next page, carrying the total along
        $nurl = "article_keywords_make.php?dopost=fetch&totalnum=$totalnum&startdd=".($startdd + $pagesize)."&pagesize=$pagesize";
        ShowMsg($tjsta, $nurl, 0, 500);
    } else {
        ShowMsg("完成所有任务", "javascript:;");
    }
    exit();
}
//No batch action was requested (both action branches above end with exit()): render the form template
include DedeInclude('templets/article_keywords_make.htm');