Browse Source

支持DedeBIZ商业分词组件

tags/6.0.0
tianya 3 years ago
parent
commit
b631708652
9 changed files with 282 additions and 198 deletions
  1. +79
    -48
      src/dede/archives_do.php
  2. +72
    -43
      src/dede/article_keywords_make.php
  3. +43
    -31
      src/dede/inc/inc_archives_functions.php
  4. +1
    -3
      src/dede/index_body.php
  5. +0
    -10
      src/include/arc.listview.class.php
  6. +33
    -15
      src/include/arc.searchview.class.php
  7. +54
    -0
      src/include/dedebiz.class.php
  8. +0
    -46
      src/include/taglib/arclist.lib.php
  9. +0
    -2
      src/plus/digg_ajax.php

+ 79
- 48
src/dede/archives_do.php View File

@@ -751,7 +751,6 @@ function makekw(){ }
--------------------------*/
else if($dopost=="makekw")
{
include_once(DEDEINC.'/splitword.class.php');
CheckPurview('a_Commend,sys_ArcBatch');
if( !empty($aid) && empty($qstr) ) $qstr = $aid;
@@ -760,66 +759,98 @@ else if($dopost=="makekw")
ShowMsg("参数无效!", $ENV_GOBACK_URL);
exit();
}
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
$arcids = preg_replace("#[^0-9,]#", '', preg_replace("#`#", ',', $qstr));
$query = "SELECT arc.*, addt.* From `#@__archives` arc LEFT JOIN `#@__addonarticle` addt ON addt.aid=arc.id WHERE arc.id in($arcids) AND arc.channel=1 ";
$dsql->SetQuery($query);
$dsql->Execute();
while($row = $dsql->GetArray())
{
//跳过已经有关键字的内容
if(trim($row['keywords']) !='' ) continue;
$aid = $row['id'];
$keywords = '';
$title = $row['title'];
$description = $row['description'];
$body = cn_substr($row['body'], 5000);
$sp->SetSource($title, $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$titleindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
$sp->SetSource(Html2Text($body), $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$allindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
if(is_array($allindexs) && is_array($titleindexs))
if (!empty($cfg_bizcore_appid) && !empty($cfg_bizcore_key)) {
$client = new DedeBizClient($cfg_bizcore_hostname, $cfg_bizcore_port);
$client->appid = $cfg_bizcore_appid;
$client->key = $cfg_bizcore_key;
while($row = $dsql->GetArray())
{
foreach($titleindexs as $k => $v)
{
if(strlen($keywords.$k)>=60)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
foreach($allindexs as $k => $v)
//跳过已经有关键字的内容
if(trim($row['keywords']) !='' ) continue;
$aid = $row['id'];
$keywords = '';
$title = $row['title'];
$description = $row['description'];
$body = cn_substr($row['body'], 5000);
$data = $client->Spliteword($title.Html2Text($body));
$keywords = $data->data;
$description = str_replace(' ', ' ', trim($description));
$description = str_replace('[', ' ', $description);
$description = str_replace(']', ' ', $description);
$description = preg_replace("#[ \r\n\t]{1,}#is", ' ', $description);
$description = str_replace('关键字', '', $description);
$description = str_replace('关键词', '', $description);
$description = addslashes($description);
$dsql->ExecuteNoneQuery(" UPDATE `#@__archives` SET `keywords`='$keywords',`description`='$description' WHERE id='{$aid}' ");
}
$client->Close();
} else {
include_once(DEDEINC.'/splitword.class.php');
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
while($row = $dsql->GetArray())
{
//跳过已经有关键字的内容
if(trim($row['keywords']) !='' ) continue;
$aid = $row['id'];
$keywords = '';
$title = $row['title'];
$description = $row['description'];
$body = cn_substr($row['body'], 5000);
$sp->SetSource($title, $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$titleindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
$sp->SetSource(Html2Text($body), $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$allindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
if(is_array($allindexs) && is_array($titleindexs))
{
if(strlen($keywords.$k)>=60)
foreach($titleindexs as $k => $v)
{
break;
if(strlen($keywords.$k)>=60)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
else if(!in_array($k,$titleindexs))
foreach($allindexs as $k => $v)
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
if(strlen($keywords.$k)>=60)
{
break;
}
else if(!in_array($k,$titleindexs))
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
}
$description = str_replace(' ', ' ', trim($description));
$description = str_replace('[', ' ', $description);
$description = str_replace(']', ' ', $description);
$description = preg_replace("#[ \r\n\t]{1,}#is", ' ', $description);
$description = str_replace('关键字', '', $description);
$description = str_replace('关键词', '', $description);
$description = addslashes($description);
$dsql->ExecuteNoneQuery(" UPDATE `#@__archives` SET `keywords`='$keywords',`description`='$description' WHERE id='{$aid}' ");
}
$sp = null;
}
$description = str_replace(' ', ' ', trim($description));
$description = str_replace('[', ' ', $description);
$description = str_replace(']', ' ', $description);
$description = preg_replace("#[ \r\n\t]{1,}#is", ' ', $description);
$description = str_replace('关键字', '', $description);
$description = str_replace('关键词', '', $description);
$description = addslashes($description);
$dsql->ExecuteNoneQuery(" UPDATE `#@__archives` SET `keywords`='$keywords',`description`='$description' WHERE id='{$aid}' ");
}
$sp = null;
ShowMsg("成功分析指定文档的关键词!", $ENV_GOBACK_URL);
exit();
}


+ 72
- 43
src/dede/article_keywords_make.php View File

@@ -143,64 +143,93 @@ else if($dopost=='fetch')
LEFT JOIN `#@__addonarticle` addon ON addon.aid=arc.id WHERE arc.channel='1' $limitSql ";
$dsql->SetQuery($fquery);
$dsql->Execute();
$sp = new SplitWord($cfg_soft_lang , $cfg_soft_lang );
while($row=$dsql->GetObject())
{
if($row->keywords!='')
if (!empty($cfg_bizcore_appid) && !empty($cfg_bizcore_key)) {
$client = new DedeBizClient($cfg_bizcore_hostname, $cfg_bizcore_port);
$client->appid = $cfg_bizcore_appid;
$client->key = $cfg_bizcore_key;
while($row=$dsql->GetObject())
{
continue;
if($row->keywords!='')
{
continue;
}
$tjnum++;
$id = $row->id;
$keywords = "";
$data = $client->Spliteword($row->title.Html2Text($row->body));
$keywords = $data->data;
$keywords = addslashes($keywords);
if($keywords=='')
{
$keywords = ',';
}
$dsql->ExecuteNoneQuery("UPDATE `#@__archives` SET keywords='$keywords' WHERE id='$id'");
}
$tjnum++;
$id = $row->id;
$keywords = "";
$sp->SetSource($row->title, $cfg_soft_lang , $cfg_soft_lang );
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$titleindexs = $sp->GetFinallyIndex();
$sp->SetSource(Html2Text($row->body), $cfg_soft_lang , $cfg_soft_lang );
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$allindexs = $sp->GetFinallyIndex();
if(is_array($allindexs) && is_array($titleindexs))
$client->Close();
} else {
$sp = new SplitWord($cfg_soft_lang , $cfg_soft_lang );
while($row=$dsql->GetObject())
{
foreach($titleindexs as $k => $v)
if($row->keywords!='')
{
if(strlen($keywords)>=30)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.",";
}
continue;
}
foreach($allindexs as $k => $v)
$tjnum++;
$id = $row->id;
$keywords = "";
$sp->SetSource($row->title, $cfg_soft_lang , $cfg_soft_lang );
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$titleindexs = $sp->GetFinallyIndex();
$sp->SetSource(Html2Text($row->body), $cfg_soft_lang , $cfg_soft_lang );
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$allindexs = $sp->GetFinallyIndex();
if(is_array($allindexs) && is_array($titleindexs))
{
if(strlen($keywords)>=30)
foreach($titleindexs as $k => $v)
{
break;
if(strlen($keywords)>=30)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.",";
}
}
else if(!in_array($k,$titleindexs))
foreach($allindexs as $k => $v)
{
if(strlen($k) <= 2) continue;
$keywords .= $k.",";
if(strlen($keywords)>=30)
{
break;
}
else if(!in_array($k,$titleindexs))
{
if(strlen($k) <= 2) continue;
$keywords .= $k.",";
}
}
}
$keywords = addslashes($keywords);
if($keywords=='')
{
$keywords = ',';
}
$dsql->ExecuteNoneQuery("UPDATE `#@__archives` SET keywords='$keywords' WHERE id='$id'");
}
$keywords = addslashes($keywords);
if($keywords=='')
{
$keywords = ',';
}
$dsql->ExecuteNoneQuery("UPDATE `#@__archives` SET keywords='$keywords' WHERE id='$id'");
unset($sp);
}
unset($sp);
}//end if limit
//返回提示信息
if($totalnum>0) $tjlen = ceil( ($tjnum/$totalnum) * 100 );
else $tjlen=100;


+ 43
- 31
src/dede/inc/inc_archives_functions.php View File

@@ -635,6 +635,7 @@ function PrintAutoFieldsEdit(&$fieldset, &$fieldValues, $loadtype='all')
function AnalyseHtmlBody($body,&$description,&$litpic,&$keywords,$dtype='')
{
global $autolitpic,$remote,$dellink,$autokey,$cfg_basehost,$cfg_auot_description,$id,$title,$cfg_soft_lang;
global $cfg_bizcore_appid,$cfg_bizcore_key,$cfg_bizcore_hostname,$cfg_bizcore_port;
$autolitpic = (empty($autolitpic) ? '' : $autolitpic);
$body = stripslashes($body);

@@ -675,44 +676,55 @@ function AnalyseHtmlBody($body,&$description,&$litpic,&$keywords,$dtype='')
{
$subject = $title;
$message = $body;
include_once(DEDEINC.'/splitword.class.php');
$keywords = '';
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
$sp->SetSource($subject, $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$titleindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
$sp->SetSource(Html2Text($message), $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$allindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
if(is_array($allindexs) && is_array($titleindexs))
{
foreach($titleindexs as $k => $v)
{
if(strlen($keywords.$k)>=60)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
foreach($allindexs as $k => $v)
// 采用DedeBIZ Core分词组件分词
if (!empty($cfg_bizcore_appid) && !empty($cfg_bizcore_key)) {
$keywords = '';
$client = new DedeBizClient($cfg_bizcore_hostname, $cfg_bizcore_port);
$client->appid = $cfg_bizcore_appid;
$client->key = $cfg_bizcore_key;
$data = $client->Spliteword($subject.Html2Text($message));
$keywords = $data->data;
$client->Close();
} else {
include_once(DEDEINC.'/splitword.class.php');
$keywords = '';
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
$sp->SetSource($subject, $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$titleindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
$sp->SetSource(Html2Text($message), $cfg_soft_lang, $cfg_soft_lang);
$sp->StartAnalysis();
$allindexs = preg_replace("/#p#|#e#/",'',$sp->GetFinallyIndex());
if(is_array($allindexs) && is_array($titleindexs))
{
if(strlen($keywords.$k)>=60)
foreach($titleindexs as $k => $v)
{
break;
if(strlen($keywords.$k)>=60)
{
break;
}
else
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
else if(!in_array($k,$titleindexs))
foreach($allindexs as $k => $v)
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
if(strlen($keywords.$k)>=60)
{
break;
}
else if(!in_array($k,$titleindexs))
{
if(strlen($k) <= 2) continue;
$keywords .= $k.',';
}
}
}
$sp = null;
}
$sp = null;
}
$body = GetFieldValueA($body,$dtype,$id);
$body = addslashes($body);


+ 1
- 3
src/dede/index_body.php View File

@@ -251,9 +251,7 @@ else if ($dopost == 'getRightSide') {
$client->appid = $cfg_bizcore_appid;
$client->key = $cfg_bizcore_key;
$core_info = $client->SystemInfo();
if ($core_info->code === 200) {
$client->Close();
}
$client->Close();
}
if (!empty($decotent)) {


+ 0
- 10
src/include/arc.listview.class.php View File

@@ -825,16 +825,6 @@ class ListView
//处理一些特殊字段
$row['infos'] = cn_substr($row['description'],$infolen);
$row['id'] = $row['id'];
if($cfg_digg_update > 0)
{
$prefix = 'diggCache';
$key = 'aid-'.$row['id'];
$cacherow = GetCache($prefix, $key);
$row['goodpost'] = $cacherow['goodpost'];
$row['badpost'] = $cacherow['badpost'];
$row['scores'] = $cacherow['scores'];
}
if($row['corank'] > 0 && $row['arcrank']==0)
{
$row['arcrank'] = $row['corank'];


+ 33
- 15
src/include/arc.searchview.class.php View File

@@ -175,29 +175,47 @@ class SearchView
function GetKeywords($keyword)
{
global $cfg_soft_lang;
global $cfg_bizcore_appid,$cfg_bizcore_key,$cfg_bizcore_hostname,$cfg_bizcore_port;
$keyword = cn_substr($keyword, 50);
$row = $this->dsql->GetOne("SELECT spwords FROM `#@__search_keywords` WHERE keyword='".addslashes($keyword)."'; ");
if(!is_array($row))
{
if(strlen($keyword)>7)
{
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
$sp->SetSource($keyword, $cfg_soft_lang, $cfg_soft_lang);
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$keywords = $sp->GetFinallyResult();
$idx_keywords = $sp->GetFinallyIndex();
ksort($idx_keywords);
$keywords = $keyword.' ';
foreach ($idx_keywords as $key => $value) {
if (strlen($key) <= 3) {
continue;
if (!empty($cfg_bizcore_appid) && !empty($cfg_bizcore_key)) {
$client = new DedeBizClient($cfg_bizcore_hostname, $cfg_bizcore_port);
$client->appid = $cfg_bizcore_appid;
$client->key = $cfg_bizcore_key;
$data = $client->Spliteword($keyword);
$kvs = explode(",", $data->data);
$keywords = $keyword." ";
foreach ($kvs as $key => $value) {
$keywords .= ' '.$value;
}
$keywords .= ' '.$key;
$keywords = preg_replace("/[ ]{1,}/", " ", $keywords);
$client->Close();
// var_dump($keywords);exit;
} else {
$sp = new SplitWord($cfg_soft_lang, $cfg_soft_lang);
$sp->SetSource($keyword, $cfg_soft_lang, $cfg_soft_lang);
$sp->SetResultType(2);
$sp->StartAnalysis(TRUE);
$keywords = $sp->GetFinallyResult();
$idx_keywords = $sp->GetFinallyIndex();
ksort($idx_keywords);
$keywords = $keyword.' ';
foreach ($idx_keywords as $key => $value) {
if (strlen($key) <= 3) {
continue;
}
$keywords .= ' '.$key;
}
$keywords = preg_replace("/[ ]{1,}/", " ", $keywords);
// var_dump($keywords);exit();
unset($sp);
}
$keywords = preg_replace("/[ ]{1,}/", " ", $keywords);
//var_dump($idx_keywords);exit();
unset($sp);


}
else
{


+ 54
- 0
src/include/dedebiz.class.php View File

@@ -103,6 +103,60 @@ class DedeBizClient
return $this->request($req);
}

// Store a value in the cache.
// $key: cache key; $val: value to store; $duration: cache time
// (sent as "d"; the unit is not visible in this file section - TODO confirm).
// Returns whatever $this->request() returns for the "cache_set" call.
function CacheSet($key,$val,$duration)
{
    // Build the parameter map first; key order (k, v, d) is kept as-is.
    $parms = array();
    $parms["k"] = $key;
    $parms["v"] = $val;
    $parms["d"] = $duration;

    // "method" goes before "parms", matching the original request layout.
    $req = array();
    $req["method"] = "cache_set";
    $req["parms"] = $parms;

    return $this->request($req);
}

// Fetch a cached entry.
// $key: cache key to look up (sent as "k").
// Returns whatever $this->request() returns for the "cache_get" call.
function CacheGet($key)
{
    $parms = array("k" => $key);

    // "method" goes before "parms", matching the original request layout.
    $req = array();
    $req["method"] = "cache_get";
    $req["parms"] = $parms;

    return $this->request($req);
}

// Delete a cached entry.
// $key: cache key to remove (sent as "k").
// Returns whatever $this->request() returns for the "cache_del" call.
function CacheDel($key)
{
    $parms = array("k" => $key);

    // "method" goes before "parms", matching the original request layout.
    $req = array();
    $req["method"] = "cache_del";
    $req["parms"] = $parms;

    return $this->request($req);
}

// Get the word-segmentation result for a piece of text.
// $body: text to segment (sent as "body").
// Returns whatever $this->request() returns for the "spliteword" call.
// NOTE(review): the shape of that result is defined by request(), which is
// not visible here - confirm before relying on specific fields.
function Spliteword($body)
{
    $parms = array("body" => $body);

    // "method" goes before "parms", matching the original request layout.
    $req = array();
    $req["method"] = "spliteword";
    $req["parms"] = $parms;

    return $this->request($req);
}

// 拼接规则就是method+
function MakeSign(&$req)
{


+ 0
- 46
src/include/taglib/arclist.lib.php View File

@@ -415,52 +415,6 @@ function lib_arclistDone(&$refObj, &$ctag, $typeid=0, $row=10, $col=1, $titlelen
WHERE arc.id in($idlist) $ordersql ";
}
// 好评差评缓存更新
if($cfg_digg_update > 0)
{
if($orderby == 'goodpost' || $orderby == 'badpost')
{
$t1 = ExecTime();
$postsql = "SELECT arc.id,arc.goodpost,arc.badpost,arc.scores
FROM `$maintable` arc
$orwhere $ordersql $limitsql";
if($idlist != '')
{
$postsql = "SELECT arc.id,arc.goodpost,arc.badpost,arc.scores
FROM `$maintable` arc
WHERE arc.id in($idlist) $ordersql ";
}
$dsql->SetQuery($query);
$dsql->Execute('lit');
while ($row = $dsql->GetArray('lit')) {
$prefix = 'diggCache';
$key = 'aid-'.$row['id'];
$cacherow = GetCache($prefix, $key);
$setsql = array();
if(!empty($cacherow['scores']) && $cacherow['scores'] != $row['scores'])
{
$setsql[] = "scores = {$cacherow['scores']}";
}
if(!empty($cacherow['goodpost']) && $cacherow['goodpost'] != $row['goodpost'])
{
$setsql[] = "goodpost = {$cacherow['goodpost']}";
}
if(!empty($cacherow['badpost']) && $cacherow['badpost'] != $row['badpost'])
{
$setsql[] = "badpost = {$cacherow['badpost']}";
}
$setsql = implode(',', $setsql);
$sql = "UPDATE `$maintable` SET {$setsql} WHERE id='{$row['id']}'";
if(!empty($setsql))
{
$dsql->ExecuteNoneQuery($sql);
}
}
//echo ExecTime()-$t1;
}
}
$dsql->SetQuery($query);
$dsql->Execute('al');
//$row = $dsql->GetArray("al");


+ 0
- 2
src/plus/digg_ajax.php View File

@@ -15,8 +15,6 @@ $format = isset($format)? $format : '';
$id = empty($id)? 0 : intval($id);
$cid = empty($cid)? 1 : intval($cid);
helper('cache');
if($id < 1)


Loading…
Cancel
Save