sphinx全文检索(整合ci框架)
2018-05-28
后端

php
//
// $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
//
//
// Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
//
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License. You should have
// received a copy of the GPL license along with this program; if you
// did not, you can find it at http://www.gnu.org/
//
/////////////////////////////////////////////////////////////////////////////
// PHP version of Sphinx searchd client (PHP API)
/////////////////////////////////////////////////////////////////////////////
/// known searchd commands
define('SEARCHD_COMMAND_SEARCH', 0);
define('SEARCHD_COMMAND_EXCERPT', 1);
define('SEARCHD_COMMAND_UPDATE', 2);
define('SEARCHD_COMMAND_KEYWORDS', 3);
define('SEARCHD_COMMAND_PERSIST', 4);
define('SEARCHD_COMMAND_STATUS', 5);
define('SEARCHD_COMMAND_QUERY', 6);

/// current client-side command implementation versions
define('VER_COMMAND_SEARCH', 0x116);
define('VER_COMMAND_EXCERPT', 0x100);
define('VER_COMMAND_UPDATE', 0x102);
define('VER_COMMAND_KEYWORDS', 0x100);
define('VER_COMMAND_STATUS', 0x100);
define('VER_COMMAND_QUERY', 0x100);

/// known searchd status codes
define('SEARCHD_OK', 0);
define('SEARCHD_ERROR', 1);
define('SEARCHD_RETRY', 2);
define('SEARCHD_WARNING', 3);

/// known match modes
define('SPH_MATCH_ALL', 0);
define('SPH_MATCH_ANY', 1);
define('SPH_MATCH_PHRASE', 2);
define('SPH_MATCH_BOOLEAN', 3);
define('SPH_MATCH_EXTENDED', 4);
define('SPH_MATCH_FULLSCAN', 5);
define('SPH_MATCH_EXTENDED2', 6); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

/// known ranking modes (ext2 only)
define('SPH_RANK_PROXIMITY_BM25', 0); ///< default mode, phrase proximity major factor and BM25 minor one
define('SPH_RANK_BM25', 1);           ///< statistical mode, BM25 ranking only (faster but worse quality)
define('SPH_RANK_NONE', 2);           ///< no ranking, all matches get a weight of 1
define('SPH_RANK_WORDCOUNT', 3);      ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
define('SPH_RANK_PROXIMITY', 4);
define('SPH_RANK_MATCHANY', 5);
define('SPH_RANK_FIELDMASK', 6);

/// known sort modes
define('SPH_SORT_RELEVANCE', 0);
define('SPH_SORT_ATTR_DESC', 1);
define('SPH_SORT_ATTR_ASC', 2);
define('SPH_SORT_TIME_SEGMENTS', 3);
define('SPH_SORT_EXTENDED', 4);
define('SPH_SORT_EXPR', 5);

/// known filter types
define('SPH_FILTER_VALUES', 0);
define('SPH_FILTER_RANGE', 1);
define('SPH_FILTER_FLOATRANGE', 2);

/// known attribute types
define('SPH_ATTR_INTEGER', 1);
define('SPH_ATTR_TIMESTAMP', 2);
define('SPH_ATTR_ORDINAL', 3);
define('SPH_ATTR_BOOL', 4);
define('SPH_ATTR_FLOAT', 5);
define('SPH_ATTR_BIGINT', 6);
define('SPH_ATTR_MULTI', 0x40000000); // flag bit, tested with "&" when parsing attribute types

/// known grouping functions
define('SPH_GROUPBY_DAY', 0);
define('SPH_GROUPBY_WEEK', 1);
define('SPH_GROUPBY_MONTH', 2);
define('SPH_GROUPBY_YEAR', 3);
define('SPH_GROUPBY_ATTR', 4);
define('SPH_GROUPBY_ATTRPAIR', 5);
// important properties of PHP's integers:
// - always signed (one bit short of PHP_INT_SIZE)
// - conversion from string to int is saturated
// - float is double
// - div converts arguments to floats
// - mod converts arguments to ints
// the packing code below works as follows:
// - when we got an int, just pack it
// if performance is a problem, this is the branch users should aim for
//
// - otherwise, we got a number in string form
// this might be due to different reasons, but we assume that this is
// because it didn't fit into PHP int
//
// - factor the string into high and low ints for packing
// - if we have bcmath, then it is used
// - if we don't, we have to do it manually (this is the fun part)
//
// - x64 branch does factoring using ints
// - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
//
// unpacking routines are pretty much the same.
// - return ints if we can
// - otherwise format number into a string
/// pack 64-bit signed
/// takes a native int or a numeric string and returns 8 bytes
/// (high 32-bit word first, then low word, both in network byte order)
function sphPackI64 ( $v )
{
    assert(is_numeric($v));

    // x64: native ints are wide enough, split with shift and mask
    if (PHP_INT_SIZE >= 8) {
        $v = (int)$v;
        $high = $v >> 32;
        $low = $v & 0xFFFFFFFF;
        return pack("NN", $high, $low);
    }

    // x32, int: the value fits into the low word, the high word is the sign extension
    if (is_int($v)) {
        $sign = $v < 0 ? -1 : 0;
        return pack("NN", $sign, $v);
    }

    // x32, bcmath: the value arrived as a numeric string
    if (function_exists("bcmul")) {
        if (bccomp($v, 0) == -1) {
            // negative values are shifted up by 2^64 before factoring
            $v = bcadd("18446744073709551616", $v);
        }
        $high = bcdiv($v, "4294967296", 0);
        $low = bcmod($v, "4294967296");
        // conversion to float is intentional; int would lose 31st bit
        return pack("NN", (float)$high, (float)$low);
    }

    // x32, no-bcmath: split the decimal string into its last 13 digits and
    // the leading remainder, then recombine both parts modulo 2^32 with floats
    $split = max(0, strlen($v) - 13);
    $tail = abs((float)substr($v, $split));
    $head = abs((float)substr($v, 0, $split));

    $mixed = $tail + $head * 1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
    $carry = floor($mixed / 4294967296.0);
    $low = $mixed - ($carry * 4294967296.0);
    $high = $head * 2328.0 + $carry; // (10 ^ 13) / (1 << 32) = 2328

    if ($v < 0) {
        // negate the (high, low) magnitude computed above
        if ($low == 0) {
            $high = 4294967296.0 - $high;
        } else {
            $high = 4294967295.0 - $high;
            $low = 4294967296.0 - $low;
        }
    }

    return pack("NN", $high, $low);
}
/// pack 64-bit unsigned
/// takes a native int or a numeric string and returns 8 bytes
/// (high 32-bit word first, then low word, both in network byte order)
function sphPackU64 ( $v )
{
    assert(is_numeric($v));

    if (PHP_INT_SIZE >= 8) {
        assert($v >= 0);

        // x64, int
        if (is_int($v)) {
            $high = $v >> 32;
            $low = $v & 0xFFFFFFFF;
            return pack("NN", $high, $low);
        }

        // x64, bcmath
        if (function_exists("bcmul")) {
            $high = bcdiv($v, 4294967296, 0);
            $low = bcmod($v, 4294967296);
            return pack("NN", $high, $low);
        }

        // x64, no-bcmath: split the decimal string into its last 13 digits
        // and the leading remainder, then recombine modulo 2^32 using ints
        $split = max(0, strlen($v) - 13);
        $tail = (int)substr($v, $split);
        $head = (int)substr($v, 0, $split);

        $mixed = $tail + $head * 1316134912;
        $low = $mixed % 4294967296;
        $high = $head * 2328 + (int)($mixed / 4294967296);
        return pack("NN", $high, $low);
    }

    // x32, int
    if (is_int($v)) {
        return pack("NN", 0, $v);
    }

    // x32, bcmath
    if (function_exists("bcmul")) {
        $high = bcdiv($v, "4294967296", 0);
        $low = bcmod($v, "4294967296");
        // conversion to float is intentional; int would lose 31st bit
        return pack("NN", (float)$high, (float)$low);
    }

    // x32, no-bcmath: same decimal split as above, done with floats
    $split = max(0, strlen($v) - 13);
    $tail = (float)substr($v, $split);
    $head = (float)substr($v, 0, $split);

    $mixed = $tail + $head * 1316134912.0;
    $carry = floor($mixed / 4294967296.0);
    $low = $mixed - ($carry * 4294967296.0);
    $high = $head * 2328.0 + $carry;

    return pack("NN", $high, $low);
}
// unpack 64-bit unsigned
// takes the 8-byte packed form and returns an int when the value fits into
// a native int, otherwise a decimal string
function sphUnpackU64 ( $v )
{
    list($hi, $lo) = array_values(unpack("N*N*", $v));

    if (PHP_INT_SIZE >= 8) {
        // PHP 5.2.2 to 5.2.5 can hand back negative words here; fold them
        // back into the unsigned 32-bit range
        if ($hi < 0) {
            $hi += (1 << 32);
        }
        if ($lo < 0) {
            $lo += (1 << 32);
        }

        // x64, int
        if ($hi <= 2147483647) {
            return ($hi << 32) + $lo;
        }

        // x64, bcmath
        if (function_exists("bcmul")) {
            return bcadd($lo, bcmul($hi, "4294967296"));
        }

        // x64, no-bcmath: work in base 100000 so both parts stay inside a native int
        $base = 100000;
        $head = ((int)($hi / $base) << 32) + (int)($lo / $base);
        $tail = (($hi % $base) << 32) + ($lo % $base);
        if ($tail > $base) {
            $head += (int)($tail / $base);
            $tail = $tail % $base;
        }

        if ($head == 0) {
            return $tail;
        }
        return sprintf("%d%05d", $head, $tail);
    }

    // x32, int
    if ($hi == 0) {
        if ($lo > 0) {
            return $lo;
        }
        return sprintf("%u", $lo);
    }

    $hi = sprintf("%u", $hi);
    $lo = sprintf("%u", $lo);

    // x32, bcmath
    if (function_exists("bcmul")) {
        return bcadd($lo, bcmul($hi, "4294967296"));
    }

    // x32, no-bcmath: float arithmetic in base 10^7
    $hi = (float)$hi;
    $lo = (float)$lo;

    $hiq = floor($hi / 10000000.0);
    $hir = $hi - $hiq * 10000000.0;
    $mixed = $lo + $hir * 4967296.0;
    $carry = floor($mixed / 10000000.0);
    $tail = $mixed - $carry * 10000000.0;
    $head = $hiq * 4294967296.0 + $hir * 429.0 + $carry;

    $head = sprintf("%.0f", $head);
    $tail = sprintf("%07.0f", $tail);
    if ($head == "0") {
        return sprintf("%.0f", (float)$tail);
    }
    return $head . $tail;
}
// unpack 64-bit signed
// takes the 8-byte packed form and returns an int when the value fits into
// a native int, otherwise a decimal string (with a leading "-" if negative)
function sphUnpackI64 ( $v )
{
    list($hi, $lo) = array_values(unpack("N*N*", $v));

    // x64
    if (PHP_INT_SIZE >= 8) {
        // PHP 5.2.2 to 5.2.5 can hand back negative words here; fold them
        // back into the unsigned 32-bit range
        if ($hi < 0) {
            $hi += (1 << 32);
        }
        if ($lo < 0) {
            $lo += (1 << 32);
        }
        return ($hi << 32) + $lo;
    }

    // x32, int
    if ($hi == 0) {
        if ($lo > 0) {
            return $lo;
        }
        return sprintf("%u", $lo);
    } elseif ($hi == -1) {
        // x32, int
        if ($lo < 0) {
            return $lo;
        }
        return sprintf("%.0f", $lo - 4294967296.0);
    }

    $sign = "";
    $plusOne = 0;
    if ($hi < 0) {
        // negative value: invert both words here, the +1 is added further down
        $hi = ~$hi;
        $lo = ~$lo;
        $plusOne = 1;
        $sign = "-";
    }

    $hi = sprintf("%u", $hi);
    $lo = sprintf("%u", $lo);

    // x32, bcmath
    if (function_exists("bcmul")) {
        return $sign . bcadd(bcadd($lo, bcmul($hi, "4294967296")), $plusOne);
    }

    // x32, no-bcmath: float arithmetic in base 10^7
    $hi = (float)$hi;
    $lo = (float)$lo;

    $hiq = floor($hi / 10000000.0);
    $hir = $hi - $hiq * 10000000.0;
    $mixed = $lo + $hir * 4967296.0;
    $carry = floor($mixed / 10000000.0);
    $tail = $mixed - $carry * 10000000.0 + $plusOne;
    $head = $hiq * 4294967296.0 + $hir * 429.0 + $carry;
    if ($tail == 10000000) {
        // the +1 overflowed the low part, carry it into the high part
        $tail = 0;
        $head += 1;
    }

    $head = sprintf("%.0f", $head);
    $tail = sprintf("%07.0f", $tail);
    if ($head == "0") {
        return $sign . sprintf("%.0f", (float)$tail);
    }
    return $sign . $head . $tail;
}
/// turn a 32-bit value as returned by unpack("N") into its unsigned form
/// (an int on 64-bit PHP, a decimal string on 32-bit PHP)
function sphFixUint ( $value )
{
    // x32 route, workaround php signed/unsigned braindamage
    if (PHP_INT_SIZE < 8) {
        return sprintf("%u", $value);
    }

    // x64 route, workaround broken unpack() in 5.2.2+
    return $value < 0 ? $value + (1 << 32) : $value;
}
/// sphinx searchd client class
class Sphinx_client
{
    var $_host;          ///< searchd host (default is "localhost")
    var $_port;          ///< searchd port (default is 9312)
    var $_path;          ///< searchd unix socket path ("unix://..."), false or "" when host/port is used
    var $_socket;        ///< persistent connection socket (false when there is none)
    var $_offset;        ///< how many records to seek from result-set start (default is 0)
    var $_limit;         ///< how many records to return from result-set starting at offset (default is 20)
    var $_mode;          ///< query matching mode (default is SPH_MATCH_ALL)
    var $_weights;       ///< per-field weights (default is 1 for all fields)
    var $_sort;          ///< match sorting mode (default is SPH_SORT_RELEVANCE)
    var $_sortby;        ///< attribute to sort by (default is "")
    var $_min_id;        ///< min ID to match (default is 0, which means no limit)
    var $_max_id;        ///< max ID to match (default is 0, which means no limit)
    var $_filters;       ///< search filters
    var $_groupby;       ///< group-by attribute name
    var $_groupfunc;     ///< group-by function (to pre-process group-by attribute value with)
    var $_groupsort;     ///< group-by sorting clause (to sort groups in result set with)
    var $_groupdistinct; ///< group-by count-distinct attribute
    var $_maxmatches;    ///< max matches to retrieve
    var $_cutoff;        ///< cutoff to stop searching at (default is 0)
    var $_retrycount;    ///< distributed retries count
    var $_retrydelay;    ///< distributed retries delay
    var $_anchor;        ///< geographical anchor point
    var $_indexweights;  ///< per-index weights
    var $_ranker;        ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
    var $_maxquerytime;  ///< max query time, milliseconds (default is 0, do not limit)
    var $_fieldweights;  ///< per-field-name weights
    var $_overrides;     ///< per-query attribute values overrides
    var $_select;        ///< select-list (attributes or expressions, with optional aliases)
    var $_error;         ///< last error message
    var $_warning;       ///< last warning message
    var $_connerror;     ///< connection error vs remote error flag
    var $_reqs;          ///< requests array for multi-query
    var $_mbenc;         ///< stored mbstring encoding
    var $_arrayresult;   ///< whether $result["matches"] should be a hash or an array
    var $_timeout;       ///< connect timeout
/////////////////////////////////////////////////////////////////////////////
// common stuff
/////////////////////////////////////////////////////////////////////////////
/// create a new client object and fill defaults
function __construct()
{
    // connection settings (per client object)
    $this->_host = "localhost";
    $this->_port = 9312;
    $this->_path = false;
    $this->_socket = false;
    $this->_timeout = 0;

    // result window, matching, ranking and sorting
    $this->_offset = 0;
    $this->_limit = 20;
    $this->_maxmatches = 1000;
    $this->_cutoff = 0;
    $this->_maxquerytime = 0;
    $this->_mode = SPH_MATCH_ALL;
    $this->_ranker = SPH_RANK_PROXIMITY_BM25;
    $this->_sort = SPH_SORT_RELEVANCE;
    $this->_sortby = "";
    $this->_select = "*";

    // weights
    $this->_weights = array();
    $this->_fieldweights = array();
    $this->_indexweights = array();

    // filtering
    $this->_min_id = 0;
    $this->_max_id = 0;
    $this->_filters = array();
    $this->_anchor = array();
    $this->_overrides = array();

    // grouping
    $this->_groupby = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
    $this->_groupdistinct = "";

    // distributed retries
    $this->_retrycount = 0;
    $this->_retrydelay = 0;

    // per-reply fields (for single-query case)
    $this->_error = "";
    $this->_warning = "";
    $this->_connerror = false;

    // requests storage (for multi-query case)
    $this->_reqs = array();

    // PHP-specific state
    $this->_mbenc = "";
    $this->_arrayresult = false;
}
/// close the stored socket when the client object goes away
function __destruct()
{
    // nothing to release unless a socket is currently stored
    if ($this->_socket === false) {
        return;
    }
    fclose($this->_socket);
}
/// get last error message (string)
function GetLastError()
{
    return $this->_error;
}
/// get last warning message (string)
function GetLastWarning()
{
    return $this->_warning;
}
/// get last error flag (to tell network connection errors from searchd errors or broken responses)
function IsConnectError()
{
    return $this->_connerror;
}
/// set searchd host name (string) and port (integer)
/// a host that starts with "/" or "unix://" is stored as a unix socket path,
/// in which case $port is ignored
function SetServer($host, $port = 0)
{
    assert(is_string($host));

    // absolute filesystem path: prefix it with the unix:// scheme
    // (the emptiness check avoids an "uninitialized string offset" warning)
    if ($host !== '' && $host[0] == '/') {
        $this->_path = 'unix://' . $host;
        return;
    }

    // already a unix:// path: keep it as is
    if (substr($host, 0, 7) == "unix://") {
        $this->_path = $host;
        return;
    }

    assert(is_int($port));
    $this->_host = $host;
    $this->_port = $port;
    $this->_path = ''; // clear any socket path set by an earlier call
}
/// set server connection timeout (0 to remove)
function SetConnectTimeout($timeout)
{
    assert(is_numeric($timeout));
    $this->_timeout = $timeout;
}
/// write $length bytes of $data to the $handle stream
/// returns true on success; on failure stores an error message, raises the
/// connection error flag and returns false
function _Send($handle, $data, $length)
{
    // anything other than a complete write counts as a failure
    if (feof($handle) || fwrite($handle, $data, $length) !== $length) {
        $this->_error = 'connection unexpectedly closed (timed out?)';
        $this->_connerror = true;
        return false;
    }
    return true;
}
/////////////////////////////////////////////////////////////////////////////
/// enter mbstring workaround mode
/// when the mbstring.func_overload setting has bit 2 set, remembers the
/// current internal encoding and switches to latin1 until _MBPop() is called
function _MBPush()
{
    $this->_mbenc = "";
    if (ini_get("mbstring.func_overload") & 2) {
        $this->_mbenc = mb_internal_encoding();
        mb_internal_encoding("latin1");
    }
}
/// leave mbstring workaround mode
/// restores the encoding saved by _MBPush(), if there is one
function _MBPop()
{
    if ($this->_mbenc) {
        mb_internal_encoding($this->_mbenc);
    }
}
/// connect to searchd server
/// returns the connected stream on success; on failure stores an error
/// message and returns false
function _Connect()
{
    if ($this->_socket !== false) {
        // we are in persistent connection mode, so we have a socket
        // however, need to check whether it's still alive
        if (!@feof($this->_socket)) {
            return $this->_socket;
        }

        // force reopen
        $this->_socket = false;
    }

    $errno = 0;
    $errstr = "";
    $this->_connerror = false;

    // a configured socket path takes precedence over host/port
    if ($this->_path) {
        $host = $this->_path;
        $port = 0;
    } else {
        $host = $this->_host;
        $port = $this->_port;
    }

    if ($this->_timeout <= 0) {
        $fp = @fsockopen($host, $port, $errno, $errstr);
    } else {
        $fp = @fsockopen($host, $port, $errno, $errstr, $this->_timeout);
    }

    if (!$fp) {
        if ($this->_path) {
            $location = $this->_path;
        } else {
            $location = "{$this->_host}:{$this->_port}";
        }

        $errstr = trim($errstr);
        $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
        $this->_connerror = true;
        return false;
    }

    // send my version
    // this is a subtle part. we must do it before (!) reading back from searchd.
    // because otherwise under some conditions (reported on FreeBSD for instance)
    // TCP stack could throttle write-write-read pattern because of Nagle.
    if (!$this->_Send($fp, pack("N", 1), 4)) {
        fclose($fp);
        $this->_error = "failed to send client protocol version";
        return false;
    }

    // check version
    // a short or failed read is treated as version 0 instead of being
    // passed to unpack(), so it ends in the same error path without warnings
    $v = 0;
    $reply = fread($fp, 4);
    if (is_string($reply) && strlen($reply) == 4) {
        list(, $v) = unpack("N*", $reply);
    }
    $v = (int)$v;
    if ($v < 1) {
        fclose($fp);
        $this->_error = "expected searchd protocol version 1+, got version '$v'";
        return false;
    }

    return $fp;
}
/// get and check response packet from searchd server
/// reads the 8-byte header (status, version, body length) and then the body
/// returns the body on success (minus the leading warning message for a
/// warning status); on failure stores an error message and returns false
function _GetResponse($fp, $client_ver)
{
    $response = "";
    $len = 0;

    $header = fread($fp, 8);
    if (strlen($header) == 8) {
        list($status, $ver, $len) = array_values(unpack("n2a/Nb", $header));
        $left = $len;
        while ($left > 0 && !feof($fp)) {
            $chunk = fread($fp, $left);
            // compare explicitly: a plain truthiness test would throw away
            // a chunk that consists of the single character "0"
            if ($chunk !== false && $chunk !== "") {
                $response .= $chunk;
                $left -= strlen($chunk);
            }
        }
    }

    // only close the stream when it is not the stored persistent socket
    if ($this->_socket === false) {
        fclose($fp);
    }

    // check response
    $read = strlen($response);
    if (!$response || $read != $len) {
        $this->_error = $len
            ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
            : "received zero-sized searchd response";
        return false;
    }

    // check status
    if ($status == SEARCHD_WARNING) {
        // body starts with a length-prefixed warning message
        list(, $wlen) = unpack("N*", substr($response, 0, 4));
        $this->_warning = substr($response, 4, $wlen);
        return substr($response, 4 + $wlen);
    }
    if ($status == SEARCHD_ERROR) {
        $this->_error = "searchd error: " . substr($response, 4);
        return false;
    }
    if ($status == SEARCHD_RETRY) {
        $this->_error = "temporary searchd error: " . substr($response, 4);
        return false;
    }
    if ($status != SEARCHD_OK) {
        $this->_error = "unknown status code '$status'";
        return false;
    }

    // check version
    if ($ver < $client_ver) {
        $this->_warning = sprintf("searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
            $ver >> 8, $ver & 0xff, $client_ver >> 8, $client_ver & 0xff);
    }

    return $response;
}
/////////////////////////////////////////////////////////////////////////////
// searching
/////////////////////////////////////////////////////////////////////////////
/// set offset and count into result set,
/// and optionally set max-matches and cutoff limits
/// ($max and $cutoff are only applied when greater than 0)
function SetLimits($offset, $limit, $max = 0, $cutoff = 0)
{
    assert(is_int($offset));
    assert(is_int($limit));
    assert($offset >= 0);
    assert($limit > 0);
    assert($max >= 0);

    $this->_offset = $offset;
    $this->_limit = $limit;
    if ($max > 0) {
        $this->_maxmatches = $max;
    }
    if ($cutoff > 0) {
        $this->_cutoff = $cutoff;
    }
}
/// set maximum query time, in milliseconds, per-index
/// integer, 0 means "do not limit"
function SetMaxQueryTime($max)
{
    assert(is_int($max));
    assert($max >= 0);
    $this->_maxquerytime = $max;
}
/// set matching mode (one of the SPH_MATCH_xxx constants)
function SetMatchMode($mode)
{
    assert($mode == SPH_MATCH_ALL
        || $mode == SPH_MATCH_ANY
        || $mode == SPH_MATCH_PHRASE
        || $mode == SPH_MATCH_BOOLEAN
        || $mode == SPH_MATCH_EXTENDED
        || $mode == SPH_MATCH_FULLSCAN
        || $mode == SPH_MATCH_EXTENDED2);
    $this->_mode = $mode;
}
/// set ranking mode (one of the SPH_RANK_xxx constants)
function SetRankingMode($ranker)
{
    // accept every ranker this file defines, including SPH_RANK_MATCHANY
    // and SPH_RANK_FIELDMASK
    assert($ranker == SPH_RANK_PROXIMITY_BM25
        || $ranker == SPH_RANK_BM25
        || $ranker == SPH_RANK_NONE
        || $ranker == SPH_RANK_WORDCOUNT
        || $ranker == SPH_RANK_PROXIMITY
        || $ranker == SPH_RANK_MATCHANY
        || $ranker == SPH_RANK_FIELDMASK);
    $this->_ranker = $ranker;
}
/// set matches sorting mode (one of the SPH_SORT_xxx constants)
/// $sortby must be non-empty for every mode except SPH_SORT_RELEVANCE
function SetSortMode($mode, $sortby = "")
{
    assert(
        $mode == SPH_SORT_RELEVANCE ||
        $mode == SPH_SORT_ATTR_DESC ||
        $mode == SPH_SORT_ATTR_ASC ||
        $mode == SPH_SORT_TIME_SEGMENTS ||
        $mode == SPH_SORT_EXTENDED ||
        $mode == SPH_SORT_EXPR);
    assert(is_string($sortby));
    assert($mode == SPH_SORT_RELEVANCE || strlen($sortby) > 0);

    $this->_sort = $mode;
    $this->_sortby = $sortby;
}
/// bind per-field weights by order
/// DEPRECATED; use SetFieldWeights() instead
function SetWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $weight) {
        assert(is_int($weight));
    }

    $this->_weights = $weights;
}
/// bind per-field weights by name
/// $weights maps field names (strings) to integer weights
function SetFieldWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $name => $weight) {
        assert(is_string($name));
        assert(is_int($weight));
    }
    $this->_fieldweights = $weights;
}
/// bind per-index weights by name
/// $weights maps index names (strings) to integer weights
function SetIndexWeights($weights)
{
    assert(is_array($weights));
    foreach ($weights as $index => $weight) {
        assert(is_string($index));
        assert(is_int($weight));
    }
    $this->_indexweights = $weights;
}
/// set IDs range to match
/// only match records if document ID is between $min and $max (inclusive)
function SetIDRange($min, $max)
{
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);
    $this->_min_id = $min;
    $this->_max_id = $max;
}
/// set values set filter
/// only match records where $attribute value is in given set
/// (nothing is added when $values is not a non-empty array)
function SetFilter($attribute, $values, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_array($values));
    assert(count($values));

    if (is_array($values) && count($values)) {
        foreach ($values as $value) {
            assert(is_numeric($value));
        }

        $this->_filters[] = array("type" => SPH_FILTER_VALUES, "attr" => $attribute, "exclude" => $exclude, "values" => $values);
    }
}
/// set range filter
/// only match records if $attribute value is between $min and $max (inclusive)
function SetFilterRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_numeric($min));
    assert(is_numeric($max));
    assert($min <= $max);

    $this->_filters[] = array("type" => SPH_FILTER_RANGE, "attr" => $attribute, "exclude" => $exclude, "min" => $min, "max" => $max);
}
/// set float range filter
/// only match records if $attribute value is between $min and $max (inclusive)
function SetFilterFloatRange($attribute, $min, $max, $exclude = false)
{
    assert(is_string($attribute));
    assert(is_float($min));
    assert(is_float($max));
    assert($min <= $max);

    $this->_filters[] = array("type" => SPH_FILTER_FLOATRANGE, "attr" => $attribute, "exclude" => $exclude, "min" => $min, "max" => $max);
}
/// setup anchor point for geosphere distance calculations
/// required to use @geodist in filters and sorting
/// latitude and longitude must be in radians
function SetGeoAnchor($attrlat, $attrlong, $lat, $long)
{
    assert(is_string($attrlat));
    assert(is_string($attrlong));
    assert(is_float($lat));
    assert(is_float($long));

    $this->_anchor = array("attrlat" => $attrlat, "attrlong" => $attrlong, "lat" => $lat, "long" => $long);
}
/// set grouping attribute and function
/// $func is one of the SPH_GROUPBY_xxx constants
function SetGroupBy($attribute, $func, $groupsort = "@group desc")
{
    assert(is_string($attribute));
    assert(is_string($groupsort));
    assert($func == SPH_GROUPBY_DAY
        || $func == SPH_GROUPBY_WEEK
        || $func == SPH_GROUPBY_MONTH
        || $func == SPH_GROUPBY_YEAR
        || $func == SPH_GROUPBY_ATTR
        || $func == SPH_GROUPBY_ATTRPAIR);

    $this->_groupby = $attribute;
    $this->_groupfunc = $func;
    $this->_groupsort = $groupsort;
}
/// set count-distinct attribute for group-by queries
function SetGroupDistinct($attribute)
{
    assert(is_string($attribute));
    $this->_groupdistinct = $attribute;
}
/// set distributed retries count and delay
function SetRetries($count, $delay = 0)
{
    assert(is_int($count) && $count >= 0);
    assert(is_int($delay) && $delay >= 0);
    $this->_retrycount = $count;
    $this->_retrydelay = $delay;
}
/// set result set format (hash or array; hash by default)
/// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
function SetArrayResult($arrayresult)
{
    assert(is_bool($arrayresult));
    $this->_arrayresult = $arrayresult;
}
/// set attribute values override
/// there can be only one override per attribute
/// $values must be a hash that maps document IDs to attribute values
function SetOverride($attrname, $attrtype, $values)
{
    assert(is_string($attrname));
    assert(in_array($attrtype, array(SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT)));
    assert(is_array($values));

    // keyed by attribute name, so a second call for the same attribute replaces the first
    $this->_overrides[$attrname] = array("attr" => $attrname, "type" => $attrtype, "values" => $values);
}
/// set select-list (attributes or expressions), SQL-like syntax
function SetSelect($select)
{
    assert(is_string($select));
    $this->_select = $select;
}
//////////////////////////////////////////////////////////////////////////////
/// clear all filters (for multi-queries)
/// also clears the geo anchor point
function ResetFilters()
{
    $this->_filters = array();
    $this->_anchor = array();
}
/// clear groupby settings (for multi-queries)
/// restores the same group-by values the constructor sets
function ResetGroupBy()
{
    $this->_groupby = "";
    $this->_groupfunc = SPH_GROUPBY_DAY;
    $this->_groupsort = "@group desc";
    $this->_groupdistinct = "";
}
/// clear all attribute value overrides (for multi-queries)
function ResetOverrides()
{
    $this->_overrides = array();
}
//////////////////////////////////////////////////////////////////////////////
/// connect to searchd server, run given search query through given indexes,
/// and return the search results
/// returns the result set array, or false on failure (see GetLastError())
function Query($query, $index = "*", $comment = "")
{
    assert(empty($this->_reqs));

    $this->AddQuery($query, $index, $comment);
    $results = $this->RunQueries();
    $this->_reqs = array(); // just in case it failed too early

    if (!is_array($results)) {
        return false; // probably network error; error message should be already filled
    }

    // copy the per-query error and warning into the single-query fields
    $this->_error = $results[0]["error"];
    $this->_warning = $results[0]["warning"];
    if ($results[0]["status"] == SEARCHD_ERROR) {
        return false;
    }
    return $results[0];
}
/// helper to pack floats in network byte order
/// returns the 4-byte single-precision encoding of $f, most significant byte first
function _PackFloat($f)
{
    $t1 = pack("f", $f); // machine order
    list(, $t2) = unpack("L*", $t1); // int in machine order
    return pack("N", $t2);
}
/// add query to multi-query batch
/// serializes the query text together with the current client settings
/// returns index into results array from RunQueries() call
function AddQuery($query, $index = "*", $comment = "")
{
    // mbstring workaround
    $this->_MBPush();

    // build request
    $req = pack("NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort); // mode and limits
    $req .= pack("N", strlen($this->_sortby)) . $this->_sortby;
    $req .= pack("N", strlen($query)) . $query; // query itself
    $req .= pack("N", count($this->_weights)); // weights
    foreach ($this->_weights as $weight) {
        $req .= pack("N", (int)$weight);
    }
    $req .= pack("N", strlen($index)) . $index; // indexes
    $req .= pack("N", 1); // id64 range marker
    $req .= sphPackU64($this->_min_id) . sphPackU64($this->_max_id); // id64 range

    // filters
    $req .= pack("N", count($this->_filters));
    foreach ($this->_filters as $filter) {
        $req .= pack("N", strlen($filter["attr"])) . $filter["attr"];
        $req .= pack("N", $filter["type"]);
        switch ($filter["type"]) {
            case SPH_FILTER_VALUES:
                $req .= pack("N", count($filter["values"]));
                foreach ($filter["values"] as $value) {
                    $req .= sphPackI64($value);
                }
                break;

            case SPH_FILTER_RANGE:
                $req .= sphPackI64($filter["min"]) . sphPackI64($filter["max"]);
                break;

            case SPH_FILTER_FLOATRANGE:
                $req .= $this->_PackFloat($filter["min"]) . $this->_PackFloat($filter["max"]);
                break;

            default:
                assert(0 && "internal error: unhandled filter type");
        }
        $req .= pack("N", $filter["exclude"]);
    }

    // group-by clause, max-matches count, group-sort clause, cutoff count
    $req .= pack("NN", $this->_groupfunc, strlen($this->_groupby)) . $this->_groupby;
    $req .= pack("N", $this->_maxmatches);
    $req .= pack("N", strlen($this->_groupsort)) . $this->_groupsort;
    $req .= pack("NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay);
    $req .= pack("N", strlen($this->_groupdistinct)) . $this->_groupdistinct;

    // anchor point
    if (empty($this->_anchor)) {
        $req .= pack("N", 0);
    } else {
        $a =& $this->_anchor;
        $req .= pack("N", 1);
        $req .= pack("N", strlen($a["attrlat"])) . $a["attrlat"];
        $req .= pack("N", strlen($a["attrlong"])) . $a["attrlong"];
        $req .= $this->_PackFloat($a["lat"]) . $this->_PackFloat($a["long"]);
    }

    // per-index weights
    $req .= pack("N", count($this->_indexweights));
    foreach ($this->_indexweights as $idx => $weight) {
        $req .= pack("N", strlen($idx)) . $idx . pack("N", $weight);
    }

    // max query time
    $req .= pack("N", $this->_maxquerytime);

    // per-field weights
    $req .= pack("N", count($this->_fieldweights));
    foreach ($this->_fieldweights as $field => $weight) {
        $req .= pack("N", strlen($field)) . $field . pack("N", $weight);
    }

    // comment
    $req .= pack("N", strlen($comment)) . $comment;

    // attribute overrides
    $req .= pack("N", count($this->_overrides));
    foreach ($this->_overrides as $key => $entry) {
        $req .= pack("N", strlen($entry["attr"])) . $entry["attr"];
        $req .= pack("NN", $entry["type"], count($entry["values"]));
        foreach ($entry["values"] as $id => $val) {
            assert(is_numeric($id));
            assert(is_numeric($val));

            $req .= sphPackU64($id);
            switch ($entry["type"]) {
                case SPH_ATTR_FLOAT:
                    $req .= $this->_PackFloat($val);
                    break;
                case SPH_ATTR_BIGINT:
                    $req .= sphPackI64($val);
                    break;
                default:
                    $req .= pack("N", $val);
                    break;
            }
        }
    }

    // select-list
    $req .= pack("N", strlen($this->_select)) . $this->_select;

    // mbstring workaround
    $this->_MBPop();

    // store request to requests array
    $this->_reqs[] = $req;
    return count($this->_reqs) - 1;
}
/// connect to searchd, run queries batch, and return an array of result sets
/// returns false when no queries were added or when connecting, sending or
/// reading the response fails (see GetLastError())
function RunQueries()
{
    if (empty($this->_reqs)) {
        $this->_error = "no queries defined, issue AddQuery() first";
        return false;
    }

    // mbstring workaround
    $this->_MBPush();

    if (!($fp = $this->_Connect())) {
        $this->_MBPop();
        return false;
    }

    // send query, get response
    $nreqs = count($this->_reqs);
    $req = join("", $this->_reqs);
    $len = 4 + strlen($req); // body length: 4-byte query count plus the queries
    $req = pack("nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs) . $req; // add header

    // $len+8 accounts for the 8 header bytes that precede the counted body
    if (!($this->_Send($fp, $req, $len + 8)) ||
        !($response = $this->_GetResponse($fp, VER_COMMAND_SEARCH))) {
        $this->_MBPop();
        return false;
    }

    // query sent ok; we can reset reqs now
    $this->_reqs = array();

    // parse and return response (the parser leaves mbstring workaround mode)
    return $this->_ParseSearchResponse($response, $nreqs);
}
/// parse and return search query (or queries) response
/// $response is the raw response body, $nreqs the number of queries sent
/// returns one result array per parsed query; leaves mbstring workaround
/// mode before returning
function _ParseSearchResponse($response, $nreqs)
{
    $p = 0; // current position
    $max = strlen($response); // max position for checks, to protect against broken responses

    $results = array();
    for ($ires = 0; $ires < $nreqs && $p < $max; $ires++) {
        $results[] = array();
        $result =& $results[$ires];

        $result["error"] = "";
        $result["warning"] = "";

        // extract status
        list(, $status) = unpack("N*", substr($response, $p, 4));
        $p += 4;
        $result["status"] = $status;
        if ($status != SEARCHD_OK) {
            list(, $len) = unpack("N*", substr($response, $p, 4));
            $p += 4;
            $message = substr($response, $p, $len);
            $p += $len;

            if ($status == SEARCHD_WARNING) {
                $result["warning"] = $message;
            } else {
                // any other non-OK status: record the error and skip to the next result
                $result["error"] = $message;
                continue;
            }
        }

        // read schema
        $fields = array();
        $attrs = array();

        list(, $nfields) = unpack("N*", substr($response, $p, 4));
        $p += 4;
        while ($nfields-- > 0 && $p < $max) {
            list(, $len) = unpack("N*", substr($response, $p, 4));
            $p += 4;
            $fields[] = substr($response, $p, $len);
            $p += $len;
        }
        $result["fields"] = $fields;

        list(, $nattrs) = unpack("N*", substr($response, $p, 4));
        $p += 4;
        while ($nattrs-- > 0 && $p < $max) {
            list(, $len) = unpack("N*", substr($response, $p, 4));
            $p += 4;
            $attr = substr($response, $p, $len);
            $p += $len;
            list(, $type) = unpack("N*", substr($response, $p, 4));
            $p += 4;
            $attrs[$attr] = $type;
        }
        $result["attrs"] = $attrs;

        // read match count
        list(, $count) = unpack("N*", substr($response, $p, 4));
        $p += 4;
        list(, $id64) = unpack("N*", substr($response, $p, 4));
        $p += 4;

        // read matches
        $idx = -1;
        while ($count-- > 0 && $p < $max) {
            // index into result array
            $idx++;

            // parse document id and weight
            if ($id64) {
                $doc = sphUnpackU64(substr($response, $p, 8));
                $p += 8;
                list(, $weight) = unpack("N*", substr($response, $p, 4));
                $p += 4;
            } else {
                list($doc, $weight) = array_values(unpack("N*N*",
                    substr($response, $p, 8)));
                $p += 8;
                $doc = sphFixUint($doc);
            }
            $weight = sprintf("%u", $weight);

            // create match entry
            if ($this->_arrayresult) {
                $result["matches"][$idx] = array("id" => $doc, "weight" => $weight);
            } else {
                $result["matches"][$doc]["weight"] = $weight;
            }

            // parse and create attributes
            $attrvals = array();
            foreach ($attrs as $attr => $type) {
                // handle 64bit ints
                if ($type == SPH_ATTR_BIGINT) {
                    $attrvals[$attr] = sphUnpackI64(substr($response, $p, 8));
                    $p += 8;
                    continue;
                }

                // handle floats
                if ($type == SPH_ATTR_FLOAT) {
                    list(, $uval) = unpack("N*", substr($response, $p, 4));
                    $p += 4;
                    list(, $fval) = unpack("f*", pack("L", $uval));
                    $attrvals[$attr] = $fval;
                    continue;
                }

                // handle everything else as unsigned ints
                list(, $val) = unpack("N*", substr($response, $p, 4));
                $p += 4;
                if ($type & SPH_ATTR_MULTI) {
                    // for multi-value attributes the first int is the value count
                    $attrvals[$attr] = array();
                    $nvalues = $val;
                    while ($nvalues-- > 0 && $p < $max) {
                        list(, $val) = unpack("N*", substr($response, $p, 4));
                        $p += 4;
                        $attrvals[$attr][] = sphFixUint($val);
                    }
                } else {
                    $attrvals[$attr] = sphFixUint($val);
                }
            }

            if ($this->_arrayresult) {
                $result["matches"][$idx]["attrs"] = $attrvals;
            } else {
                $result["matches"][$doc]["attrs"] = $attrvals;
            }
        }

        list($total, $total_found, $msecs, $words) =
            array_values(unpack("N*N*N*N*", substr($response, $p, 16)));
        $result["total"] = sprintf("%u", $total);
        $result["total_found"] = sprintf("%u", $total_found);
        $result["time"] = sprintf("%.3f", $msecs / 1000);
        $p += 16;

        while ($words-- > 0 && $p < $max) {
            list(, $len) = unpack("N*", substr($response, $p, 4));
            $p += 4;
            $word = substr($response, $p, $len);
            $p += $len;
            list($docs, $hits) = array_values(unpack("N*N*", substr($response, $p, 8)));
            $p += 8;
            $result["words"][$word] = array(
                "docs" => sprintf("%u", $docs),
                "hits" => sprintf("%u", $hits));
        }
    }

    $this->_MBPop();
    return $results;
}
/////////////////////////////////////////////////////////////////////////////
// excerpts generation
/////////////////////////////////////////////////////////////////////////////
/// connect to searchd server, and generate excerpts (snippets)
/// of given documents for given query. returns false on failure,
/// an array of snippets on success
/// Build excerpts (snippets) of the given documents for the given query.
///
/// $docs  - array of document body strings
/// $index - name of the index whose settings are sent with the request
/// $words - query string whose keywords are highlighted
/// $opts  - optional settings; recognised keys are before_match, after_match,
///          chunk_separator, limit, around, exact_phrase, single_passage,
///          use_boundaries and weight_order
///
/// Returns an array with one snippet string per document (in the order the
/// documents were sent), or false on failure.
function BuildExcerpts( $docs, $index, $words, $opts = array() )
{
    assert( is_array($docs) );
    assert( is_string($index) );
    assert( is_string($words) );
    assert( is_array($opts) );

    $this->_MBPush();

    if ( !( $fp = $this->_Connect() ) )
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // fixup options
    /////////////////

    // isset() is used on purpose: a key that is present but null also gets the default
    $defaults = array(
        "before_match"    => "",
        "after_match"     => "",
        "chunk_separator" => " ... ",
        "limit"           => 256,
        "around"          => 5,
        "exact_phrase"    => false,
        "single_passage"  => false,
        "use_boundaries"  => false,
        "weight_order"    => false,
    );
    foreach ( $defaults as $key => $value )
    {
        if ( !isset($opts[$key]) )
            $opts[$key] = $value;
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req
    $flags = 1; // remove spaces
    if ( $opts["exact_phrase"] )   $flags |= 2;
    if ( $opts["single_passage"] ) $flags |= 4;
    if ( $opts["use_boundaries"] ) $flags |= 8;
    if ( $opts["weight_order"] )   $flags |= 16;

    $req  = pack( "NN", 0, $flags ); // mode=0, flags=$flags
    $req .= pack( "N", strlen($index) ) . $index; // req index
    $req .= pack( "N", strlen($words) ) . $words; // req words

    // options
    $req .= pack( "N", strlen($opts["before_match"]) ) . $opts["before_match"];
    $req .= pack( "N", strlen($opts["after_match"]) ) . $opts["after_match"];
    $req .= pack( "N", strlen($opts["chunk_separator"]) ) . $opts["chunk_separator"];
    $req .= pack( "N", (int)$opts["limit"] );
    $req .= pack( "N", (int)$opts["around"] );

    // documents
    $ndocs = count($docs);
    $req .= pack( "N", $ndocs );
    foreach ( $docs as $doc )
    {
        assert( is_string($doc) );
        $req .= pack( "N", strlen($doc) ) . $doc;
    }

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($req);
    $req = pack( "nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len ) . $req; // add header
    if ( !( $this->_Send( $fp, $req, $len+8 ) ) ||
         !( $response = $this->_GetResponse( $fp, VER_COMMAND_EXCERPT ) ) )
    {
        $this->_MBPop();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    $pos  = 0;
    $res  = array();
    $rlen = strlen($response);
    for ( $i = 0; $i < $ndocs; $i++ )
    {
        // make sure the 4-byte length prefix is really there before unpacking it
        if ( $pos+4 > $rlen )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop();
            return false;
        }
        list(,$len) = unpack( "N*", substr( $response, $pos, 4 ) );
        $pos += 4;

        if ( $pos+$len > $rlen )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop();
            return false;
        }
        $res[] = $len ? substr( $response, $pos, $len ) : "";
        $pos += $len;
    }

    $this->_MBPop();
    return $res;
}
/////////////////////////////////////////////////////////////////////////////
// keyword generation
/////////////////////////////////////////////////////////////////////////////
/// connect to searchd server, and generate keyword list for a given query
/// returns false on failure,
/// an array of words on success
/// Ask searchd to tokenize a query and return its keyword list.
///
/// $query - query string to extract keywords from
/// $index - name of the index sent with the request
/// $hits  - bool; when true each entry also carries "docs" and "hits" counters
///
/// Returns an array of array("tokenized"=>..., "normalized"=>...[, "docs", "hits"])
/// entries, or false on failure.
function BuildKeywords( $query, $index, $hits )
{
    assert( is_string($query) );
    assert( is_string($index) );
    assert( is_bool($hits) );

    $this->_MBPush();

    if ( !( $fp = $this->_Connect() ) )
    {
        $this->_MBPop();
        return false;
    }

    /////////////////
    // build request
    /////////////////

    // v.1.0 req
    $req  = pack( "N", strlen($query) ) . $query; // req query
    $req .= pack( "N", strlen($index) ) . $index; // req index
    $req .= pack( "N", (int)$hits );

    ////////////////////////////
    // send query, get response
    ////////////////////////////

    $len = strlen($req);
    $req = pack( "nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len ) . $req; // add header
    if ( !( $this->_Send( $fp, $req, $len+8 ) ) ||
         !( $response = $this->_GetResponse( $fp, VER_COMMAND_KEYWORDS ) ) )
    {
        $this->_MBPop();
        return false;
    }

    //////////////////
    // parse response
    //////////////////

    $pos  = 0;
    $res  = array();
    $rlen = strlen($response);

    list(,$nwords) = unpack( "N*", substr( $response, $pos, 4 ) );
    $pos += 4;

    for ( $i = 0; $i < $nwords; $i++ )
    {
        // two length-prefixed strings: tokenized form, then normalized form
        $forms = array();
        for ( $j = 0; $j < 2; $j++ )
        {
            // check the 4-byte length prefix is present before unpacking it
            if ( $pos+4 > $rlen )
            {
                $this->_error = "incomplete reply";
                $this->_MBPop();
                return false;
            }
            list(,$len) = unpack( "N*", substr( $response, $pos, 4 ) );
            $pos += 4;
            $forms[] = $len ? substr( $response, $pos, $len ) : "";
            $pos += $len;
        }

        $entry = array( "tokenized" => $forms[0], "normalized" => $forms[1] );

        if ( $hits )
        {
            // check the two 4-byte counters are present before unpacking them
            if ( $pos+8 > $rlen )
            {
                $this->_error = "incomplete reply";
                $this->_MBPop();
                return false;
            }
            list( $ndocs, $nhits ) = array_values( unpack( "N*N*", substr( $response, $pos, 8 ) ) );
            $pos += 8;
            $entry["docs"] = $ndocs;
            $entry["hits"] = $nhits;
        }

        // a string length that overshoots the reply is caught here
        if ( $pos > $rlen )
        {
            $this->_error = "incomplete reply";
            $this->_MBPop();
            return false;
        }

        $res[] = $entry;
    }

    $this->_MBPop();
    return $res;
}
/// Escape characters that have a special meaning in the query syntax by
/// prefixing each of them with a backslash.
///
/// $string - raw text to escape
///
/// Returns the escaped string.
function EscapeString( $string )
{
    // the backslash must come first: str_replace() applies the pairs in order,
    // so this keeps the backslashes added by later pairs from being doubled
    $from = array( '\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=' );
    $to   = array( '\\\\', '\\(', '\\)', '\\|', '\\-', '\\!', '\\@', '\\~', '\\"', '\\&', '\\/', '\\^', '\\$', '\\=' );

    return str_replace( $from, $to, $string );
}
/////////////////////////////////////////////////////////////////////////////
// attribute updates
/////////////////////////////////////////////////////////////////////////////
/// batch update given attributes in given rows in given indexes
/// returns amount of updated documents (0 or more) on success, or -1 on failure
/// Batch-update the given attributes in the given rows of the given index.
///
/// $index  - index name string
/// $attrs  - array of attribute name strings
/// $values - map of document id => array of new values, one per entry in $attrs
/// $mva    - when true every value is itself an array of ints;
///           when false every value is a single int
///
/// Returns the number of updated documents (0 or more) on success, -1 on failure.
function UpdateAttributes( $index, $attrs, $values, $mva = false )
{
    // verify everything
    assert( is_string($index) );
    assert( is_bool($mva) );

    assert( is_array($attrs) );
    foreach ( $attrs as $attr )
        assert( is_string($attr) );

    assert( is_array($values) );
    foreach ( $values as $id => $entry )
    {
        assert( is_numeric($id) );
        assert( is_array($entry) );
        assert( count($entry) == count($attrs) );
        foreach ( $entry as $v )
        {
            if ( $mva )
            {
                assert( is_array($v) );
                foreach ( $v as $vv )
                    assert( is_int($vv) );
            } else
                assert( is_int($v) );
        }
    }

    // strlen() is used for byte lengths below, so wrap the work in
    // _MBPush()/_MBPop() the same way the other request methods do
    $this->_MBPush();

    // build request
    $req  = pack( "N", strlen($index) ) . $index;

    $req .= pack( "N", count($attrs) );
    foreach ( $attrs as $attr )
    {
        $req .= pack( "N", strlen($attr) ) . $attr;
        $req .= pack( "N", $mva ? 1 : 0 );
    }

    $req .= pack( "N", count($values) );
    foreach ( $values as $id => $entry )
    {
        $req .= sphPackU64( $id );
        foreach ( $entry as $v )
        {
            // for MVA send the value count followed by the values; otherwise the value itself
            $req .= pack( "N", $mva ? count($v) : $v );
            if ( $mva )
                foreach ( $v as $vv )
                    $req .= pack( "N", $vv );
        }
    }

    // connect, send query, get response
    if ( !( $fp = $this->_Connect() ) )
    {
        $this->_MBPop();
        return -1;
    }

    $len = strlen($req);
    $req = pack( "nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len ) . $req; // add header
    if ( !$this->_Send( $fp, $req, $len+8 ) )
    {
        $this->_MBPop();
        return -1;
    }

    if ( !( $response = $this->_GetResponse( $fp, VER_COMMAND_UPDATE ) ) )
    {
        $this->_MBPop();
        return -1;
    }

    // parse response
    list(,$updated) = unpack( "N*", substr( $response, 0, 4 ) );

    $this->_MBPop();
    return $updated;
}
/////////////////////////////////////////////////////////////////////////////
// persistent connections
/////////////////////////////////////////////////////////////////////////////
/// Open a persistent connection: connect, send the PERSIST command and keep
/// the handle in $this->_socket.
///
/// Returns true on success; false if a connection is already stored, or if
/// connecting or sending fails.
function Open()
{
    if ( $this->_socket !== false )
    {
        $this->_error = 'already connected';
        return false;
    }

    if ( !( $fp = $this->_Connect() ) )
        return false;

    // command, command version = 0, body length = 4, body = 1
    $req = pack( "nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1 );
    if ( !$this->_Send( $fp, $req, 12 ) )
        return false;

    $this->_socket = $fp;
    return true;
}
/// Close the persistent connection stored in $this->_socket.
///
/// Returns true on success, or false (with the error set to 'not connected')
/// when no connection is stored.
function Close()
{
    if ( $this->_socket === false )
    {
        $this->_error = 'not connected';
        return false;
    }

    fclose( $this->_socket );
    $this->_socket = false;

    return true;
}
//////////////////////////////////////////////////////////////////////////
// status
//////////////////////////////////////////////////////////////////////////
/// Query searchd for its status table.
///
/// Returns an array of rows, each row being a list of column strings,
/// or false on failure.
function Status()
{
    $this->_MBPush();

    if ( !( $fp = $this->_Connect() ) )
    {
        $this->_MBPop();
        return false;
    }

    $req = pack( "nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1 ); // len=4, body=1
    if ( !( $this->_Send( $fp, $req, 12 ) ) ||
         !( $response = $this->_GetResponse( $fp, VER_COMMAND_STATUS ) ) )
    {
        $this->_MBPop();
        return false;
    }

    // the reply starts with the row and column counts ...
    $p = 0;
    list( $rows, $cols ) = array_values( unpack( "N*N*", substr( $response, $p, 8 ) ) );
    $p += 8;

    // ... followed by rows*cols length-prefixed strings, row by row
    $res = array();
    for ( $i = 0; $i < $rows; $i++ )
    {
        for ( $j = 0; $j < $cols; $j++ )
        {
            list(,$len) = unpack( "N*", substr( $response, $p, 4 ) );
            $p += 4;
            $res[$i][] = substr( $response, $p, $len );
            $p += $len;
        }
    }

    $this->_MBPop();
    return $res;
}
}
//
// $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
//
测试控制器(/application/controllers/search_page.php)
<?php if (!defined('BASEPATH')) die('No Access');

/**
 * Demo controller for Sphinx full-text search.
 *
 * search() renders the search form; result() runs the query against the
 * Sphinx index, loads the matching rows from the database and prints them
 * with highlighted excerpts.
 */
class Search_page extends CI_Controller
{
    public function __construct()
    {
        parent::__construct();
    }

    /**
     * Show the search form view.
     */
    public function search()
    {
        $this->load->helper('url');
        $this->load->view('search');
    }

    /**
     * Run the search for the "words" GET parameter and print the results.
     *
     * NOTE(review): several echoed strings below are empty or only a newline;
     * the HTML tags they contained appear to have been stripped from this
     * listing - confirm against the original source.
     */
    public function result()
    {
        header('content-type: text/html;charset=utf-8');

        // The parameter may be missing or (with words[]=...) an array; the
        // Sphinx client asserts on a string, so anything else becomes ''.
        $words = $this->input->get('words');
        if (!is_string($words)) {
            $words = '';
        }

        $this->load->library('sphinx_client', NULL, 'sphinx');

        $index = "test1";
        $opts = array(
            "before_match"    => '',
            "after_match"     => "",
            "chunk_separator" => " ... ",
            "limit"           => 60,
            "around"          => 3,
        );

        $this->sphinx->SetServer('192.168.23.128', 9312);
        $this->sphinx->SetConnectTimeout(3);
        $this->sphinx->SetArrayResult(TRUE);
        $this->sphinx->SetMatchMode(SPH_MATCH_ANY);
        $this->sphinx->SetLimits(0, 20);

        $res = $this->sphinx->Query($words, $index);
        if ($res === FALSE) {
            var_dump($this->sphinx->GetLastError());
            exit;
        }

        // $words is user input: escape it before writing it into the page
        $safe_words = htmlspecialchars($words, ENT_QUOTES, 'UTF-8');
        echo "关键词 {$safe_words} ,找到约 {$res['total_found']} 结果,用时 {$res['time']}s";
        echo "\n";

        // per-keyword statistics
        if (array_key_exists('words', $res) && is_array($res['words'])) {
            foreach ($res['words'] as $k => $v) {
                // keywords are derived from the user's query, so escape them too
                echo htmlspecialchars((string)$k, ENT_QUOTES, 'UTF-8')
                    . ' : ' . $v['docs'] . ' - ' . $v['hits'] . "\n";
            }
        }
        echo "\n";

        $this->load->database();

        // collect the ids of the matched documents
        $idarr = array();
        if (array_key_exists('matches', $res) && is_array($res['matches'])) {
            foreach ($res['matches'] as $v) {
                $idarr[] = $v['id'];
            }
        }

        if (count($idarr) > 0) {
            $this->db->from('shop_goods_info');
            $this->db->select('pname,cretime');
            $this->db->where_in('id', $idarr);
            $result = $this->db->get()->result_array();

            echo '';

            // BuildExcerpts() asserts that every document is a string
            $name_arr = array();
            foreach ($result as $k => $v) {
                $name_arr[$k] = (string)$v['pname'];
            }

            $excerpts = $this->sphinx->BuildExcerpts($name_arr, $index, $words, $opts);
            if (!is_array($excerpts)) {
                // excerpt generation failed: fall back to the plain, escaped names
                $excerpts = array();
                foreach ($name_arr as $k => $name) {
                    $excerpts[$k] = htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
                }
            }

            // NOTE(review): excerpts are printed as returned so that the
            // before_match/after_match markup survives; product names are
            // therefore not HTML-escaped on this path.
            foreach ($result as $k => $v) {
                $excerpt = isset($excerpts[$k]) ? $excerpts[$k] : '';
                echo '- ' . $excerpt . '(' . date('Y-m-d H:i:s', (int)$v['cretime']) . ")\n";
            }
            echo '';
        }

        // NOTE(review): Open() is never called in this method, so this call
        // presumably just returns false; kept for parity with the original.
        $this->sphinx->Close();
    }
}
?>
搜索表单(/application/views/search.php)
<meta http-equiv="content-type" content="text/html;charset=utf-8"/>
<title>搜索</title>
<meta name="keywords" content="keywords"/>
<meta name="description" content="description"/>
<div id="panel"><form action="<?php echo site_url(array('search_page','result'));?>">
<input type="text" id="words" name="words" value="" size="60"/>
<input type="submit" name="submit" value="搜索"/>