sphinx全文检索(整合ci框架)



加入sphinx类库(/application/libraries/sphinx_client.php)
  1. php
  2. //
  3. // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
  4. //
  5. //
  6. // Copyright (c) 2001-2008, Andrew Aksyonoff. All rights reserved.
  7. //
  8. // This program is free software; you can redistribute it and/or modify
  9. // it under the terms of the GNU General Public License. You should have
  10. // received a copy of the GPL license along with this program; if you
  11. // did not, you can find it at http://www.gnu.org/
  12. //
  13. /////////////////////////////////////////////////////////////////////////////
  14. // PHP version of Sphinx searchd client (PHP API)
  15. /////////////////////////////////////////////////////////////////////////////
  /// known searchd commands
  define ( "SEARCHD_COMMAND_SEARCH",   0 );
  define ( "SEARCHD_COMMAND_EXCERPT",  1 );
  define ( "SEARCHD_COMMAND_UPDATE",   2 );
  define ( "SEARCHD_COMMAND_KEYWORDS", 3 );
  define ( "SEARCHD_COMMAND_PERSIST",  4 );
  define ( "SEARCHD_COMMAND_STATUS",   5 );
  define ( "SEARCHD_COMMAND_QUERY",    6 );

  /// current client-side command implementation versions
  define ( "VER_COMMAND_SEARCH",   0x116 );
  define ( "VER_COMMAND_EXCERPT",  0x100 );
  define ( "VER_COMMAND_UPDATE",   0x102 );
  define ( "VER_COMMAND_KEYWORDS", 0x100 );
  define ( "VER_COMMAND_STATUS",   0x100 );
  define ( "VER_COMMAND_QUERY",    0x100 );

  /// known searchd status codes
  define ( "SEARCHD_OK",      0 );
  define ( "SEARCHD_ERROR",   1 );
  define ( "SEARCHD_RETRY",   2 );
  define ( "SEARCHD_WARNING", 3 );

  /// known match modes
  define ( "SPH_MATCH_ALL",       0 );
  define ( "SPH_MATCH_ANY",       1 );
  define ( "SPH_MATCH_PHRASE",    2 );
  define ( "SPH_MATCH_BOOLEAN",   3 );
  define ( "SPH_MATCH_EXTENDED",  4 );
  define ( "SPH_MATCH_FULLSCAN",  5 );
  define ( "SPH_MATCH_EXTENDED2", 6 ); // extended engine V2 (TEMPORARY, WILL BE REMOVED)

  /// known ranking modes (ext2 only)
  define ( "SPH_RANK_PROXIMITY_BM25", 0 ); ///< default mode, phrase proximity major factor and BM25 minor one
  define ( "SPH_RANK_BM25",           1 ); ///< statistical mode, BM25 ranking only (faster but worse quality)
  define ( "SPH_RANK_NONE",           2 ); ///< no ranking, all matches get a weight of 1
  define ( "SPH_RANK_WORDCOUNT",      3 ); ///< simple word-count weighting, rank is a weighted sum of per-field keyword occurrence counts
  define ( "SPH_RANK_PROXIMITY",      4 );
  define ( "SPH_RANK_MATCHANY",       5 );
  define ( "SPH_RANK_FIELDMASK",      6 );

  /// known sort modes
  define ( "SPH_SORT_RELEVANCE",     0 );
  define ( "SPH_SORT_ATTR_DESC",     1 );
  define ( "SPH_SORT_ATTR_ASC",      2 );
  define ( "SPH_SORT_TIME_SEGMENTS", 3 );
  define ( "SPH_SORT_EXTENDED",      4 );
  define ( "SPH_SORT_EXPR",          5 );

  /// known filter types
  define ( "SPH_FILTER_VALUES",     0 );
  define ( "SPH_FILTER_RANGE",      1 );
  define ( "SPH_FILTER_FLOATRANGE", 2 );

  /// known attribute types
  define ( "SPH_ATTR_INTEGER",   1 );
  define ( "SPH_ATTR_TIMESTAMP", 2 );
  define ( "SPH_ATTR_ORDINAL",   3 );
  define ( "SPH_ATTR_BOOL",      4 );
  define ( "SPH_ATTR_FLOAT",     5 );
  define ( "SPH_ATTR_BIGINT",    6 );
  define ( "SPH_ATTR_MULTI",     0x40000000 );

  /// known grouping functions
  define ( "SPH_GROUPBY_DAY",      0 );
  define ( "SPH_GROUPBY_WEEK",     1 );
  define ( "SPH_GROUPBY_MONTH",    2 );
  define ( "SPH_GROUPBY_YEAR",     3 );
  define ( "SPH_GROUPBY_ATTR",     4 );
  define ( "SPH_GROUPBY_ATTRPAIR", 5 );
  78. // important properties of PHP's integers:
  79. // - always signed (one bit short of PHP_INT_SIZE)
  80. // - conversion from string to int is saturated
  81. // - float is double
  82. // - div converts arguments to floats
  83. // - mod converts arguments to ints
  84. // the packing code below works as follows:
  85. // - when we got an int, just pack it
  86. // if performance is a problem, this is the branch users should aim for
  87. //
  88. // - otherwise, we got a number in string form
  89. // this might be due to different reasons, but we assume that this is
  90. // because it didn't fit into PHP int
  91. //
  92. // - factor the string into high and low ints for packing
  93. // - if we have bcmath, then it is used
  94. // - if we don't, we have to do it manually (this is the fun part)
  95. //
  96. // - x64 branch does factoring using ints
  97. // - x32 (ab)uses floats, since we can't fit unsigned 32-bit number into an int
  98. //
  99. // unpacking routines are pretty much the same.
  100. // - return ints if we can
  101. // - otherwise format number into a string
  102. /// pack 64-bit signed
  /// pack a signed 64-bit value into 8 bytes (two big-endian 32-bit words, high word first)
  /// $v must be numeric; a numeric string is accepted for values that do not fit a native int
  function sphPackI64 ( $v )
  {
      assert ( is_numeric($v) );

      // x64: a native int holds the whole value
      if ( PHP_INT_SIZE>=8 )
      {
          $v = (int)$v;
          return pack ( "NN", $v>>32, $v&0xFFFFFFFF );
      }

      // x32, int: the high word is just the sign extension
      if ( is_int($v) )
          return pack ( "NN", $v<0 ? -1 : 0, $v );

      // x32, bcmath
      if ( function_exists("bcmul") )
      {
          // negative values are mapped to their unsigned two's complement form (add 2^64)
          if ( bccomp ( (string)$v, "0" ) == -1 )
              $v = bcadd ( "18446744073709551616", (string)$v );
          $h = bcdiv ( (string)$v, "4294967296", 0 );
          $l = bcmod ( (string)$v, "4294967296" );
          return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
      }

      // x32, no-bcmath: split the decimal string into the last 13 digits and the rest,
      // then recombine both parts modulo 2^32 using floats
      $p = max ( 0, strlen($v)-13 );
      $lo = abs ( (float)substr ( $v, $p ) );
      $hi = abs ( (float)substr ( $v, 0, $p ) );

      $m = $lo + $hi*1316134912.0; // (10 ^ 13) % (1 << 32) = 1316134912
      $q = floor ( $m/4294967296.0 );
      $l = $m - ($q*4294967296.0);
      $h = $hi*2328.0 + $q; // (10 ^ 13) / (1 << 32) = 2328

      // negate the magnitude (two's complement across both words)
      if ( $v<0 )
      {
          if ( $l==0 )
              $h = 4294967296.0 - $h;
          else
          {
              $h = 4294967295.0 - $h;
              $l = 4294967296.0 - $l;
          }
      }
      return pack ( "NN", $h, $l );
  }
  144. /// pack 64-bit unsigned
  /// pack an unsigned 64-bit value into 8 bytes (two big-endian 32-bit words, high word first)
  /// $v must be numeric; a numeric string is accepted for values that do not fit a native int
  function sphPackU64 ( $v )
  {
      assert ( is_numeric($v) );

      // x64
      if ( PHP_INT_SIZE>=8 )
      {
          assert ( $v>=0 );

          // x64, int
          if ( is_int($v) )
              return pack ( "NN", $v>>32, $v&0xFFFFFFFF );

          // x64, bcmath (operands passed as strings, as in the x32 branch below)
          if ( function_exists("bcmul") )
          {
              $h = bcdiv ( (string)$v, "4294967296", 0 );
              $l = bcmod ( (string)$v, "4294967296" );
              return pack ( "NN", $h, $l );
          }

          // x64, no-bcmath: split the decimal string into the last 13 digits and the rest
          $p = max ( 0, strlen($v)-13 );
          $lo = (int)substr ( $v, $p );
          $hi = (int)substr ( $v, 0, $p );

          $m = $lo + $hi*1316134912; // (10 ^ 13) % (1 << 32) = 1316134912
          $l = $m % 4294967296;
          $h = $hi*2328 + (int)($m/4294967296); // (10 ^ 13) / (1 << 32) = 2328

          return pack ( "NN", $h, $l );
      }

      // x32, int
      if ( is_int($v) )
          return pack ( "NN", 0, $v );

      // x32, bcmath
      if ( function_exists("bcmul") )
      {
          $h = bcdiv ( (string)$v, "4294967296", 0 );
          $l = bcmod ( (string)$v, "4294967296" );
          return pack ( "NN", (float)$h, (float)$l ); // conversion to float is intentional; int would lose 31st bit
      }

      // x32, no-bcmath: same factoring as above, done in floats
      $p = max ( 0, strlen($v)-13 );
      $lo = (float)substr ( $v, $p );
      $hi = (float)substr ( $v, 0, $p );

      $m = $lo + $hi*1316134912.0;
      $q = floor ( $m / 4294967296.0 );
      $l = $m - ($q * 4294967296.0);
      $h = $hi*2328.0 + $q;

      return pack ( "NN", $h, $l );
  }
  191. // unpack 64-bit unsigned
  /// unpack an unsigned 64-bit value from 8 bytes (two big-endian 32-bit words)
  /// returns an int when the value fits a native int, otherwise a decimal string
  function sphUnpackU64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      if ( PHP_INT_SIZE>=8 )
      {
          // some PHP builds (5.2.2 to 5.2.5) return negative values from unpack("N")
          if ( $hi<0 ) $hi += (1<<32);
          if ( $lo<0 ) $lo += (1<<32);

          // x64, int
          if ( $hi<=2147483647 )
              return ($hi<<32) + $lo;

          // x64, bcmath
          if ( function_exists("bcmul") )
              return bcadd ( (string)$lo, bcmul ( (string)$hi, "4294967296" ) );

          // x64, no-bcmath: work in base 100000 so every intermediate fits a signed 64-bit int
          $C = 100000;
          $h = ((int)($hi / $C) << 32) + (int)($lo / $C);
          $l = (($hi % $C) << 32) + ($lo % $C);
          if ( $l>$C )
          {
              $h += (int)($l / $C);
              $l = $l % $C;
          }

          if ( $h==0 )
              return $l;
          return sprintf ( "%d%05d", $h, $l );
      }

      // x32, int
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }

      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // x32, bcmath
      if ( function_exists("bcmul") )
          return bcadd ( $lo, bcmul ( $hi, "4294967296" ) );

      // x32, no-bcmath: split into a 7-digit low part and the rest, using floats
      $hi = (float)$hi;
      $lo = (float)$lo;

      $q = floor ( $hi/10000000.0 );
      $r = $hi - $q*10000000.0;
      $m = $lo + $r*4967296.0;
      $mq = floor ( $m/10000000.0 );
      $l = $m - $mq*10000000.0;
      $h = $q*4294967296.0 + $r*429.0 + $mq;

      $h = sprintf ( "%.0f", $h );
      $l = sprintf ( "%07.0f", $l );
      if ( $h=="0" )
          return sprintf ( "%.0f", (float)$l );
      return $h . $l;
  }
  245. // unpack 64-bit signed
  /// unpack a signed 64-bit value from 8 bytes (two big-endian 32-bit words)
  /// returns an int when the value fits a native int, otherwise a decimal string
  function sphUnpackI64 ( $v )
  {
      list ( $hi, $lo ) = array_values ( unpack ( "N*N*", $v ) );

      // x64
      if ( PHP_INT_SIZE>=8 )
      {
          // some PHP builds (5.2.2 to 5.2.5) return negative values from unpack("N")
          if ( $hi<0 ) $hi += (1<<32);
          if ( $lo<0 ) $lo += (1<<32);

          return ($hi<<32) + $lo;
      }

      // x32, int: small non-negative value
      if ( $hi==0 )
      {
          if ( $lo>0 )
              return $lo;
          return sprintf ( "%u", $lo );
      }
      // x32, int: small negative value (high word is all ones)
      elseif ( $hi==-1 )
      {
          if ( $lo<0 )
              return $lo;
          return sprintf ( "%.0f", $lo - 4294967296.0 );
      }

      // negative value: invert both words now and add the carry $c later (two's complement)
      $neg = "";
      $c = 0;
      if ( $hi<0 )
      {
          $hi = ~$hi;
          $lo = ~$lo;
          $c = 1;
          $neg = "-";
      }

      $hi = sprintf ( "%u", $hi );
      $lo = sprintf ( "%u", $lo );

      // x32, bcmath
      if ( function_exists("bcmul") )
          return $neg . bcadd ( bcadd ( $lo, bcmul ( $hi, "4294967296" ) ), (string)$c );

      // x32, no-bcmath: split into a 7-digit low part and the rest, using floats
      $hi = (float)$hi;
      $lo = (float)$lo;

      $q = floor ( $hi/10000000.0 );
      $r = $hi - $q*10000000.0;
      $m = $lo + $r*4967296.0;
      $mq = floor ( $m/10000000.0 );
      $l = $m - $mq*10000000.0 + $c;
      $h = $q*4294967296.0 + $r*429.0 + $mq;
      if ( $l==10000000 )
      {
          // the carry overflowed the low part
          $l = 0;
          $h += 1;
      }

      $h = sprintf ( "%.0f", $h );
      $l = sprintf ( "%07.0f", $l );
      if ( $h=="0" )
          return $neg . sprintf ( "%.0f", (float)$l );
      return $neg . $h . $l;
  }
  /// normalize a 32-bit value obtained from unpack("N") to its unsigned form
  /// returns an int on 64-bit builds and a decimal string on 32-bit builds
  function sphFixUint ( $value )
  {
      if ( PHP_INT_SIZE>=8 )
      {
          // x64 route, workaround broken unpack() in 5.2.2+
          if ( $value<0 )
              $value += (1<<32);
          return $value;
      }

      // x32 route, ints are signed so the unsigned form is returned as a string
      return sprintf ( "%u", $value );
  }
  318. /// sphinx searchd client class
  class Sphinx_client
  {
      var $_host;          ///< searchd host (default is "localhost")
      var $_port;          ///< searchd port (default is 9312)
      var $_path;          ///< unix socket path ("unix://..."), false or '' when connecting over TCP
      var $_socket;        ///< open connection handle reused by _Connect(), false when there is none
      var $_offset;        ///< how many records to seek from result-set start (default is 0)
      var $_limit;         ///< how many records to return from result-set starting at offset (default is 20)
      var $_mode;          ///< query matching mode (default is SPH_MATCH_ALL)
      var $_weights;       ///< per-field weights (default is 1 for all fields)
      var $_sort;          ///< match sorting mode (default is SPH_SORT_RELEVANCE)
      var $_sortby;        ///< attribute to sort by (default is "")
      var $_min_id;        ///< min ID to match (default is 0, which means no limit)
      var $_max_id;        ///< max ID to match (default is 0, which means no limit)
      var $_filters;       ///< search filters
      var $_groupby;       ///< group-by attribute name
      var $_groupfunc;     ///< group-by function (to pre-process group-by attribute value with)
      var $_groupsort;     ///< group-by sorting clause (to sort groups in result set with)
      var $_groupdistinct; ///< group-by count-distinct attribute
      var $_maxmatches;    ///< max matches to retrieve
      var $_cutoff;        ///< cutoff to stop searching at (default is 0)
      var $_retrycount;    ///< distributed retries count
      var $_retrydelay;    ///< distributed retries delay
      var $_anchor;        ///< geographical anchor point
      var $_indexweights;  ///< per-index weights
      var $_ranker;        ///< ranking mode (default is SPH_RANK_PROXIMITY_BM25)
      var $_maxquerytime;  ///< max query time, milliseconds (default is 0, do not limit)
      var $_fieldweights;  ///< per-field-name weights
      var $_overrides;     ///< per-query attribute values overrides
      var $_select;        ///< select-list (attributes or expressions, with optional aliases)

      var $_error;         ///< last error message
      var $_warning;       ///< last warning message
      var $_connerror;     ///< connection error vs remote error flag

      var $_reqs;          ///< requests array for multi-query
      var $_mbenc;         ///< stored mbstring encoding
      var $_arrayresult;   ///< whether $result["matches"] should be a hash or an array
      var $_timeout;       ///< connect timeout
  354. /////////////////////////////////////////////////////////////////////////////
  355. // common stuff
  356. /////////////////////////////////////////////////////////////////////////////
  357. /// create a new client object and fill defaults
  /// create a new client object and fill every setting with its default
  function __construct ()
  {
      // per-client-object settings
      $this->_host = "localhost";
      $this->_port = 9312;
      $this->_path = false;
      $this->_socket = false;

      // per-query settings
      $this->_offset = 0;
      $this->_limit = 20;
      $this->_mode = SPH_MATCH_ALL;
      $this->_weights = array ();
      $this->_sort = SPH_SORT_RELEVANCE;
      $this->_sortby = "";
      $this->_min_id = 0;
      $this->_max_id = 0;
      $this->_filters = array ();
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
      $this->_maxmatches = 1000;
      $this->_cutoff = 0;
      $this->_retrycount = 0;
      $this->_retrydelay = 0;
      $this->_anchor = array ();
      $this->_indexweights = array ();
      $this->_ranker = SPH_RANK_PROXIMITY_BM25;
      $this->_maxquerytime = 0;
      $this->_fieldweights = array ();
      $this->_overrides = array ();
      $this->_select = "*";

      // per-reply fields (for single-query case)
      $this->_error = "";
      $this->_warning = "";
      $this->_connerror = false;

      // requests storage (for multi-query case)
      $this->_reqs = array ();
      $this->_mbenc = "";
      $this->_arrayresult = false;
      $this->_timeout = 0;
  }
  /// close the stored socket, if one is still open, when the client is destroyed
  function __destruct ()
  {
      if ( $this->_socket !== false )
          fclose ( $this->_socket );
  }
  403. /// get last error message (string)
  /// returns the last error message (string, empty when there was none)
  function GetLastError ()
  {
      return $this->_error;
  }
  408. /// get last warning message (string)
  /// returns the last warning message (string, empty when there was none)
  function GetLastWarning ()
  {
      return $this->_warning;
  }
  413. /// get last error flag (to tell network connection errors from searchd errors or broken responses)
  /// returns true when the last failure was flagged as a connection error
  function IsConnectError ()
  {
      return $this->_connerror;
  }
  418. /// set searchd host name (string) and port (integer)
  /// set searchd host name (string) and port (integer)
  /// a host starting with "/" or "unix://" is treated as a unix socket path; $port is ignored then
  function SetServer ( $host, $port = 0 )
  {
      assert ( is_string($host) );

      // substr() instead of $host[0] so that an empty host does not raise a string offset warning
      if ( substr ( $host, 0, 1 )=='/' )
      {
          $this->_path = 'unix://' . $host;
          return;
      }
      if ( substr ( $host, 0, 7 )=="unix://" )
      {
          $this->_path = $host;
          return;
      }

      assert ( is_int($port) );
      $this->_host = $host;
      $this->_port = $port;
      $this->_path = '';
  }
  437. /// set server connection timeout (0 to remove)
  /// set server connection timeout (0 to remove); the value is passed to fsockopen() by _Connect()
  function SetConnectTimeout ( $timeout )
  {
      assert ( is_numeric($timeout) );
      $this->_timeout = $timeout;
  }
  /// write $length bytes of $data to $handle
  /// returns true on success; on EOF or a short write sets the error message and
  /// the connection error flag, and returns false
  function _Send ( $handle, $data, $length )
  {
      if ( feof($handle) || fwrite ( $handle, $data, $length ) !== $length )
      {
          $this->_error = 'connection unexpectedly closed (timed out?)';
          $this->_connerror = true;
          return false;
      }
      return true;
  }
  453. /////////////////////////////////////////////////////////////////////////////
  454. /// enter mbstring workaround mode
  /// enter mbstring workaround mode
  /// when mbstring overloads the string functions (func_overload bit 2), remember the
  /// current internal encoding and switch to latin1; _MBPop() restores it
  function _MBPush ()
  {
      $this->_mbenc = "";
      // ini_get() yields a string or false; cast so the bit test always works on an int
      if ( ( (int)ini_get ( "mbstring.func_overload" ) ) & 2 )
      {
          $this->_mbenc = mb_internal_encoding ();
          mb_internal_encoding ( "latin1" );
      }
  }
  464. /// leave mbstring workaround mode
  /// leave mbstring workaround mode: restore the encoding saved by _MBPush(), if any
  function _MBPop ()
  {
      if ( $this->_mbenc )
          mb_internal_encoding ( $this->_mbenc );
  }
  470. /// connect to searchd server
  /// connect to searchd server
  /// returns a socket handle after the protocol version handshake,
  /// or false (with the error message set) on failure
  function _Connect ()
  {
      if ( $this->_socket!==false )
      {
          // we are in persistent connection mode, so we have a socket
          // however, need to check whether it's still alive
          if ( !@feof ( $this->_socket ) )
              return $this->_socket;

          // force reopen
          $this->_socket = false;
      }

      $errno = 0;
      $errstr = "";
      $this->_connerror = false;

      if ( $this->_path )
      {
          $host = $this->_path;
          $port = 0;
      }
      else
      {
          $host = $this->_host;
          $port = $this->_port;
      }

      if ( $this->_timeout<=0 )
          $fp = @fsockopen ( $host, $port, $errno, $errstr );
      else
          $fp = @fsockopen ( $host, $port, $errno, $errstr, $this->_timeout );

      if ( !$fp )
      {
          if ( $this->_path )
              $location = $this->_path;
          else
              $location = "{$this->_host}:{$this->_port}";

          $errstr = trim ( $errstr );
          $this->_error = "connection to $location failed (errno=$errno, msg=$errstr)";
          $this->_connerror = true;
          return false;
      }

      // send my version
      // this is a subtle part. we must do it before (!) reading back from searchd.
      // because otherwise under some conditions (reported on FreeBSD for instance)
      // TCP stack could throttle write-write-read pattern because of Nagle.
      if ( !$this->_Send ( $fp, pack ( "N", 1 ), 4 ) )
      {
          fclose ( $fp );
          $this->_error = "failed to send client protocol version";
          return false;
      }

      // check version; a short or failed read is treated as version 0
      $v = 0;
      $reply = fread ( $fp, 4 );
      if ( is_string($reply) && strlen($reply)==4 )
      {
          list(,$v) = unpack ( "N*", $reply );
          $v = (int)$v;
      }
      if ( $v<1 )
      {
          fclose ( $fp );
          $this->_error = "expected searchd protocol version 1+, got version '$v'";
          return false;
      }

      return $fp;
  }
  531. /// get and check response packet from searchd server
  /// get and check response packet from searchd server
  /// reads an 8-byte header (status, version, body length) followed by the body
  /// returns the response body (with any warning prefix stripped), or false on error
  function _GetResponse ( $fp, $client_ver )
  {
      $response = "";
      $len = 0;
      $status = null;
      $ver = null;

      $header = fread ( $fp, 8 );
      if ( strlen($header)==8 )
      {
          list ( $status, $ver, $len ) = array_values ( unpack ( "n2a/Nb", $header ) );
          $left = $len;
          while ( $left>0 && !feof($fp) )
          {
              $chunk = fread ( $fp, $left );
              if ( $chunk===false )
                  break; // read error without EOF; stop instead of spinning forever
              if ( $chunk )
              {
                  $response .= $chunk;
                  $left -= strlen($chunk);
              }
          }
      }

      // not a stored (persistent) socket, so this connection is done
      if ( $this->_socket === false )
          fclose ( $fp );

      // check response
      $read = strlen ( $response );
      if ( !$response || $read!=$len )
      {
          $this->_error = $len
              ? "failed to read searchd response (status=$status, ver=$ver, len=$len, read=$read)"
              : "received zero-sized searchd response";
          return false;
      }

      // check status
      if ( $status==SEARCHD_WARNING )
      {
          // body starts with a length-prefixed warning string
          list(,$wlen) = unpack ( "N*", substr ( $response, 0, 4 ) );
          $this->_warning = substr ( $response, 4, $wlen );
          return substr ( $response, 4+$wlen );
      }
      if ( $status==SEARCHD_ERROR )
      {
          $this->_error = "searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status==SEARCHD_RETRY )
      {
          $this->_error = "temporary searchd error: " . substr ( $response, 4 );
          return false;
      }
      if ( $status!=SEARCHD_OK )
      {
          $this->_error = "unknown status code '$status'";
          return false;
      }

      // check version
      if ( $ver<$client_ver )
      {
          $this->_warning = sprintf ( "searchd command v.%d.%d older than client's v.%d.%d, some options might not work",
              $ver>>8, $ver&0xff, $client_ver>>8, $client_ver&0xff );
      }

      return $response;
  }
  592. /////////////////////////////////////////////////////////////////////////////
  593. // searching
  594. /////////////////////////////////////////////////////////////////////////////
  595. /// set offset and count into result set,
  596. /// and optionally set max-matches and cutoff limits
  /// set offset and count into result set,
  /// and optionally set max-matches and cutoff limits
  /// $max and $cutoff are only applied when greater than 0
  function SetLimits ( $offset, $limit, $max = 0, $cutoff = 0 )
  {
      assert ( is_int($offset) );
      assert ( is_int($limit) );
      assert ( $offset>=0 );
      assert ( $limit>0 );
      assert ( $max>=0 );

      $this->_offset = $offset;
      $this->_limit = $limit;
      if ( $max>0 )
          $this->_maxmatches = $max;
      if ( $cutoff>0 )
          $this->_cutoff = $cutoff;
  }
  611. /// set maximum query time, in milliseconds, per-index
  612. /// integer, 0 means "do not limit"
  /// set maximum query time, in milliseconds, per-index
  /// integer, 0 means "do not limit"
  function SetMaxQueryTime ( $max )
  {
      assert ( is_int($max) );
      assert ( $max>=0 );
      $this->_maxquerytime = $max;
  }
  619. /// set matching mode
  /// set matching mode; $mode must be one of the SPH_MATCH_xxx constants
  function SetMatchMode ( $mode )
  {
      assert ( $mode==SPH_MATCH_ALL
          || $mode==SPH_MATCH_ANY
          || $mode==SPH_MATCH_PHRASE
          || $mode==SPH_MATCH_BOOLEAN
          || $mode==SPH_MATCH_EXTENDED
          || $mode==SPH_MATCH_FULLSCAN
          || $mode==SPH_MATCH_EXTENDED2 );
      $this->_mode = $mode;
  }
  631. /// set ranking mode
  /// set ranking mode; $ranker must be one of the SPH_RANK_xxx constants
  function SetRankingMode ( $ranker )
  {
      // every SPH_RANK_xxx constant defined by this file is accepted,
      // including SPH_RANK_MATCHANY and SPH_RANK_FIELDMASK
      assert ( $ranker==SPH_RANK_PROXIMITY_BM25
          || $ranker==SPH_RANK_BM25
          || $ranker==SPH_RANK_NONE
          || $ranker==SPH_RANK_WORDCOUNT
          || $ranker==SPH_RANK_PROXIMITY
          || $ranker==SPH_RANK_MATCHANY
          || $ranker==SPH_RANK_FIELDMASK );
      $this->_ranker = $ranker;
  }
  641. /// set matches sorting mode
  /// set matches sorting mode; $mode must be one of the SPH_SORT_xxx constants
  /// $sortby must be a non-empty string for every mode except SPH_SORT_RELEVANCE
  function SetSortMode ( $mode, $sortby = "" )
  {
      assert (
          $mode==SPH_SORT_RELEVANCE ||
          $mode==SPH_SORT_ATTR_DESC ||
          $mode==SPH_SORT_ATTR_ASC ||
          $mode==SPH_SORT_TIME_SEGMENTS ||
          $mode==SPH_SORT_EXTENDED ||
          $mode==SPH_SORT_EXPR );
      assert ( is_string($sortby) );
      assert ( $mode==SPH_SORT_RELEVANCE || strlen($sortby)>0 );

      $this->_sort = $mode;
      $this->_sortby = $sortby;
  }
  656. /// bind per-field weights by order
  657. /// DEPRECATED; use SetFieldWeights() instead
  /// bind per-field weights by order; $weights is an array of ints
  /// DEPRECATED; use SetFieldWeights() instead
  function SetWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $weight )
          assert ( is_int($weight) );

      $this->_weights = $weights;
  }
  665. /// bind per-field weights by name
  /// bind per-field weights by name; $weights maps field name (string) to weight (int)
  function SetFieldWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $name=>$weight )
      {
          assert ( is_string($name) );
          assert ( is_int($weight) );
      }
      $this->_fieldweights = $weights;
  }
  676. /// bind per-index weights by name
  /// bind per-index weights by name; $weights maps index name (string) to weight (int)
  function SetIndexWeights ( $weights )
  {
      assert ( is_array($weights) );
      foreach ( $weights as $index=>$weight )
      {
          assert ( is_string($index) );
          assert ( is_int($weight) );
      }
      $this->_indexweights = $weights;
  }
  687. /// set IDs range to match
  688. /// only match records if document ID is between $min and $max (inclusive)
  /// set IDs range to match
  /// only match records if document ID is between $min and $max (inclusive)
  function SetIDRange ( $min, $max )
  {
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );
      $this->_min_id = $min;
      $this->_max_id = $max;
  }
  697. /// set values set filter
  698. /// only match records where $attribute value is in given set
  /// set values set filter
  /// only match records where $attribute value is in given set
  /// nothing is added when $values is not a non-empty array
  function SetFilter ( $attribute, $values, $exclude = false )
  {
      assert ( is_string($attribute) );
      assert ( is_array($values) );
      assert ( count($values) );

      if ( is_array($values) && count($values) )
      {
          foreach ( $values as $value )
              assert ( is_numeric($value) );

          $this->_filters[] = array ( "type"=>SPH_FILTER_VALUES, "attr"=>$attribute, "exclude"=>$exclude, "values"=>$values );
      }
  }
  711. /// set range filter
  712. /// only match records if $attribute value is between $min and $max (inclusive)
  /// set range filter
  /// only match records if $attribute value is between $min and $max (inclusive)
  function SetFilterRange ( $attribute, $min, $max, $exclude = false )
  {
      assert ( is_string($attribute) );
      assert ( is_numeric($min) );
      assert ( is_numeric($max) );
      assert ( $min<=$max );

      $this->_filters[] = array ( "type"=>SPH_FILTER_RANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
  }
  721. /// set float range filter
  722. /// only match records if $attribute value is between $min and $max (inclusive)
  /// set float range filter
  /// only match records if $attribute value is between $min and $max (inclusive)
  /// both bounds must be floats
  function SetFilterFloatRange ( $attribute, $min, $max, $exclude = false )
  {
      assert ( is_string($attribute) );
      assert ( is_float($min) );
      assert ( is_float($max) );
      assert ( $min<=$max );

      $this->_filters[] = array ( "type"=>SPH_FILTER_FLOATRANGE, "attr"=>$attribute, "exclude"=>$exclude, "min"=>$min, "max"=>$max );
  }
  731. /// setup anchor point for geosphere distance calculations
  732. /// required to use @geodist in filters and sorting
  733. /// latitude and longitude must be in radians
  /// setup anchor point for geosphere distance calculations
  /// required to use @geodist in filters and sorting
  /// latitude and longitude must be floats, in radians
  function SetGeoAnchor ( $attrlat, $attrlong, $lat, $long )
  {
      assert ( is_string($attrlat) );
      assert ( is_string($attrlong) );
      assert ( is_float($lat) );
      assert ( is_float($long) );

      $this->_anchor = array ( "attrlat"=>$attrlat, "attrlong"=>$attrlong, "lat"=>$lat, "long"=>$long );
  }
  742. /// set grouping attribute and function
  /// set grouping attribute and function; $func must be one of the SPH_GROUPBY_xxx constants
  function SetGroupBy ( $attribute, $func, $groupsort = "@group desc" )
  {
      assert ( is_string($attribute) );
      assert ( is_string($groupsort) );
      assert ( $func==SPH_GROUPBY_DAY
          || $func==SPH_GROUPBY_WEEK
          || $func==SPH_GROUPBY_MONTH
          || $func==SPH_GROUPBY_YEAR
          || $func==SPH_GROUPBY_ATTR
          || $func==SPH_GROUPBY_ATTRPAIR );

      $this->_groupby = $attribute;
      $this->_groupfunc = $func;
      $this->_groupsort = $groupsort;
  }
  757. /// set count-distinct attribute for group-by queries
  /// set count-distinct attribute for group-by queries
  function SetGroupDistinct ( $attribute )
  {
      assert ( is_string($attribute) );
      $this->_groupdistinct = $attribute;
  }
  763. /// set distributed retries count and delay
  /// set distributed retries count and delay; both must be non-negative ints
  function SetRetries ( $count, $delay = 0 )
  {
      assert ( is_int($count) && $count>=0 );
      assert ( is_int($delay) && $delay>=0 );
      $this->_retrycount = $count;
      $this->_retrydelay = $delay;
  }
  771. /// set result set format (hash or array; hash by default)
  772. /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  /// set result set format (hash or array; hash by default)
  /// PHP specific; needed for group-by-MVA result sets that may contain duplicate IDs
  function SetArrayResult ( $arrayresult )
  {
      assert ( is_bool($arrayresult) );
      $this->_arrayresult = $arrayresult;
  }
  778. /// set attribute values override
  779. /// there can be only one override per attribute
  780. /// $values must be a hash that maps document IDs to attribute values
  /// set attribute values override
  /// there can be only one override per attribute (a later call for the same name replaces it)
  /// $values must be a hash that maps document IDs to attribute values
  function SetOverride ( $attrname, $attrtype, $values )
  {
      assert ( is_string ( $attrname ) );
      assert ( in_array ( $attrtype, array ( SPH_ATTR_INTEGER, SPH_ATTR_TIMESTAMP, SPH_ATTR_BOOL, SPH_ATTR_FLOAT, SPH_ATTR_BIGINT ) ) );
      assert ( is_array ( $values ) );

      $this->_overrides[$attrname] = array ( "attr"=>$attrname, "type"=>$attrtype, "values"=>$values );
  }
  788. /// set select-list (attributes or expressions), SQL-like syntax
  /// set select-list (attributes or expressions), SQL-like syntax
  function SetSelect ( $select )
  {
      assert ( is_string ( $select ) );
      $this->_select = $select;
  }
  794. //////////////////////////////////////////////////////////////////////////////
  795. /// clear all filters (for multi-queries)
  /// clear all filters and the geo anchor (for multi-queries)
  function ResetFilters ()
  {
      $this->_filters = array ();
      $this->_anchor = array ();
  }
  801. /// clear groupby settings (for multi-queries)
  /// clear groupby settings (for multi-queries), restoring the constructor defaults
  function ResetGroupBy ()
  {
      $this->_groupby = "";
      $this->_groupfunc = SPH_GROUPBY_DAY;
      $this->_groupsort = "@group desc";
      $this->_groupdistinct = "";
  }
  809. /// clear all attribute value overrides (for multi-queries)
  /// clear all attribute value overrides (for multi-queries)
  function ResetOverrides ()
  {
      $this->_overrides = array ();
  }
  814. //////////////////////////////////////////////////////////////////////////////
  815. /// connect to searchd server, run given search query through given indexes,
  816. /// and return the search results
  /// connect to searchd server, run given search query through given indexes,
  /// and return the search results
  /// returns the first result set, or false on a network failure or a searchd error
  function Query ( $query, $index = "*", $comment = "" )
  {
      assert ( empty($this->_reqs) );

      $this->AddQuery ( $query, $index, $comment );
      $results = $this->RunQueries ();
      $this->_reqs = array (); // just in case it failed too early

      if ( !is_array($results) )
          return false; // probably network error; error message should be already filled

      $this->_error = $results[0]["error"];
      $this->_warning = $results[0]["warning"];
      if ( $results[0]["status"]==SEARCHD_ERROR )
          return false;
      else
          return $results[0];
  }
  832. /// helper to pack floats in network byte order
  /// helper to pack floats in network byte order
  /// returns a 4-byte string: the single-precision bits of $f, most significant byte first
  function _PackFloat ( $f )
  {
      $t1 = pack ( "f", $f ); // machine order
      list(,$t2) = unpack ( "L*", $t1 ); // int in machine order
      return pack ( "N", $t2 );
  }
  839. /// add query to multi-query batch
  840. /// returns index into results array from RunQueries() call
  /// add query to multi-query batch
  /// serializes the current per-query settings together with $query, $index and $comment
  /// into one binary request and appends it to the pending requests
  /// returns index into results array from RunQueries() call
  function AddQuery ( $query, $index = "*", $comment = "" )
  {
      // mbstring workaround
      $this->_MBPush ();

      // build request
      $req = pack ( "NNNNN", $this->_offset, $this->_limit, $this->_mode, $this->_ranker, $this->_sort ); // mode and limits
      $req .= pack ( "N", strlen($this->_sortby) ) . $this->_sortby;
      $req .= pack ( "N", strlen($query) ) . $query; // query itself
      $req .= pack ( "N", count($this->_weights) ); // weights
      foreach ( $this->_weights as $weight )
          $req .= pack ( "N", (int)$weight );
      $req .= pack ( "N", strlen($index) ) . $index; // indexes
      $req .= pack ( "N", 1 ); // id64 range marker
      $req .= sphPackU64 ( $this->_min_id ) . sphPackU64 ( $this->_max_id ); // id64 range

      // filters
      $req .= pack ( "N", count($this->_filters) );
      foreach ( $this->_filters as $filter )
      {
          $req .= pack ( "N", strlen($filter["attr"]) ) . $filter["attr"];
          $req .= pack ( "N", $filter["type"] );
          switch ( $filter["type"] )
          {
              case SPH_FILTER_VALUES:
                  $req .= pack ( "N", count($filter["values"]) );
                  foreach ( $filter["values"] as $value )
                      $req .= sphPackI64 ( $value );
                  break;

              case SPH_FILTER_RANGE:
                  $req .= sphPackI64 ( $filter["min"] ) . sphPackI64 ( $filter["max"] );
                  break;

              case SPH_FILTER_FLOATRANGE:
                  $req .= $this->_PackFloat ( $filter["min"] ) . $this->_PackFloat ( $filter["max"] );
                  break;

              default:
                  assert ( 0 && "internal error: unhandled filter type" );
          }
          $req .= pack ( "N", $filter["exclude"] );
      }

      // group-by clause, max-matches count, group-sort clause, cutoff count
      $req .= pack ( "NN", $this->_groupfunc, strlen($this->_groupby) ) . $this->_groupby;
      $req .= pack ( "N", $this->_maxmatches );
      $req .= pack ( "N", strlen($this->_groupsort) ) . $this->_groupsort;
      $req .= pack ( "NNN", $this->_cutoff, $this->_retrycount, $this->_retrydelay );
      $req .= pack ( "N", strlen($this->_groupdistinct) ) . $this->_groupdistinct;

      // anchor point
      if ( empty($this->_anchor) )
      {
          $req .= pack ( "N", 0 );
      }
      else
      {
          $a =& $this->_anchor;
          $req .= pack ( "N", 1 );
          $req .= pack ( "N", strlen($a["attrlat"]) ) . $a["attrlat"];
          $req .= pack ( "N", strlen($a["attrlong"]) ) . $a["attrlong"];
          $req .= $this->_PackFloat ( $a["lat"] ) . $this->_PackFloat ( $a["long"] );
      }

      // per-index weights
      $req .= pack ( "N", count($this->_indexweights) );
      foreach ( $this->_indexweights as $idx=>$weight )
          $req .= pack ( "N", strlen($idx) ) . $idx . pack ( "N", $weight );

      // max query time
      $req .= pack ( "N", $this->_maxquerytime );

      // per-field weights
      $req .= pack ( "N", count($this->_fieldweights) );
      foreach ( $this->_fieldweights as $field=>$weight )
          $req .= pack ( "N", strlen($field) ) . $field . pack ( "N", $weight );

      // comment
      $req .= pack ( "N", strlen($comment) ) . $comment;

      // attribute overrides
      $req .= pack ( "N", count($this->_overrides) );
      foreach ( $this->_overrides as $key => $entry )
      {
          $req .= pack ( "N", strlen($entry["attr"]) ) . $entry["attr"];
          $req .= pack ( "NN", $entry["type"], count($entry["values"]) );
          foreach ( $entry["values"] as $id=>$val )
          {
              assert ( is_numeric($id) );
              assert ( is_numeric($val) );

              $req .= sphPackU64 ( $id );
              switch ( $entry["type"] )
              {
                  case SPH_ATTR_FLOAT:  $req .= $this->_PackFloat ( $val ); break;
                  case SPH_ATTR_BIGINT: $req .= sphPackI64 ( $val ); break;
                  default:              $req .= pack ( "N", $val ); break;
              }
          }
      }

      // select-list
      $req .= pack ( "N", strlen($this->_select) ) . $this->_select;

      // mbstring workaround
      $this->_MBPop ();

      // store request to requests array
      $this->_reqs[] = $req;
      return count($this->_reqs)-1;
  }
  /// Connect to searchd, run the queued batch of queries, and return an array of result sets.
  /// Returns false when no queries are queued (the reason is stored in $this->_error),
  /// or when connecting, sending the request or reading the response fails.
  function RunQueries()
  {
      if (empty($this->_reqs))
      {
          $this->_error = "no queries defined, issue AddQuery() first";
          return false;
      }

      // mbstring workaround
      $this->_MBPush();

      if (!($fp = $this->_Connect()))
      {
          $this->_MBPop();
          return false;
      }

      // send query, get response
      $nreqs = count($this->_reqs);
      $req = join("", $this->_reqs);
      $len = 4 + strlen($req); // body = 4-byte query count + all queued requests
      $req = pack("nnNN", SEARCHD_COMMAND_SEARCH, VER_COMMAND_SEARCH, $len, $nreqs) . $req; // add header

      // $len + 8: the body plus the command, version and length header fields
      if (!($this->_Send($fp, $req, $len + 8)) ||
          !($response = $this->_GetResponse($fp, VER_COMMAND_SEARCH)))
      {
          $this->_MBPop();
          return false;
      }

      // query sent ok; we can reset reqs now
      $this->_reqs = array();

      // parse and return response (_ParseSearchResponse() performs the matching _MBPop())
      return $this->_ParseSearchResponse($response, $nreqs);
  }
  /// Parse a raw search reply into one result array per query.
  ///
  /// $response is the binary reply body; $nreqs is the number of queries it answers.
  /// Every result carries "error", "warning" and "status". Results that did not fail
  /// also carry "fields", "attrs", "total", "total_found" and "time", plus "matches"
  /// and "words" when the reply contains any. Calls _MBPop() before returning.
  function _ParseSearchResponse($response, $nreqs)
  {
      $p = 0; // current position
      $max = strlen($response); // max position for checks, to protect against broken responses
      $results = array();

      for ($ires = 0; $ires < $nreqs && $p < $max; $ires++)
      {
          $results[] = array();
          $result =& $results[$ires];
          $result["error"] = "";
          $result["warning"] = "";

          // extract status
          list(, $status) = unpack("N*", substr($response, $p, 4)); $p += 4;
          $result["status"] = $status;
          if ($status != SEARCHD_OK)
          {
              list(, $len) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $message = substr($response, $p, $len); $p += $len;

              if ($status == SEARCHD_WARNING)
              {
                  // a warning is followed by a regular result set; keep parsing
                  $result["warning"] = $message;
              }
              else
              {
                  // any other non-OK status carries only the message; move to the next result
                  $result["error"] = $message;
                  continue;
              }
          }

          // read schema
          $fields = array();
          $attrs = array();

          list(, $nfields) = unpack("N*", substr($response, $p, 4)); $p += 4;
          while ($nfields-- > 0 && $p < $max)
          {
              list(, $len) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $fields[] = substr($response, $p, $len); $p += $len;
          }
          $result["fields"] = $fields;

          list(, $nattrs) = unpack("N*", substr($response, $p, 4)); $p += 4;
          while ($nattrs-- > 0 && $p < $max)
          {
              list(, $len) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $attr = substr($response, $p, $len); $p += $len;
              list(, $type) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $attrs[$attr] = $type;
          }
          $result["attrs"] = $attrs;

          // read match count, then the flag telling whether document ids are 64-bit
          list(, $count) = unpack("N*", substr($response, $p, 4)); $p += 4;
          list(, $id64) = unpack("N*", substr($response, $p, 4)); $p += 4;

          // read matches
          $idx = -1;
          while ($count-- > 0 && $p < $max)
          {
              // index into result array
              $idx++;

              // parse document id and weight
              if ($id64)
              {
                  $doc = sphUnpackU64(substr($response, $p, 8)); $p += 8;
                  list(, $weight) = unpack("N*", substr($response, $p, 4)); $p += 4;
              }
              else
              {
                  list($doc, $weight) = array_values(unpack("N*N*",
                      substr($response, $p, 8)));
                  $p += 8;
                  $doc = sphFixUint($doc);
              }
              $weight = sprintf("%u", $weight);

              // create match entry: positional when _arrayresult is set, keyed by document id otherwise
              if ($this->_arrayresult)
                  $result["matches"][$idx] = array("id" => $doc, "weight" => $weight);
              else
                  $result["matches"][$doc]["weight"] = $weight;

              // parse and create attributes
              $attrvals = array();
              foreach ($attrs as $attr => $type)
              {
                  // handle 64bit ints
                  if ($type == SPH_ATTR_BIGINT)
                  {
                      $attrvals[$attr] = sphUnpackI64(substr($response, $p, 8)); $p += 8;
                      continue;
                  }

                  // handle floats: read the 32-bit pattern, then reinterpret it as a float
                  if ($type == SPH_ATTR_FLOAT)
                  {
                      list(, $uval) = unpack("N*", substr($response, $p, 4)); $p += 4;
                      list(, $fval) = unpack("f*", pack("L", $uval));
                      $attrvals[$attr] = $fval;
                      continue;
                  }

                  // handle everything else as unsigned ints
                  list(, $val) = unpack("N*", substr($response, $p, 4)); $p += 4;
                  if ($type & SPH_ATTR_MULTI)
                  {
                      // for multi-value attributes the first integer is the value count
                      $attrvals[$attr] = array();
                      $nvalues = $val;
                      while ($nvalues-- > 0 && $p < $max)
                      {
                          list(, $val) = unpack("N*", substr($response, $p, 4)); $p += 4;
                          $attrvals[$attr][] = sphFixUint($val);
                      }
                  }
                  else
                  {
                      $attrvals[$attr] = sphFixUint($val);
                  }
              }

              if ($this->_arrayresult)
                  $result["matches"][$idx]["attrs"] = $attrvals;
              else
                  $result["matches"][$doc]["attrs"] = $attrvals;
          }

          // totals, elapsed time and per-word statistics
          list($total, $total_found, $msecs, $words) =
              array_values(unpack("N*N*N*N*", substr($response, $p, 16)));
          $result["total"] = sprintf("%u", $total);
          $result["total_found"] = sprintf("%u", $total_found);
          $result["time"] = sprintf("%.3f", $msecs / 1000);
          $p += 16;

          while ($words-- > 0 && $p < $max)
          {
              list(, $len) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $word = substr($response, $p, $len); $p += $len;
              list($docs, $hits) = array_values(unpack("N*N*", substr($response, $p, 8))); $p += 8;
              $result["words"][$word] = array(
                  "docs" => sprintf("%u", $docs),
                  "hits" => sprintf("%u", $hits));
          }
      }

      $this->_MBPop();
      return $results;
  }
  1100. /////////////////////////////////////////////////////////////////////////////
  1101. // excerpts generation
  1102. /////////////////////////////////////////////////////////////////////////////
  /// Connect to searchd server, and generate excerpts (snippets)
  /// of given documents for given query.
  ///
  /// $docs  is an array of document body strings,
  /// $index is the name of the index to use,
  /// $words is the query to highlight,
  /// $opts  may override "before_match", "after_match", "chunk_separator", "limit",
  ///        "around", "exact_phrase", "single_passage", "use_boundaries" and "weight_order".
  ///
  /// Returns false on failure, an array of snippets (one per document) on success.
  function BuildExcerpts($docs, $index, $words, $opts = array())
  {
      assert(is_array($docs));
      assert(is_string($index));
      assert(is_string($words));
      assert(is_array($opts));

      $this->_MBPush();

      if (!($fp = $this->_Connect()))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // fixup options
      /////////////////

      if (!isset($opts["before_match"]))    $opts["before_match"] = "";
      if (!isset($opts["after_match"]))     $opts["after_match"] = "";
      if (!isset($opts["chunk_separator"])) $opts["chunk_separator"] = " ... ";
      if (!isset($opts["limit"]))           $opts["limit"] = 256;
      if (!isset($opts["around"]))          $opts["around"] = 5;
      if (!isset($opts["exact_phrase"]))    $opts["exact_phrase"] = false;
      if (!isset($opts["single_passage"]))  $opts["single_passage"] = false;
      if (!isset($opts["use_boundaries"]))  $opts["use_boundaries"] = false;
      if (!isset($opts["weight_order"]))    $opts["weight_order"] = false;

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $flags = 1; // remove spaces
      if ($opts["exact_phrase"])   $flags |= 2;
      if ($opts["single_passage"]) $flags |= 4;
      if ($opts["use_boundaries"]) $flags |= 8;
      if ($opts["weight_order"])   $flags |= 16;
      $req = pack("NN", 0, $flags); // mode=0, flags=$flags
      $req .= pack("N", strlen($index)) . $index; // req index
      $req .= pack("N", strlen($words)) . $words; // req words

      // options
      $req .= pack("N", strlen($opts["before_match"])) . $opts["before_match"];
      $req .= pack("N", strlen($opts["after_match"])) . $opts["after_match"];
      $req .= pack("N", strlen($opts["chunk_separator"])) . $opts["chunk_separator"];
      $req .= pack("N", (int)$opts["limit"]);
      $req .= pack("N", (int)$opts["around"]);

      // documents
      $ndocs = count($docs);
      $req .= pack("N", $ndocs);
      foreach ($docs as $doc)
      {
          assert(is_string($doc));
          $req .= pack("N", strlen($doc)) . $doc;
      }

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack("nnN", SEARCHD_COMMAND_EXCERPT, VER_COMMAND_EXCERPT, $len) . $req; // add header
      if (!($this->_Send($fp, $req, $len + 8)) ||
          !($response = $this->_GetResponse($fp, VER_COMMAND_EXCERPT)))
      {
          $this->_MBPop();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array();
      $rlen = strlen($response);
      for ($i = 0; $i < $ndocs; $i++)
      {
          list(, $len) = unpack("N*", substr($response, $pos, 4));
          $pos += 4;

          // a declared snippet length that runs past the reply means the reply was truncated
          if ($pos + $len > $rlen)
          {
              $this->_error = "incomplete reply";
              $this->_MBPop();
              return false;
          }
          $res[] = $len ? substr($response, $pos, $len) : "";
          $pos += $len;
      }

      $this->_MBPop();
      return $res;
  }
  1188. /////////////////////////////////////////////////////////////////////////////
  1189. // keyword generation
  1190. /////////////////////////////////////////////////////////////////////////////
  /// Connect to searchd server, and generate keyword list for a given query.
  ///
  /// $query is the text to tokenize, $index the index name to use,
  /// and $hits (bool) asks for per-keyword "docs" and "hits" counters as well.
  ///
  /// Returns false on failure, or an array of words on success; each word is an array
  /// with "tokenized" and "normalized" keys (plus "docs" and "hits" when $hits is true).
  function BuildKeywords($query, $index, $hits)
  {
      assert(is_string($query));
      assert(is_string($index));
      assert(is_bool($hits));

      $this->_MBPush();

      if (!($fp = $this->_Connect()))
      {
          $this->_MBPop();
          return false;
      }

      /////////////////
      // build request
      /////////////////

      // v.1.0 req
      $req = pack("N", strlen($query)) . $query; // req query
      $req .= pack("N", strlen($index)) . $index; // req index
      $req .= pack("N", (int)$hits);

      ////////////////////////////
      // send query, get response
      ////////////////////////////

      $len = strlen($req);
      $req = pack("nnN", SEARCHD_COMMAND_KEYWORDS, VER_COMMAND_KEYWORDS, $len) . $req; // add header
      if (!($this->_Send($fp, $req, $len + 8)) ||
          !($response = $this->_GetResponse($fp, VER_COMMAND_KEYWORDS)))
      {
          $this->_MBPop();
          return false;
      }

      //////////////////
      // parse response
      //////////////////

      $pos = 0;
      $res = array();
      $rlen = strlen($response);
      list(, $nwords) = unpack("N*", substr($response, $pos, 4));
      $pos += 4;
      for ($i = 0; $i < $nwords; $i++)
      {
          list(, $len) = unpack("N*", substr($response, $pos, 4)); $pos += 4;
          $tokenized = $len ? substr($response, $pos, $len) : "";
          $pos += $len;

          list(, $len) = unpack("N*", substr($response, $pos, 4)); $pos += 4;
          $normalized = $len ? substr($response, $pos, $len) : "";
          $pos += $len;

          $res[] = array("tokenized" => $tokenized, "normalized" => $normalized);

          if ($hits)
          {
              list($ndocs, $nhits) = array_values(unpack("N*N*", substr($response, $pos, 8)));
              $pos += 8;
              $res[$i]["docs"] = $ndocs;
              $res[$i]["hits"] = $nhits;
          }

          // reading past the end of the reply means it was truncated
          if ($pos > $rlen)
          {
              $this->_error = "incomplete reply";
              $this->_MBPop();
              return false;
          }
      }

      $this->_MBPop();
      return $res;
  }
  /// Escape characters that have a special meaning in the query syntax by
  /// prefixing each of them with a backslash.
  ///
  /// $string is the raw text; the escaped copy is returned.
  function EscapeString($string)
  {
      // The backslash must stay first: str_replace() applies the pairs in order, so
      // doubling it later would also double the backslashes added by the other pairs.
      $from = array('\\', '(', ')', '|', '-', '!', '@', '~', '"', '&', '/', '^', '$', '=');
      $to   = array('\\\\', '\(', '\)', '\|', '\-', '\!', '\@', '\~', '\"', '\&', '\/', '\^', '\$', '\=');

      return str_replace($from, $to, $string);
  }
  1263. /////////////////////////////////////////////////////////////////////////////
  1264. // attribute updates
  1265. /////////////////////////////////////////////////////////////////////////////
  /// Batch update given attributes in given rows in given indexes.
  ///
  /// $index  is the index name (string),
  /// $attrs  is an array of attribute name strings,
  /// $values maps document id => array of new values, one per entry of $attrs
  ///         (plain ints, or arrays of ints when $mva is true),
  /// $mva    tells whether the attributes are multi-value.
  ///
  /// Returns amount of updated documents (0 or more) on success, or -1 on failure.
  function UpdateAttributes($index, $attrs, $values, $mva = false)
  {
      // verify everything
      assert(is_string($index));
      assert(is_bool($mva));

      assert(is_array($attrs));
      foreach ($attrs as $attr)
          assert(is_string($attr));

      assert(is_array($values));
      foreach ($values as $id => $entry)
      {
          assert(is_numeric($id));
          assert(is_array($entry));
          assert(count($entry) == count($attrs));
          foreach ($entry as $v)
          {
              if ($mva)
              {
                  assert(is_array($v));
                  foreach ($v as $vv)
                      assert(is_int($vv));
              }
              else
                  assert(is_int($v));
          }
      }

      // mbstring workaround, applied for the same reason as in the other request
      // builders of this class: strlen() below must count bytes
      $this->_MBPush();

      // build request
      $req = pack("N", strlen($index)) . $index;

      $req .= pack("N", count($attrs));
      foreach ($attrs as $attr)
      {
          $req .= pack("N", strlen($attr)) . $attr;
          $req .= pack("N", $mva ? 1 : 0);
      }

      $req .= pack("N", count($values));
      foreach ($values as $id => $entry)
      {
          $req .= sphPackU64($id);
          foreach ($entry as $v)
          {
              // a multi-value entry is sent as its value count followed by the values
              $req .= pack("N", $mva ? count($v) : $v);
              if ($mva)
                  foreach ($v as $vv)
                      $req .= pack("N", $vv);
          }
      }

      // connect, send query, get response
      if (!($fp = $this->_Connect()))
      {
          $this->_MBPop();
          return -1;
      }

      $len = strlen($req);
      $req = pack("nnN", SEARCHD_COMMAND_UPDATE, VER_COMMAND_UPDATE, $len) . $req; // add header
      if (!$this->_Send($fp, $req, $len + 8))
      {
          $this->_MBPop();
          return -1;
      }

      if (!($response = $this->_GetResponse($fp, VER_COMMAND_UPDATE)))
      {
          $this->_MBPop();
          return -1;
      }

      // parse response
      list(, $updated) = unpack("N*", substr($response, 0, 4));
      $this->_MBPop();
      return $updated;
  }
  1326. /////////////////////////////////////////////////////////////////////////////
  1327. // persistent connections
  1328. /////////////////////////////////////////////////////////////////////////////
  /// Open a persistent connection and remember it in $this->_socket.
  /// Returns true on success; returns false when a persistent connection is already
  /// open ($this->_error is set), or when connecting or sending the command fails.
  function Open()
  {
      if ($this->_socket !== false)
      {
          $this->_error = 'already connected';
          return false;
      }

      if (!$fp = $this->_Connect())
          return false;

      // command, command version = 0, body length = 4, body = 1
      $req = pack("nnNN", SEARCHD_COMMAND_PERSIST, 0, 4, 1);
      if (!$this->_Send($fp, $req, 12))
          return false;

      $this->_socket = $fp;
      return true;
  }
  /// Close the persistent connection stored in $this->_socket.
  /// Returns true on success, or false (with $this->_error set) when none is open.
  function Close()
  {
      if ($this->_socket === false)
      {
          $this->_error = 'not connected';
          return false;
      }

      fclose($this->_socket);
      $this->_socket = false;

      return true;
  }
  1356. //////////////////////////////////////////////////////////////////////////
  1357. // status
  1358. //////////////////////////////////////////////////////////////////////////
  /// Query searchd status.
  /// Returns false on failure; on success returns an array of rows, where each row
  /// is a list of the column strings read from the reply.
  function Status()
  {
      $this->_MBPush();

      if (!($fp = $this->_Connect()))
      {
          $this->_MBPop();
          return false;
      }

      $req = pack("nnNN", SEARCHD_COMMAND_STATUS, VER_COMMAND_STATUS, 4, 1); // len=4, body=1
      if (!($this->_Send($fp, $req, 12)) ||
          !($response = $this->_GetResponse($fp, VER_COMMAND_STATUS)))
      {
          $this->_MBPop();
          return false;
      }

      // the reply starts with the row and column counts
      $p = 0;
      list($rows, $cols) = array_values(unpack("N*N*", substr($response, $p, 8))); $p += 8;

      // every cell is a length-prefixed string
      $res = array();
      for ($i = 0; $i < $rows; $i++)
          for ($j = 0; $j < $cols; $j++)
          {
              list(, $len) = unpack("N*", substr($response, $p, 4)); $p += 4;
              $res[$i][] = substr($response, $p, $len); $p += $len;
          }

      $this->_MBPop();
      return $res;
  }
  1387. }
  1388. //
  1389. // $Id: sphinxapi.php 2055 2009-11-06 23:09:58Z shodan $
  1390. //

测试控制器(/application/controllers/search_page.php)
  1. <?php if(!defined('BASEPATH'))die('No Access');
  /**
   * Demo controller: shows the search form and renders Sphinx search results.
   */
  class Search_page extends CI_Controller {

      public function __construct() {
          parent::__construct();
      }

      /**
       * Load the URL helper and render the 'search' view.
       */
      public function search() {
          $this->load->helper('url');
          $this->load->view('search');
      }

      /**
       * Search the "words" GET parameter in Sphinx, print the totals and per-word
       * statistics, then load the matching rows from the database and print their
       * names as highlighted excerpts.
       */
      public function result() {
          header('content-type: text/html;charset=utf-8');

          // The parameter may be absent or an array (words[]=...); BuildExcerpts()
          // asserts that it receives a string.
          $words = $this->input->get('words');
          if (!is_string($words)) {
              $words = '';
          }
          // User input must never reach the page unescaped.
          $safe_words = htmlspecialchars($words, ENT_QUOTES, 'UTF-8');

          $this->load->library('sphinx_client', NULL, 'sphinx');
          $index = "test1";
          $opts = array
          (
              "before_match" => '',
              "after_match" => "",
              "chunk_separator" => " ... ",
              "limit" => 60,
              "around" => 3,
          );

          $this->sphinx->SetServer('192.168.23.128', 9312);
          $this->sphinx->SetConnectTimeout(3);
          $this->sphinx->SetArrayResult(TRUE);
          $this->sphinx->SetMatchMode(SPH_MATCH_ANY);
          $this->sphinx->SetLimits(0, 20);

          $res = $this->sphinx->Query($words, $index);
          if ($res === FALSE) {
              var_dump($this->sphinx->GetLastError());
              exit;
          }

          echo "关键词 {$safe_words} ,找到约 {$res['total_found']} 结果,用时 {$res['time']}s";
          // NOTE(review): the separator markup here and below is an assumption; confirm against the intended page layout.
          echo '<hr />';

          if (array_key_exists('words', $res) && is_array($res['words'])) {
              foreach ($res['words'] as $k => $v) {
                  // the keyword comes from the user's query, so escape it as well
                  echo htmlspecialchars((string)$k, ENT_QUOTES, 'UTF-8') . ' : ' . $v['docs'] . ' - ' . $v['hits'] . '<br />';
              }
          }
          echo '<hr />';

          $this->load->database();

          // collect the ids of the matched documents
          $idarr = array();
          if (array_key_exists('matches', $res) && is_array($res['matches'])) {
              foreach ($res['matches'] as $v) {
                  $idarr[] = $v['id'];
              }
          }

          if (count($idarr) > 0) {
              $this->db->from('shop_goods_info');
              $this->db->select('pname,cretime');
              $this->db->where_in('id', $idarr);
              $result = $this->db->get()->result_array();

              echo '<ul>';
              $name_arr = array();
              foreach ($result as $k => $v) {
                  $name_arr[$k] = (string)$v['pname'];
              }

              $excerpts = $this->sphinx->BuildExcerpts($name_arr, $index, $words, $opts);
              if ($excerpts === FALSE) {
                  // excerpt generation failed: fall back to the plain, escaped names
                  $excerpts = array();
                  foreach ($name_arr as $k => $name) {
                      $excerpts[$k] = htmlspecialchars($name, ENT_QUOTES, 'UTF-8');
                  }
              }

              foreach ($result as $k => $v) {
                  echo '<li>' . $excerpts[$k] . '(' . date('Y-m-d H:i:s', $v['cretime']) . ')</li>';
              }
              echo '</ul>';
          }

          $this->sphinx->Close();
      }
  }
  59. ?>


搜索表单(/application/views/search.php)
  1. <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
  2. <title>搜索</title>
  3. <meta name="keywords" content="keywords"/>
  4. <meta name="description" content="description"/>
  5. <style type="text/css">
  6. #panel {
  7. margin:20px;
  8. }
  9. <div id="panel">
  10. <form action="<?php echo site_url(array('search_page','result'));?>">
  11. <label for="words">关键词:</label>
  12. <input type="text" id="words" name="words" value="" size="60"/>
  13. <input type="submit" name="submit" value="搜索"/>