当前位置:首页 > 开发 > Web前端 > 前端 > 正文

全文检索lucene

发表于: 2014-08-27   作者:chen_sheng_lin   来源:转载   浏览次数:
摘要: 一、下载lucene4.7的jar包: lucene-analyzers-common-4.7.0.jar lucene-analyzers-smartcn-4.7.0.jar lucene-core-4.7.0.jar lucene-facet-4.7.0.jar lucene-highlighter-4.7.0.jar lucene-queries-4.7.0.jar lucen
一、下载lucene4.7的jar包:
lucene-analyzers-common-4.7.0.jar
lucene-analyzers-smartcn-4.7.0.jar
lucene-core-4.7.0.jar
lucene-facet-4.7.0.jar
lucene-highlighter-4.7.0.jar
lucene-queries-4.7.0.jar
lucene-queryparser-4.7.0.jar

把以上jar包导入项目中

二、创建索引
*************************************************1.创建索引配置****************************************
因为创建索引是针对表的,所以定义配置文件,配置需要创建索引的SQL
index.xml:

<?xml version='1.0' encoding='UTF-8'?>
<!--
  Index definitions read by IndexConfigMgr. Each <index> element describes one
  Lucene index and the SQL statements that feed it:
    <name>   index name; also used as the sub-directory name of the index
    <all>    query used for a full (re)build
    <add>    query for rows created since the last run
    <update> query for existing rows changed since the last run
    <delete> query for existing rows flagged as removed since the last run
    <blob>   passed through to the data service together with the SQL
             (NOTE(review): looks like a ':'-separated pair of column names - confirm)
  The {?#ID#} and {?#UPDATE_TIME#} placeholders correspond to the "ID" and
  "UPDATE_TIME" parameters that CreateIndexJob reads from the per-index log file.
-->
<indexs>
  <!-- Index over ARMS.T_RISK_RULES -->
  <index>
    <name>riskRule</name>
    <!-- Full build: every row that is not flagged as removed, ordered by ID -->
    <all>
      <![CDATA[
        select ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
        where remove_flag = 0
        ORDER BY ID
      ]]>
    </all>
    <!-- Incremental add: live rows with an ID above the recorded maximum -->
    <add>
      <![CDATA[
        SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
        WHERE remove_flag = 0
        and  ID > {?#ID#}
        AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
      ]]>
    </add>
    <!-- Incremental update: live rows below the recorded maximum ID that changed since the last run -->
    <update>
      <![CDATA[
        SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
        WHERE remove_flag = 0
        and ID < {?#ID#}
        AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
      ]]>
    </update>
    <!-- Incremental delete: rows below the recorded maximum ID that were flagged as removed since the last run -->
    <delete>
      <![CDATA[
        SELECT ID,NAME,BODY,DOCUMENT_TYPE from ARMS.T_RISK_RULES
        WHERE remove_flag = 1
        AND ID < {?#ID#}
        AND UPDATE_TIMESTAMP > {?#UPDATE_TIME#}
      ]]>
    </delete>
    <blob>BODY:DOCUMENT_TYPE</blob>
  </index>
    <!-- Index over ARMS.T_RISK; same four statements as above, filtered on UPDATE_TIME -->
    <index>
        <name>riskProblem</name>
        <all>
            <![CDATA[
          SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
          WHERE REMOVE_FLAG = 0
          ORDER BY ID
        ]]>
        </all>
        <add>
            <![CDATA[
          SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
          WHERE REMOVE_FLAG = 0
          AND ID > {?#ID#}
          AND UPDATE_TIME > {?#UPDATE_TIME#}
        ]]>
        </add>
        <update>
            <![CDATA[
          SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
          WHERE REMOVE_FLAG = 0
          AND ID < {?#ID#}
          AND UPDATE_TIME > {?#UPDATE_TIME#}
        ]]>
        </update>
        <delete>
        <![CDATA[
          SELECT ID,TITLE,CONTENTS,PUNISH,CRITERION_CONTENT FROM ARMS.T_RISK
          WHERE REMOVE_FLAG = 1
          AND ID < {?#ID#}
          AND UPDATE_TIME > {?#UPDATE_TIME#}
        ]]>
        </delete>
        <!-- Empty: IndexConfigMgr skips elements without content, so the blob setting stays null -->
        <blob></blob>
    </index>
</indexs>


读取解析index.xml的工具类
package com.lhzq.ibms.lucene.util;

import com.htsc.abms.lucene.model.Index;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
import java.util.ArrayList;
import java.util.List;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-10
* Time: 上午9:28
* 索引配置解析
*/
public class IndexConfigMgr
{
    /**
     *  自身对象用来做单例
     */
    private static IndexConfigMgr indexConfigMgr;

    /**
     * 用来做线程锁
     */
    private static Object obj = new Object();

    /**
     *  要索引的表的配置
     */
    private List<Index> tableConfigs;

    /**
     * 日志
     */
    private static Logger logger= LoggerFactory.getLogger(IndexConfigMgr.class);

    /**
     * 索引配置文件
     */
    private static final String INDEX_DIR = "index/index.xml";

    /**
     * index节点的名称
     */
    private static final String INDEX_NODE_NAME = "index";

    /**
     * name节点的名称
     */
    private static final String NAME_NODE_NAME = "name";

    /**
     * all节点的名称
     */
    private static final String ALL_NODE_NAME = "all";

    /**
     * add节点的名称
     */
    private static final String ADD_NODE_NAME = "add";

    /**
     * update节点的名称
     */
    private static final String UPDATE_NODE_NAME = "update";

    /**
     * delete节点的名称
     */
    private static final String DELETE_NODE_NAME = "delete";

    /**
     * blob节点的名称
     */
    private static final String BLOB_NODE_NAME = "blob";

    /**
     * 私有的构造方法
     */
    private  IndexConfigMgr()
    {
        // 创建配置容器
        tableConfigs = new ArrayList<Index>();
    }

    /**
     * 获取实例对象
     * @return
     */
    public static IndexConfigMgr getInstance()
    {
         synchronized (obj)
         {
             if(null == indexConfigMgr)
             {
                 indexConfigMgr = new IndexConfigMgr();
             }
         }

        // 加载配置文件
        indexConfigMgr.load();

        return  indexConfigMgr;
    }

    /**
     * 加载配置文件
     */
    private void load()
    {
        // 拿到索引配置文件的路径
        String path = WorkSpaceCenter.getClassPath(INDEX_DIR);

        Document doc = null;
        try
        {
            doc = getDocumentByPath(path);
            loadIndexes(doc);
        } catch (Exception e) {
            logger.error("加载index.xml文件失败",e);
        }
    }

    /**
     *  根据xml文件路径拿到dom对象
     * @param path 文件路径
     * @return
     * @throws javax.xml.parsers.ParserConfigurationException
     * @throws java.io.IOException
     * @throws org.xml.sax.SAXException
     */
    private Document getDocumentByPath(String path) throws Exception
    {
       // 获取DOM解析器工厂对象
       DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

       // 获取DOM解析器对象
       DocumentBuilder db = dbf.newDocumentBuilder();

        File file=new File(path);

        // 加载要解析xml文档
       Document doc = db.parse(file);

      return doc;
    }


    /**
     *  加载索引配置
     * @param doc
     * @return
     * @throws javax.xml.parsers.ParserConfigurationException
     * @throws java.io.IOException
     * @throws org.xml.sax.SAXException
     */
    private void loadIndexes(Document doc)
    {
       NodeList indexNodes = doc.getElementsByTagName(INDEX_NODE_NAME);

       Node node = null;
       tableConfigs.clear();
       for (int i = 0; i < indexNodes.getLength() ; i++)
       {
           node = indexNodes.item(i);
           if(!node.hasChildNodes())
           {
               continue;
           }

           tableConfigs.add(newIndex(node));
       }
    }


    /**
     * 封装一个index
     * @param parent
     * @return
     */
    private Index newIndex(Node parent)
    {
        Node node= null;
        Index index = null;

        String name = null;
        String all = null;
        String add = null;
        String update = null;
        String delete = null;
        String blob = null;

        NodeList nodes = parent.getChildNodes();
        for(int i = 0; i < nodes.getLength(); i++)
        {
             node =  nodes.item(i);

            if(!node.hasChildNodes())
            {
                continue;
            }

            if(node.getNodeName().equals(NAME_NODE_NAME))
            {
                name = node.getTextContent().trim();
            }

            if(node.getNodeName().equals(ALL_NODE_NAME))
            {
                all = node.getTextContent().trim();
            }

            if(node.getNodeName().equals(ADD_NODE_NAME))
            {
                add = node.getTextContent().trim();
            }

            if(node.getNodeName().equals(UPDATE_NODE_NAME))
            {
                update = node.getTextContent().trim();
            }

            if(node.getNodeName().equals(DELETE_NODE_NAME))
            {
                delete = node.getTextContent().trim();
            }

            if(node.getNodeName().equals(BLOB_NODE_NAME))
            {
                blob = node.getTextContent().trim();
            }

            index = new Index(name,all,add,update,delete,blob);
        }

        return index;
    }


    /**
     * 返回结果数据
     * @return
     */
    public List<Index> getTableConfigs()
    {
        return tableConfigs;
    }
}



*************************************************2.定时创建索引****************************************
如果创建索引的数据量较大,创建索引需要花很长的时间,建议创建定时任务创建索引

第一次需要全量创建索引,之后只需增量更新索引(新增、更新、删除),不必每次都全量重建,
所以要记录索引的最大ID和上一次更新时间

1>////////////////////////创建索引的定时任务CreateIndexJob.java:

package com.lhzq.ibms.lucene.job;

import com.htsc.abms.lucene.model.Index;
import com.htsc.abms.lucene.service.CreateIndexService;
import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.ibms.lucene.util.*;
import com.lhzq.leap.core.utils.DateUtils;
import com.lhzq.leap.core.utils.FileUtility;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.stereotype.Service;

import java.io.File;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* 创建索引的定时任务
*/
@Service("createIndexJob")
public class CreateIndexJob
{
    /**
     * 日志
     */
   private static Logger logger = LoggerFactory.getLogger(CreateIndexJob.class);

    /**
     * 示例用户业务处理
     */
   @Autowired
   private CreateIndexService indexService;

    /**
     * 创建索引工具
     */
   private BuildIndex buildIndex;

    /**
     * 记录最大的Id和更新索引的时间
     */
   private IndexLog indexLog;

    /**
     *  全部加载索引
     */
   public String loadIndex()
   {
      StringBuffer message = new StringBuffer();
      message.append("["+DateUtils.now()+"]:开始创建索引***********!\r\n");
      logger.info("开始创建索引***************************");
      long begin=System.currentTimeMillis();
      List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
      try
      {
         // 先删除目录
         String indexPath = Configuration.getLuceneIndexDir();
         FileUtility.deleteDir(indexPath);
         message.append("删除index目录成功!\r\n");
         logger.info("删除index目录成功**********");

         // 创建日志文件
         CreateLog.init();
         BigDecimal maxId = null;
         for(Index index : indexes)
         {
             message.append("开始创建["+index.getName()+"]模块的索引!\r\n");
             logger.info("开始创建["+index.getName()+"]索引======");

             // 设置索引参数
             buildIndex = new BuildIndex(index.getName());
             indexLog = new IndexLog(index.getName());

             // 写入索引
             buildIndex.setDocType(BuildIndex.DOC_TYPE_CREATE);
             maxId = pageAddDoc(buildIndex,index,new HashMap());

             // 关闭
             buildIndex.close();

             // 写如参数
             after(indexLog,maxId);

             message.append("创建["+index.getName()+"]模块索引完成!\r\n");
             logger.info("创建["+index.getName()+"]索引完成=======");
         }

         message.append("["+DateUtils.now()+"]:创建索引完成***********!\r\n");
         long end=System.currentTimeMillis();
         message.append("创建索引一共花费:"+(float)(end-begin)/1000+"秒");

         logger.info("创建索引完成********************************");
      } catch (Exception e) {
         message.append("创建索引异常:"+e.getMessage());
         logger.error("加载所有的索引失败", e);
      }

      return message.toString();
   }

    /**
     * 定时更新索引
     */
   public String updateIndex()
    {
        StringBuffer message = new StringBuffer();
        long begin=System.currentTimeMillis();
        message.append("["+DateUtils.now()+"]:开始更新索引***********!\r\n");

        List<Index> indexes = IndexConfigMgr.getInstance().getTableConfigs();
        try
        {
            BigDecimal maxId = null;
            BigDecimal addMaxId = null;
            HashMap<String,Object> params = null;
            for(Index index : indexes)
            {
                message.append("开始更新["+index.getName()+"]模块的索引!\r\n");

                // 读取参数
                buildIndex = new BuildIndex(index.getName());
                indexLog = new IndexLog(index.getName());
                params = before(indexLog);

                // 拿出最大ID
                maxId =(BigDecimal)params.get("ID");

                // 添加索引
                buildIndex.setDocType(BuildIndex.DOC_TYPE_ADD);
                addMaxId = pageAddDoc(buildIndex,index,params);
                // 更新最大ID
                if(null != addMaxId){
                    maxId = addMaxId;
                }

                // 更新索引
                buildIndex.setDocType(BuildIndex.DOC_TYPE_UPDATE);
                pageAddDoc(buildIndex,index,params);

                // 删除索引
                buildIndex.setDocType(BuildIndex.DOC_TYPE_DELETE);
                pageAddDoc(buildIndex,index,params);

                // 关闭
                buildIndex.close();

                // 写如参数
                after(indexLog,maxId);
                message.append("更新["+index.getName()+"]模块索引完!\r\n");
            }

            message.append("["+DateUtils.now()+"]:更新索引完成***********!\r\n");
            long end=System.currentTimeMillis();
            message.append("更新索引花费了时间:" + (float)(end-begin)/1000+"秒");
        } catch (Exception e) {
            message.append("更新索引异常:" + e.getMessage());
            logger.error("更新索引失败", e);
        }

        return message.toString();
    }

    /**
     * 读取索引文件内容
     * @param indexLog
     * @return
     */
    private HashMap<String,Object> before(IndexLog indexLog)
    {
        HashMap<String,Object> params = new HashMap<String, Object>();
        String content  = indexLog.readText();

        if(!StringUtils.isEmpty(content))
        {
           String id = content.split(",")[0];
           String now = content.split(",")[1];

           // 封装参数
           params.put("ID", new BigDecimal(id));
           params.put("UPDATE_TIME",DateUtils.toDate(now));

           logger.info("索引库中最大的ID:"+ id+",上次更新时间:"+now);
        }

        return params;
    }

    /**
     * 写入新的最大ID和时间
     * @param indexLog
     * @param maxId
     */
    private void after(IndexLog indexLog,BigDecimal maxId)
    {
        if(null == maxId){
            return;
        }

        String now =  DateUtils.toString(new Date());
        indexLog.WriteText(maxId + "," +now );

        logger.info("写入最大的ID:"+ maxId+",记录更新时间:"+now);
    }

    /**
     * 分页操作添加索引
     * @param buildIndex
     * @param index
     * @param param
     * @return
     * @throws Exception
     */
    private BigDecimal pageAddDoc(BuildIndex buildIndex,Index index,Map param) throws IOException {
        DataPage dataPage = new DataPage(this.indexService,index.getBlob(),param);
        BigDecimal maxId = null;
        int count = 0;
        switch (buildIndex.getDocType())
        {
            case BuildIndex.DOC_TYPE_CREATE:
            {
                dataPage.setBaseSql(index.getAll());
                count = (int)Math.ceil((float)dataPage.getCount()/DataPage.PAGE_SIZE);
                dataPage.setTotalPage(count);
                for(int i =1;i<=count;i++){
                   buildIndex.addDoc(dataPage.queryPage(i));
                }
                maxId = dataPage.getMaxId();
                break;
            }
            case BuildIndex.DOC_TYPE_ADD:
            {
                dataPage.setBaseSql(index.getAdd());
                count = (int)Math.ceil((float)dataPage.getCount()/DataPage.PAGE_SIZE);
                dataPage.setTotalPage(count);
                for(int i =1;i<=count;i++){
                    buildIndex.addDoc(dataPage.queryPage(i));
                }
                maxId = dataPage.getMaxId();
                break;
            }
            case BuildIndex.DOC_TYPE_UPDATE:
            {
                dataPage.setBaseSql(index.getUpdate());
                count = (int)Math.ceil((float)dataPage.getCount()/DataPage.PAGE_SIZE);
                dataPage.setTotalPage(count);
                for(int i =1;i<=count;i++){
                    buildIndex.updateDoc(dataPage.queryPage(i));
                }
                break;
            }
            case BuildIndex.DOC_TYPE_DELETE:
            {
                dataPage.setBaseSql(index.getDelete());
                count = (int)Math.ceil((float)dataPage.getCount()/DataPage.PAGE_SIZE);
                dataPage.setTotalPage(count);
                for(int i =1;i<=count;i++){
                    buildIndex.deleteDoc(dataPage.queryPage(i));
                }
                break;
            }
        }

        return maxId;
    }

    public CreateIndexService getIndexService() {
        return indexService;
    }

    public void setIndexService(CreateIndexService indexService) {
        this.indexService = indexService;
    }
}



2>.//////////////////////////创建索引工具类BuildIndex.java:

package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-12
* Time: 下午4:06
* Lucene创建索引工具类
*/
public class BuildIndex
{
    /**
     * 操作类型
     */
    public static final int DOC_TYPE_CREATE = 0;
    public static final int DOC_TYPE_ADD = 1;
    public static final int DOC_TYPE_UPDATE = 2;
    public static final int DOC_TYPE_DELETE = 3;

    /**
     * 索引写入器
     */
    private IndexWriter indexWriter;

    /**
     * 操作类型
     */
    private int docType;

    /**
     * 构造方法创建索引写入器
     *
     * @param name
     */
    public BuildIndex(String name) throws IOException {
        // 创建IndexWriter
        String indexPath = Configuration.getLuceneIndexDir();
        indexWriter = getIndexWriter(indexPath + "/" + name);
    }

    // 索引写入器
    private IndexWriter getIndexWriter(String indexDir) throws IOException {
        // 存储索引在硬盘中
        Directory dir = DirCenter.getDir(indexDir);

        // Version操作开始变得非常常见
        // 中文分词器的引入,好像4.7.0对庖丁等第三方分词器兼容得并不好,可能也是因为apache对原生的做了一些整合的缘故
        Analyzer analyzer = AnalyzerCenter.getAnalyzer();

        // 同时引入了IndexWriterConfig对象,封装了早期版本的一大堆参数
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
        IndexWriter writer = new IndexWriter(dir, config);

        return writer;
    }

    // 创建document对象
    private Document createDoc(Map<String, Object> record) throws UnsupportedEncodingException {

        Document doc = new Document();
        Iterator<String> it = record.keySet().iterator();

        String key = null;
        String value = null;
        while (it.hasNext()) {
            key = it.next();
            value = String.valueOf(record.get(key));
            doc.add(new Field(key, value, TextField.TYPE_STORED));
        }

        return doc;
    }

    // 添加索引
    public void addDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Document doc = createDoc(record);
            indexWriter.addDocument(doc);
        }
    }

    // 更新索引
    public void updateDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Document doc = createDoc(record);

            Term term = new Term("ID", "" + record.get("ID"));

            indexWriter.updateDocument(term, doc);
        }
    }

    // 删除索引
    public void deleteDoc(List<Map<String, Object>> data) throws IOException {
        for (Map<String, Object> record : data) {
            Term term = new Term("ID", "" + record.get("ID"));

            indexWriter.deleteDocuments(term);
        }
    }

    //  关闭
    public void close() throws IOException {
        if (null != this.indexWriter) {
            this.indexWriter.close();
            this.indexWriter = null;
        }
    }

    public int getDocType() {
        return docType;
    }

    public void setDocType(int docType) {
        this.docType = docType;
    }
}


3>.////////////////////////////单例获取分词器(Analyzer)

package com.lhzq.ibms.lucene.util;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-15
* Time: 上午10:21
* 单例模式 获取解析器
*/
public class AnalyzerCenter
{
    private static Analyzer analyzer;

    private AnalyzerCenter(){}

    public static Analyzer getAnalyzer()
    {
         if(null == analyzer)
         {
             analyzer = new StandardAnalyzer(Version.LUCENE_47);
         }

        return  analyzer;
    }
}


4>.///////////////////////////////打开一个索引目录工具类
package com.lhzq.ibms.lucene.util;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

import java.io.File;
import java.io.IOException;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-15
* Time: 上午10:31
* 打开一个目录
*/
public class DirCenter
{
    private DirCenter(){}

    public static Directory getDir(String path) throws IOException
    {
        // 检查参数
        if(null == path)
        {
            return null;
        }

        File indexDir = new File(path);

        // 如果文件不存在,则创建目录
        if(!indexDir.exists())
        {
            indexDir.mkdir();
        }

        // 存储索引在硬盘中
        Directory dir = FSDirectory.open(indexDir);
        return dir;
    }
}


5>./////////////////////////////一次创建索引太多,会导致内存溢出,需要分页创建
package com.lhzq.ibms.lucene.util;

import com.htsc.abms.lucene.service.CreateIndexService;
import java.math.BigDecimal;
import java.util.List;
import java.util.Map;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-6-3
* Time: 下午4:09
* 包装分页查询数据
*/
public class DataPage
{
    /**
     * 每页条数
     */
    public static final int PAGE_SIZE = 20;

    /**
     * 业务操作
     */
    private CreateIndexService indexService;

    /**
     * 原始sql
     */
    private String baseSql;

    /**
     * blob字段
     */
    private String blob;

    /**
     * 参数
     */
    private Map param;

    /**
     * 最大的id
     */
    private BigDecimal maxId;

    /**
     * 总页数
     */
    private Integer totalPage;


    /**
     * 构造方法设置查询条件
     * @param indexService
     * @param blob
     * @param param
     */
    public DataPage(CreateIndexService indexService,String blob,Map param)
    {
       this.indexService = indexService;
       this.blob = blob;
       this.param = param;
    }

    /**
     * 查询一页数据
     * @param pageNo
     * @return
     */
    public List<Map<String,Object>> queryPage(int pageNo)
    {
        String sql  = "SELECT * FROM (SELECT A.*,ROWNUM RN FROM ("+this.baseSql+") A WHERE ROWNUM <= "+pageNo * PAGE_SIZE+")"
                +" WHERE RN >= "+((pageNo-1) * PAGE_SIZE + 1);

        List<Map<String,Object>> data = indexService.queryPageData(sql,this.blob,this.param);

        if(pageNo == totalPage)
        {
           this.maxId = (BigDecimal)data.get(data.size() -1).get("ID");
        }

        return  data;
    }

    /**
     * 查询总数量
     * @return
     */
    public Integer getCount()
    {
        String  sql = "SELECT COUNT(*) CNT FROM ("+this.baseSql+") A";
        return indexService.getCount(sql,this.param);
    }

    public BigDecimal getMaxId() {
        return maxId;
    }

    public void setBaseSql(String baseSql) {
        this.baseSql = baseSql;
    }

    public void setTotalPage(Integer totalPage) {
        this.totalPage = totalPage;
    }
}


6>.///////////////////////////////记录上一次更新索引的时间和最大ID,方便更新索引
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-13
* Time: 下午1:25
* 更新索引时,修改保存最大Id和更新时间
*/
public class IndexLog
{
    /**
     * 日志
     */
    private static Logger logger = LoggerFactory.getLogger(IndexLog.class);

    /**
     * 保存最大的Id和更新索引的时间的文件
     */
    private File logFile;

    /**
     * 设置日志文件
     * @param dir
     */
    public IndexLog(String dir) throws IOException
    {
        String indexPath = Configuration.getLuceneIndexDir()+"/" + dir;;
        File fileDir = new File(indexPath);

        if(!fileDir.exists()){
           fileDir.mkdir();
        }
        File file = new File(fileDir,dir + ".txt");

        if(!file.exists()){
           file.createNewFile();
        }

        logFile = file;
    }

    /**
     * 读取上一次更新索引的时间和最大ID
     * @return
     * @throws java.io.IOException
     */
    public String readText()
    {
        BufferedReader br = null;
        String content = null;
        try
        {
            br = new BufferedReader(new FileReader(logFile));
            content = br.readLine();
        }
        catch (IOException e)
        {
            logger.error("读取最大ID和上次更新索引时间失败", e);
        }
        finally
        {
            try {
                if (br != null) {
                    br.close();
                    br = null;
                }
            } catch (IOException e) {
                logger.error("读取最大ID和上次更新索引时,关闭IO失败", e);
            }
        }

        return content;
    }

    /**
     * 写入创建或者更新索引日志
     * @param text
     * @throws java.io.IOException
     */
    public void WriteText(String text)
    {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(logFile));
            bw.write(text);
        } catch (IOException e) {
          logger.error("写入最大ID和上次更新索引失败", e);
        }
        finally
        {
           try {
             if(bw!=null){
               bw.close();
               bw = null;
             }
           } catch (IOException e) {
              logger.error("写入最大ID和上次更新索引时,关闭IO失败", e);
           }
        }
     }
}

***********************************************************3.手动创建索引******************************************
1>.////////////////////////页面:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>

<%-- Shared fragments; the scripts below use the "path" variable, presumably defined by one of them (verify). --%>
<%@include file="/modules/comm/loadingData.jsp"%>
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>

<!-- DWR engine, utilities and the generated dwrIndexManage proxy -->
<script type="text/javascript" src="<%=path%>/dwr/engine.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/util.js"></script>
<script type="text/javascript" src="<%=path%>/dwr/interface/dwrIndexManage.js"></script>

<html>

<head>
    <title>索引维护</title>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
<style type="text/css">
    /* Box that shows the report of a create/update run */
    .indexInfo{
        width: 800px;
        height: 200px;
        border: 2px solid #E5E5E5;
    }
</style>
</head>
<body>

<!-- One button plus one report box for each operation: full rebuild and incremental update -->
<div style="text-align: center">
        <ul  style="text-align:left;list-style-type:none;">
            <li>
                <a class="button glow button-rounded button-flat-primary button-tiny" onclick="createIndex();"> 创建索引 </a> &nbsp;&nbsp;
            </li>
            <li>
                <div class="indexInfo" id="createInfo">创建索引日志...</div>
            </li>
            <li style="margin-top: 10px;">
                <a class="button glow button-rounded button-flat-primary button-tiny" onclick="updateIndex();"> 更新索引 </a>
            </li>
            <li>
                <div class="indexInfo" id="updateInfo">更新索引日志...</div>
            </li>
        </ul>
    </div>
</body>
</html>

<script type="text/javascript">

    var interval = null;

    var time = null;

    // 创建索引
    function createIndex(){
       // 显示加载
        createDiv();

       // 创建索引
       dwrIndexManage.createIndex();

       // 延迟执行
       time = setTimeout(function(){
          interval = setInterval(showCtResult, "1000");
       },60000);
    }

    // 显示创建结果
    function showCtResult(){
        dwrIndexManage.queryCtResult(function(data){
            if(null!=data&&data!=''){
                clearTimeout(time);
                clearInterval(interval)
                setValue("createInfo",data);

                // 加载完成移出
                removeDiv();
            }
        })
    }

    // 更新索引
    function updateIndex(){
      // 显示加载
      createDiv();

      dwrIndexManage.updateIndex({
          //回调函数
          callback: function(data){
              setValue("updateInfo",data);

              // 加载完成移出
              removeDiv();
          },
          //超时,单位是毫秒,默认为20分钟,设置为0代表关闭超时
          timeout: 0,
          //超时后调用的处理函数
          errorHandler:function(message) { alert(message); }
      });
    }

    // 设置值
    function setValue(id,data){
       document.getElementById(id).innerHTML="";
       document.getElementById(id).innerHTML="<pre>"+data+"</pre>";
    }
</script>

2>.////////////////////////////////DWR操作
package com.lhzq.ibms.lucene.dwr;

import com.lhzq.ibms.lucene.job.CreateIndexJob;
import com.lhzq.ibms.lucene.util.CreateLog;
import org.springframework.beans.factory.annotation.Autowired;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-6-18
* Time: 下午5:24
* 手动索引的创建和更新
*/
public class DwrIndexManage
{
   @Autowired
   private CreateIndexJob indexJob;

    /**
     * 创建索引
     * @return
     */
   public void createIndex()
   {
      String logInfo = indexJob.loadIndex();
      CreateLog.write(logInfo);
   }

    /**
     * 查询创建索引结果
     * @return
     */
    public String queryCtResult()
    {
      return CreateLog.read().trim();
    }

    /**
     * 更新索引
     * @return
     */
    public String updateIndex()
    {
        return indexJob.updateIndex();
    }
}


3>.////////////////////////////页面返回创建操作日志:
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.*;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-13
* Time: 下午1:25
* 记录创建和更新索引的操作日志
*/
public class CreateLog
{
    /**
     * 日志
     */
    private static Logger logger = LoggerFactory.getLogger(CreateLog.class);

    /**
     * 创建日志文件
     */
    private static String path = Configuration.getLuceneIndexDir()+"/createLog.txt";

    /**
     * 创建文件
     */
    public static void init(){
        try {
            File indexDir = new File(Configuration.getLuceneIndexDir());

            // 如果文件不存在,则创建目录
            if(!indexDir.exists())
            {
                indexDir.mkdir();
            }

            // 创建文件
            File createLogFile = new File(path);
            if(!createLogFile.exists()){
                createLogFile.createNewFile();
            }
        } catch (IOException e) {
            logger.error("创建日志文件失败",e);
        }
    }

    /**
     * 读取日志文件
     * @return
     */
    public static String read()
    {
        BufferedReader br = null;
        StringBuffer log =new StringBuffer();
        try
        {
            br = new BufferedReader(new FileReader(path));
            String line = null;
            while((line = br.readLine())!=null)
            {
                log.append(line).append("\r\n");
            }
        }
        catch (IOException e)
        {
            logger.error("创建索引读取日志异常", e);
        }
        finally
        {
            try {
                if (br != null) {
                    br.close();
                }
            } catch (IOException e) {
                logger.error("创建索引读取日志,关闭IO失败", e);
            }
        }

        return log.toString();
    }

    /**
     * 写入创建或者更新索引日志
     */
    public static void write(String log)
    {
        BufferedWriter bw = null;
        try {
            bw = new BufferedWriter(new FileWriter(path));
            bw.write(log);
        } catch (IOException e) {
           logger.error("创建索引写入日志错误", e);
        }
        finally
        {
           try {
             if(bw!=null){
               bw.close();
             }
           } catch (IOException e) {
              logger.error("创建索引写入日志,关闭IO失败", e);
           }
        }
     }
}


三、查询索引
1>.///////////////////////////////////页面
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN">
<%@ page contentType="text/html;charset=UTF-8" language="java" %>
<%@ taglib prefix="struts" uri="/struts-tags" %>

<!--引入path java中的path和js中的path-->
<%@include file="/common/path_header.jsp" %>
<%@include file="/common/jqgrid_header.jsp" %>

<html>
<head>
    <title>全文检索</title>
    <script type="text/javascript" src="${path}/script/common/rims.js"></script>
    <meta http-equiv="content-type" content="text/html; charset=UTF-8">
</head>
<body style="text-align: center">

<!--查询条件-->
  <form id="riskReferencePoint" method="post">
    <div style="text-align: center">
     <div style="overflow:auto;zoom:1;padding:10px 0px 5px 0px;">
       <ul  style="text-align:left;list-style-type:none;">
          <li style="float:left;">
            检索信息:
            <input type="text" id="KEYWORD" name="keyword" onkeydown="if(event.keyCode==13){ enterSearch();}" size="80" />
              &nbsp;&nbsp;&nbsp;
          </li>

          <li style="float:left;">
            <a href="javascript:search();" class="button glow button-rounded button-flat-primary button-tiny" id="save"> 检索 </a>
             &nbsp;&nbsp;
          </li>
       </ul>
     </div>
    </div>
   </form>

   <!--检索规章制度结果集-->
   <table id="riskRolesGrid">

   </table>
   <br>
   <!--检索风险问题结果集-->
   <table id="riskProblemGrid">

   </table>

</body>


<script type="text/javascript">

    //
    no_data();

    // 首次加载的时候,不到后台查询数据
    function no_data()
    {
        var keyword = $("#KEYWORD").val();

        if(keyword ==undefined||keyword ==null||keyword=='')
        {
            return;
        }
    }

    //  检索规章制度
    new AbmsGrid('riskRolesGrid',{
         colNames:['id','标题','内容','文档下载'],
         colModel:[
             {
                name:'ID'
               ,key:true
               ,width:55
               ,hidden:true
             }
            ,{
                name:'NAME'
               ,width:100
             }
             ,{
                 name:'BODY'
                ,width:400
                ,formatter:function(value){
                    return "<pre>"+trimToSummary(value)+"</pre>";
                }
             }
             ,{
                 width:60
                ,align:'center'
                ,formatter:function( value,options,rowData ){
                     //自定义渲染函数
                     if(rowData.BODY==undefined||rowData.BODY==null||rowData.BODY==''){
                         return '--';
                     }
                     return '<a href="javascript:uploadRiskRules('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">文档下载</a>';
                 }
             }
         ],
         postParamNames:['KEYWORD'],
         _gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskRuleDatasource',
         showPagerTool:true,
         loadDataFlag:false,
         caption:"风险规章制度"

     });

    //  检索风险问题
    new AbmsGrid('riskProblemGrid',{
        colNames:['id','风险问题', '审计意见','处罚意见','详细信息'],// ,'处罚内容'
        colModel:[
            {
                name:'ID'
               ,key:true
               ,width:55
               ,hidden:true
            },
            {
                name:'TITLE'
               ,width:220
            },
            {
                name:'CONTENTS'
               ,width:250
            },
            {
                name:'PUNISH'
               ,width:200
          }
//        ,{
//             name:'CRITERION_CONTENT'
//             ,width:200
//         }
            ,{
                width:60
                ,align:'center'
                ,formatter:function( value,options,rowData ){
                    //自定义渲染函数
                    return '<a href="javascript:findRiskProblem('+rowData.ID+');" style="color:#fff" class="button glow button-rounded button-flat-primary button-tiny">详细信息</a>';
                }
            }
        ],
        postParamNames:['KEYWORD'],
        _gridDatasourceClass:'com.htsc.abms.auditrisk.web.RiskProblemDatasource',
        showPagerTool:true,
        loadDataFlag:false,
        caption:"风险问题"
    });

    // 检索
    function search(){
        $("#riskRolesGrid").trigger("reloadGrid");
        $("#riskProblemGrid").trigger("reloadGrid");
    }

    // 显示制度详情
    function uploadRiskRules(id)
    {
        var  inputs='<input type="hidden" name="id" value="'+id+'"/>';
        jQuery('<form action="/htsc.abms/riskRules/uploadRiskRules.do" method="post">'+inputs+'</form>').appendTo('body').submit().remove();
    }

    // 显示详细信息
    function findRiskProblem(id) {
        var _url="${path}/risk/viewRiskById.do?riskId="+id;
        rims.window.showWindow(_url,900,900,null);
    }

    // 点击回车键查询
    function enterSearch(){
      $("#KEYWORD").blur();
      search();
    }

    // 截取字符串
    function trimToSummary(str){
        var endLength = 30;
        if(null==str||str==''){
           return str;
        }

        if(str.length > endLength){
            return str.substring(0,endLength) +'...';
        } else{
            return str;
        }
    }
</script>
</html>

2>./////////////////////////////////////后台数据读取
package com.htsc.abms.auditrisk.web;

import com.htsc.abms.jqgrid.model.GridData;
import com.htsc.abms.jqgrid.model.GridPostParam;
import com.htsc.abms.jqgrid.util.GridDatasourceInterface;
import com.lhzq.ibms.lucene.util.Searcher;
import com.lhzq.leap.core.utils.AppUtils;
import com.lhzq.leap.core.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.stereotype.Component;

import java.util.List;
import java.util.Map;

/**
* User: 陈圣林
* Date: 14-5-27
* Time: 下午2:19
* 风险问题查选检索
*/
@Component
public class RiskProblemDatasource implements GridDatasourceInterface {
    /**
     * 日志
     */
    private static Logger logger = LoggerFactory.getLogger(RiskProblemDatasource.class);

    /**
     * 索引的字段
     */
    private static final String[] INDEX_FIELDS = {"ID", "TITLE", "CONTENTS", "PUNISH", "CRITERION_CONTENT"};

    /**
     * 根据参数查询检索信息
     *
     * @param gridPostParam
     * @return jqgrid数据对象
     */
    public GridData getGridData(GridPostParam gridPostParam) {
        // 拿到关键字参数
        String keyword = (String) gridPostParam.getParamMap().get("KEYWORD");
        if (StringUtils.isEmpty(keyword)) {
            return new GridData();
        }

        // 那到当前页
        Integer currentPage = gridPostParam.getPage();

        // 每页显示的行数
        Integer pageSize = gridPostParam.getPageSize();

        // 全文检索查询器
        Searcher searcher = null;
        List<Map<String, String>> data = null;

        // 处理关键字
        String [] keywords = AppUtils.keywords(keyword);
        try {
            searcher = new Searcher("riskProblem");
            data = searcher.search(keywords, INDEX_FIELDS);
        } catch (Exception e) {
            logger.error("全文检索异常", e);
        }

        if (AppUtils.isBlank(data)) {
            return new GridData();
        }

        // 返回当前对象
        GridData gridData = new GridData(pageSize, currentPage, data);

        return gridData;
    }
}


默认的分词器会把关键字拆成单个字来查询;为了按完整的词组查询,需要先对关键字做处理(给每个关键字加上双引号,使其作为一个整体的短语参与匹配):

package com.lhzq.leap.core.utils;

import java.io.Serializable;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.GregorianCalendar;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import javax.servlet.http.Cookie;
import javax.servlet.http.HttpServletRequest;

import org.apache.commons.beanutils.BeanUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Application helper utilities.
 */
public class AppUtils{
    /**
     * Splits the raw search text on whitespace and turns every piece into a
     * double-quoted phrase, so that each piece is searched as a whole rather
     * than being broken up further.
     *
     * <p>Double quotes and backslashes inside a piece are backslash-escaped;
     * left as they are, they would end the phrase early or leave it
     * unterminated, producing an invalid query string.</p>
     *
     * @param keyword raw text typed by the user; may be {@code null}
     * @return one quoted phrase per whitespace-separated piece, in input order;
     *         an empty array when {@code keyword} is {@code null} or blank
     */
    public static String[] keywords(String keyword) {
        if (keyword == null) {
            return new String[0];
        }

        String trimmed = keyword.trim();
        if (trimmed.length() == 0) {
            // Splitting "" would yield one empty piece, i.e. the empty phrase "".
            return new String[0];
        }

        String[] keywords = trimmed.split("\\s+");
        for (int i = 0; i < keywords.length; i++) {
            // Escape the backslash first so the escapes added for quotes stay intact.
            String escaped = keywords[i].replace("\\", "\\\\").replace("\"", "\\\"");
            keywords[i] = "\"" + escaped + "\"";
        }

        return keywords;
    }
}

3>.////////////////////////封装的查询器
package com.lhzq.ibms.lucene.util;

import com.lhzq.ibms.commons.util.Configuration;
import com.lhzq.leap.core.config.CommonConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
* Created with IntelliJ IDEA.
* User: 陈圣林
* Date: 14-5-12
* Time: 下午5:40
* 全文索引收索工具类
*/
public class Searcher
{
    /**
     * 最大获取的匹配文档数,比如100个总文档,
     * 你的query表达式匹配了50个,但是你传的maxCount为5,那就是选最优的前5个
     */
    private static final int MAX_COUNT = 1000;

    /**
     * 查询器
     */
    private IndexSearcher indexSearcher = null;

    /**
     * 创建索引查询器
     * @param name 索引目录
     * @throws java.io.IOException
     */
    public Searcher(String name) throws IOException
    {
         // 创建索引的位置
         String indexPath = Configuration.getLuceneIndexDir() + "/" + name;

         // 打开索引目录
         Directory indexDir = DirCenter.getDir(indexPath);

         // 读取器
         IndexReader reader = DirectoryReader.open(indexDir);

         // 创建索引
         indexSearcher = new IndexSearcher(reader);
    }


    /**
     * 根据关键字搜索
     * @param keywords 关键字
     * @return
     * @throws Exception
     */
    public List<Map<String,String>> search(String keywords,String []indexFields) throws Exception
    {
        // 解析器
        Analyzer analyzer  = AnalyzerCenter.getAnalyzer();

        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47,indexFields,analyzer);

        // 查询对象
        Query query = parser.parse(keywords);

        return search(query);
    }


    /**
     * 根据多个关键字搜索
     * @param keywords 关键字
     * @return
     * @throws Exception
     */
    public List<Map<String,String>> search(String [] keywords,String []indexFields) throws Exception
    {
        // 解析器
        Analyzer analyzer  = AnalyzerCenter.getAnalyzer();

        MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_47,indexFields,analyzer);

        // 多关键子查询
        BooleanQuery bq = new BooleanQuery();

        // 查询对象
        Query query = null;
        for(String keyword : keywords)
        {
            query=parser.parse(keyword);
            // 是表示And关系
            bq.add(query, BooleanClause.Occur.MUST);
        }

        return search(bq);
    }

    /**
     * 根据Query查询结果集
     * @param query
     * @return
     * @throws Exception
     */
    private List<Map<String,String>> search(Query query)throws Exception
    {
        // 查询匹配的前50个
        ScoreDoc[] hits = indexSearcher.search(query, null, MAX_COUNT).scoreDocs;

        // 封装检索的数据
        List<Map<String,String>>  data = new ArrayList<Map<String,String>>();
        Map<String,String> record = null;
        Document hitDoc = null;
        for (int i = 0; i < hits.length; i++) {
            hitDoc = indexSearcher.doc(hits[i].doc);
            record = getDocsItem(hitDoc);
            data.add(record);
        }

        return  data;
    }

    /**
     * 转换Doc对象为map数据结构
     * @param hitDoc  检索的doc对象
     * @return
     * @throws java.io.IOException
     */
    private Map<String,String> getDocsItem(Document hitDoc) throws IOException
    {
        // 文档的字段
        List<IndexableField> indexes = hitDoc.getFields();

        // 封装数据
        String name = null;
        String value = null;
        Map<String,String> record = new HashMap<String, String>();
        for(IndexableField  index : indexes)
        {
            name = index.name();
            value = index.stringValue();
            record.put(name,value);
        }

        return record;
    }
}




全文检索lucene

  • 0

    开心

    开心

  • 0

    板砖

    板砖

  • 0

    感动

    感动

  • 0

    有用

    有用

  • 0

    疑问

    疑问

  • 0

    难过

    难过

  • 0

    无聊

    无聊

  • 0

    震惊

    震惊

编辑推荐
一,概念介绍 目前系统中存在着大量的报文信息,每条报文的数据量较小,大概2000-3000字节左右,但是
1,信息检索与数据库的搜索对比 -- DB就不是为全文检索而设计的 a> 数据库的搜索不能实现我们的全
在讲全文检索之前,先说下信息检索。 信息检索通俗的讲,就是从信息集合中找出与用户相关的信息,除
全文检索的概念 1.从大量的信息中快速、准确的查找要的信息 2.收索的内容是文本信息 3.不是根据语句
全文检索引擎Lucene 1. 信息检索的概念 从信息集合中找出与用户需求相关的信息。被检索的信息包括文
基本概念: 前期准备: lucene-2.4.0 junit4.9 实例代码: package com.ln.ydc.lucene.test; import
原文: 使用Lucene.Net实现全文检索 目录 一 Lucene.Net概述 二 分词 三 索引 四 搜索 五 实践中的问
使用Lucene.Net实现全文检索 目录 一 Lucene.Net概述 二 分词 三 索引 四 搜索 五 实践中的问题 一
转下这篇相当不错的Lucene的基本原理介绍! 一、总论 根据http://lucene.apache.org/java/docs/inde
转下这篇相当不错的Lucene的基本原理介绍! 一、总论 根据http://lucene.apache.org/java/docs/inde
版权所有 IT知识库 CopyRight © 2009-2015 IT知识库 IT610.com , All Rights Reserved. 京ICP备09083238号