SpringBoot集成Lucene
需要编写的类大概如下
首先是pom.xml
<!-- Lucene -->
<!-- Core library -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-core</artifactId>
    <version>7.6.0</version>
</dependency>
<!-- Query parser for queries against analyzed (tokenized) index fields -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-queryparser</artifactId>
    <version>7.6.0</version>
</dependency>
<!-- Common analyzers, suited to English text -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-common</artifactId>
    <version>7.6.0</version>
</dependency>
<!-- Highlighting of matched search keywords -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-highlighter</artifactId>
    <version>7.6.0</version>
</dependency>
<!-- smartcn Chinese analyzer -->
<dependency>
    <groupId>org.apache.lucene</groupId>
    <artifactId>lucene-analyzers-smartcn</artifactId>
    <version>7.6.0</version>
</dependency>
application.properties,配置索引文件的存放位置
# Location of the Lucene index on disk.
# The key must be on its own line: anything following '#' on the same line is
# part of the comment and would leave the property undefined.
Lucene.Index.Path=D:/java_test/Lucene
LuceneConfig.java,Lucene的Bean
import java.io.File; import java.io.IOException; import java.nio.file.Path; import java.nio.file.Paths; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.search.ControlledRealTimeReopenThread; import org.apache.lucene.search.SearcherFactory; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.springframework.beans.factory.annotation.Value; import org.springframework.context.annotation.Bean; import org.springframework.context.annotation.Configuration; @Configuration public class LuceneConfig { /** * lucene索引,存放位置 */ @Value("${Lucene.Index.Path}") private String LUCENE_INDEX_PATH/* = "D:\\java_test\\Lucene" */; /** * 创建一个 Analyzer 实例 * * @return */ @Bean public Analyzer analyzer() { return new SmartChineseAnalyzer(); } /** * 索引位置 * * @return * @throws IOException */ @Bean public Directory directory() throws IOException { Path path = Paths.get(LUCENE_INDEX_PATH); File file = path.toFile(); if(!file.exists()) { //如果文件夹不存在,则创建 file.mkdirs(); } return FSDirectory.open(path); } /** * 创建indexWriter * * @param directory * @param analyzer * @return * @throws IOException */ @Bean public IndexWriter indexWriter(Directory directory, Analyzer analyzer) throws IOException { IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig); // 清空索引 indexWriter.deleteAll(); indexWriter.commit(); return indexWriter; } /** * SearcherManager管理 * * @param directory * @return * @throws IOException */ @Bean public SearcherManager searcherManager(Directory directory, IndexWriter indexWriter) throws IOException { SearcherManager searcherManager = new SearcherManager(indexWriter, false, false, new SearcherFactory()); ControlledRealTimeReopenThread 
cRTReopenThead = new ControlledRealTimeReopenThread(indexWriter, searcherManager, 5.0, 0.025); cRTReopenThead.setDaemon(true); //线程名称 cRTReopenThead.setName("更新IndexReader线程"); // 开启线程 cRTReopenThead.start(); return searcherManager; } }
LuceneDaoImpl.java,自己的逻辑,初始化索引、更新索引、删除索引
import java.io.IOException; import java.util.ArrayList; import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Component; import com.daimeng.framework.lucene.LuceneDao; import com.daimeng.web.article.entity.ArticleInfo; @Component public class LuceneDaoImpl implements LuceneDao{ @Autowired private IndexWriter indexWriter; /** * * @功能描述: 创建索引 * StringField-字符串:构建一个字符串的Field,但不会进行分词,将整串字符串存入索引中,适合存储固定(id,身份证号,订单号等) * FloatPoint、LongPoint、DoublePoint-数值型:这个Field用来构建一个float数字型Field,进行分词和索引,比如(价格) * StoredField-重载方法、支持多种类型:这个Field用来构建不同类型Field,不分析,不索引,但要Field存储在文档中 * TextField-字符串或者流:一般此对字段需要进行检索查询 * @方法名称: createArticleIndex * @路径 com.daimeng.framework.lucene * @作者 daimeng.fun * @E-Mail sephy9527@qq.com * @创建时间 2020年10月4日 下午8:04:43 * @version V1.0 * @param list * @throws IOException * @return void */ @Override public void createArticleIndex(ArrayList<ArticleInfo> list) throws IOException { List<Document> docs = new ArrayList<Document>(); for (ArticleInfo obj : list) { Document doc = new Document(); doc.add(new StringField("id", obj.getId()+"", Field.Store.YES)); // doc.add(new TextField("title", obj.getTitle(), Field.Store.YES)); // doc.add(new TextField("shortContext", obj.getShortContext(), Field.Store.YES)); // doc.add(new TextField("context", obj.getContext(), Field.Store.YES)); doc.add(new TextField("text", obj.getTitle() + obj.getShortContext() + obj.getContext(), Field.Store.YES)); // doc.add(new TextField("articleType", obj.getArticleType(), Field.Store.YES)); // doc.add(new TextField("realname", obj.getCreateUser().getRealname(), Field.Store.YES)); // doc.add(new TextField("createTm", DateUtils.getDateStrFormat(obj.getCreateTm(), 
DateUtils.YYYY_MM_DDHH_MM_SS), Field.Store.YES)); // // 保存price, // float price = p.getPrice(); // // 建立倒排索引 // doc.add(new FloatPoint("price", price)); // // 正排索引用于排序、聚合 // doc.add(new FloatDocValuesField("price", price)); // // 存储到索引库 // doc.add(new StoredField("price", price)); // doc.add(new TextField("place", p.getPlace(), Field.Store.YES)); // doc.add(new StringField("code", p.getCode(), Field.Store.YES)); docs.add(doc); } //将文档放在内存中,并没有放入索引库 indexWriter.addDocuments(docs); //提交索引到索引库 indexWriter.commit(); } @Override public void reflushArticleInfoIndex(ArticleInfo obj) throws IOException{ indexWriter.deleteDocuments(new Term("id",obj.getId()+"")); Document doc = new Document(); doc.add(new StringField("id", obj.getId()+"", Field.Store.YES)); doc.add(new TextField("text", obj.getTitle() + obj.getShortContext() + obj.getContext(), Field.Store.YES)); indexWriter.addDocument(doc); indexWriter.commit(); } @Override public void deleteArticleInfoIndex(ArticleInfo obj) throws IOException { indexWriter.deleteDocuments(new Term("id",obj.getId()+"")); } }
LuceneServiceImpl.java,自己的逻辑,主要是搜索逻辑
import java.util.ArrayList; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanQuery.Builder; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.SearcherManager; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.springframework.beans.factory.annotation.Autowired; import org.springframework.stereotype.Service; import com.daimeng.framework.lucene.LuceneService; import com.daimeng.util.Constants; import com.daimeng.web.article.entity.ArticleInfo; import com.daimeng.web.article.service.ArticleService; import com.github.pagehelper.PageInfo; @Service public class LuceneServiceImpl implements LuceneService{ @Autowired private Analyzer analyzer; @Autowired private SearcherManager searcherManager; @Autowired private ArticleService articleService; @Override public PageInfo<ArticleInfo> searchArticle(String keywords, int page) throws Exception { //新searcherManager中的searcher,获取到最新的IndexSearcher searcherManager.maybeRefresh(); IndexSearcher indexSearcher = searcherManager.acquire(); // Map<String, String> queryParam = new HashMap<String, String>(); // for(String str : keywords) { // queryParam.put("title", str); // queryParam.put("shortContext", str); // queryParam.put("context", str); // } Builder builder = new BooleanQuery.Builder(); Sort sort = new Sort(); // 排序规则 //com.infinova.yimall.entity.Sort sort1 = pageQuery.getSort(); // if (sort1 != null && sort1.getOrder() != null) { // if ("ASC".equals((sort1.getOrder()).toUpperCase())) { // sort.setSort(new SortField(sort1.getField(), SortField.Type.FLOAT, false)); // } else if 
("DESC".equals((sort1.getOrder()).toUpperCase())) { // sort.setSort(new SortField(sort1.getField(), SortField.Type.FLOAT, true)); // } // } if (keywords != null) { // 输入空格,不进行模糊查询 if (!"".equals(keywords.replaceAll(" ", ""))) { // 模糊匹配,匹配词 builder.add(new QueryParser("text", analyzer).parse(keywords), Occur.MUST); // builder.add(new QueryParser("title", analyzer).parse(keywords), Occur.MUST); // builder.add(new QueryParser("shortContext", analyzer).parse(keywords), Occur.MUST); // builder.add(new QueryParser("context", analyzer).parse(keywords), Occur.MUST); // builder.add(new QueryParser("realname", analyzer).parse(keywords), Occur.MUST); // 精确查询 //builder.add(new TermQuery(new Term("id", keywords)), Occur.MUST); } } // if (queryParam.get("lowerPrice") != null && queryParam.get("upperPrice") != null) { // // 价格范围查询 // builder.add(FloatPoint.newRangeQuery("price", Float.parseFloat(queryParam.get("lowerPrice")), // Float.parseFloat(queryParam.get("upperPrice"))), Occur.MUST); // } TopDocs topDocs = indexSearcher.search(builder.build(), page * Constants.PAGE_SIZE_10, sort); ArrayList<ArticleInfo> list = new ArrayList<ArticleInfo>(); ScoreDoc[] hits = topDocs.scoreDocs; for (int i = (page - 1) * Constants.PAGE_SIZE_10; i < hits.length; i++) { Document doc = indexSearcher.doc(hits[i].doc); System.out.println(doc.toString()); ArticleInfo info = articleService.findArticleInfoById(Integer.parseInt(doc.get("id"))); // info.setId(Integer.parseInt(doc.get("id"))); // info.setTitle(doc.get("title")); // info.setShortContext(doc.get("shortContext")); // info.setContext(doc.get("context")); list.add(info); } PageInfo<ArticleInfo> pi = new PageInfo<ArticleInfo>(list); pi.setPageNum(page); pi.setSize(list.size()); pi.setPageSize(Constants.PAGE_SIZE_10); pi.setTotal(topDocs.totalHits); Long pages = 0L; if(topDocs.totalHits % Constants.PAGE_SIZE_10 > 0) { pages = (Long) (topDocs.totalHits / Constants.PAGE_SIZE_10) + 1; }else { pages = (Long) (topDocs.totalHits / 
Constants.PAGE_SIZE_10); } pi.setPages(Integer.valueOf(pages.toString())); return pi; } }
InitArticleIndexOrderRunner.java,启动后运行的OrderRunner,为了启动后初始化索引
import org.springframework.beans.factory.annotation.Autowired; import org.springframework.boot.CommandLineRunner; import org.springframework.core.annotation.Order; import org.springframework.stereotype.Component; import com.daimeng.util.Constants; import com.daimeng.web.article.service.ArticleService; /** * * @功能描述: 项目启动后立即执行 * @名称: InitArticleIndexOrderRunner.java * @路径 com.daimeng.framework.lucene * @作者 daimeng.fun * @E-Mail sephy9527@qq.com * @创建时间 2020年10月4日 下午7:51:04 * @version V1.0 */ @Component @Order(1) public class InitArticleIndexOrderRunner implements CommandLineRunner { @Autowired private ArticleService articleService; @Override public void run(String... args) throws Exception { Constants.println("The OrderRunner1 start to initialize ..."); articleService.synAllArticleInfo(); } }
同步方法如下,查询出需要存入索引的内容列表,然后调用LuceneDao的初始化方法
/**
 * Loads the articles selected by article type {@code "01"} and status code
 * {@code 1} (newest id first) and hands them to the Lucene DAO for indexing.
 *
 * <p>Best-effort: any failure is printed to standard error and not propagated.
 */
@Override
public void synAllArticleInfo() {
    try {
        luceneDao.createArticleIndex(
                articleRepository.findByArticleTypeAndStatusCdOrderByIdDesc("01", 1));
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
启动项目后,会初始化索引文件
至此,就成功了,然后开发页面去调用LuceneService的searchArticle方法即可