Lucene offers two ways to run a paged query:

  1. Fetch all hits up to and including the current page, then keep only the last page of them.
  2. Fetch all hits before (but not including) the current page, then read exactly one more page.

First approach: say we want page 2 with 10 items per page. We ask Lucene for the top 20 hits and then take hits 11 through 20 from the result. This is faster than the second approach, but it uses more memory.
Second approach: again page 2 with 10 items per page. We first fetch the 10 hits of page 1, then continue the search for the next 10; nothing beyond that is collected, whether it matches or not. This saves memory at the cost of some speed. The sketch below shows the two core calls side by side.
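
A minimal sketch of just the two core calls, stripped of setup (query, searcher, pageNow and pageSize are assumed to be prepared as in the full examples below):

// Approach 1: over-fetch the top pageNow * pageSize hits, then slice out the page.
TopDocs top = searcher.search(query, pageNow * pageSize);
ScoreDoc[] pagedHits1 = Arrays.copyOfRange(top.scoreDocs,
        (pageNow - 1) * pageSize,
        Math.min(pageNow * pageSize, top.scoreDocs.length));

// Approach 2: find the last hit of the previous page, then let Lucene
// collect only the pageSize hits that come after it.
// (pageNow >= 2 here; page 1 is just a plain top-N search.)
TopDocs previous = searcher.search(query, (pageNow - 1) * pageSize);
ScoreDoc lastOfPrev = previous.scoreDocs[(pageNow - 1) * pageSize - 1];
ScoreDoc[] pagedHits2 = searcher.searchAfter(lastOfPrev, query, pageSize).scoreDocs;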

With the groundwork from the earlier posts in place, let's go straight to the code. Here is the first paging approach. (This code builds on the previous article in this series; read that one first so the dependencies and utility classes are ready.)

import java.io.IOException;
import java.io.InputStream;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

// Product, ProductUtil and the statically imported file2list() come from
// the previous article in this series.

public class TestLucenePage1 {

    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        Analyzer analyzer = new SmartChineseAnalyzer();
        // 2. Build the index
        Directory index = createIndex(analyzer);

        // 3. Build the query
        String keyword = "手机";  // "mobile phone"
        System.out.println("Current keyword: " + keyword);
        Query query = new QueryParser("name", analyzer).parse(keyword);

        // 4. Search
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        int pageNow = 1;
        int pageSize = 10;

        ScoreDoc[] hits = pageSearch1(query, searcher, pageNow, pageSize);

        // 5. Display the results
        showSearchResults(searcher, hits, query, analyzer);
        // 6. Close the reader
        reader.close();

    }

    private static ScoreDoc[] pageSearch1(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {
        // Fetch the top pageNow * pageSize hits, then keep only the current page.
        TopDocs topDocs = searcher.search(query, pageNow * pageSize);
        System.out.println("Total hits:\t" + topDocs.totalHits);
        ScoreDoc[] allScores = topDocs.scoreDocs;

        List<ScoreDoc> hitScores = new ArrayList<>();

        int start = (pageNow - 1) * pageSize;
        // Clamp the end index so a partly filled (or empty) last page
        // does not throw ArrayIndexOutOfBoundsException.
        int end = Math.min(pageNow * pageSize, allScores.length);
        for (int i = start; i < end; i++)
            hitScores.add(allScores[i]);

        return hitScores.toArray(new ScoreDoc[0]);
    }

    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, Analyzer analyzer) throws Exception {
        System.out.println("Found " + hits.length + " hits.");

        // Highlight the matched keyword inside the "name" field.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        System.out.println("No.\tScore\tResult");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {

                if ("name".equals(f.name())) {
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t" + fieldContent);
                } else {
                    System.out.print("\t" + d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }

    private static Directory createIndex(Analyzer analyzer) throws IOException {
        Directory index = new RAMDirectory(); // in-memory index (deprecated in newer Lucene; ByteBuffersDirectory is the replacement)
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(index, config);
        String fileName = "140k_products.txt";
        InputStream inputStream = ProductUtil.class.getClassLoader().getResourceAsStream(fileName);
//        InputStream inputStream = ProductUtil.class.getResourceAsStream("/"+fileName);
        List<Product> products = file2list(inputStream);

        int total = products.size();
        int count = 0;
        int per = 0;
        int oldPer = 0;
        for (Product p : products) {
            addDoc(writer, p);
            count++;
            per = count * 100 / total;
            if (per != oldPer) {
                oldPer = per;
                System.out.printf("Indexing: %d records to add in total, current progress: %d%% %n", total, per);
            }

            if (per > 10)
                break;

        }
        writer.close();
        return index;
    }

    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
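
A quick, hypothetical way to exercise the paging logic beyond page 1, reusing the query and searcher already set up in main:

// Page 2, 10 per page: fetches the top 20 hits and returns hits 11-20.
ScoreDoc[] page2 = pageSearch1(query, searcher, 2, 10);
// A page beyond the last hit yields an empty array, because the end
// index is clamped to the number of hits actually found.
ScoreDoc[] farPage = pageSearch1(query, searcher, 1000, 10);
System.out.println(page2.length + " hits on page 2, " + farPage.length + " on page 1000");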

And here is the code for the second approach:

// Imports are identical to TestLucenePage1 above.
public class TestLucenePage2 {

    public static void main(String[] args) throws Exception {
        // 1. Prepare the Chinese analyzer
        Analyzer analyzer = new SmartChineseAnalyzer();
        // 2. Build the index
        Directory index = createIndex(analyzer);

        // 3. Build the query
        String keyword = "手机";  // "mobile phone"
        System.out.println("Current keyword: " + keyword);
        Query query = new QueryParser("name", analyzer).parse(keyword);

        // 4. Search
        IndexReader reader = DirectoryReader.open(index);
        IndexSearcher searcher = new IndexSearcher(reader);
        int pageNow = 1;
        int pageSize = 10;

        ScoreDoc[] hits = pageSearch2(query, searcher, pageNow, pageSize);

        // 5. Display the results
        showSearchResults(searcher, hits, query, analyzer);
        // 6. Close the reader
        reader.close();

    }

    private static ScoreDoc[] pageSearch2(Query query, IndexSearcher searcher, int pageNow, int pageSize)
            throws IOException {

        int start = (pageNow - 1) * pageSize;
        if (0 == start) {
            // Page 1 has no previous page, so a plain top-N search is enough.
            TopDocs topDocs = searcher.search(query, pageSize);
            return topDocs.scoreDocs;
        }
        // Collect only the hits up to the end of the previous page ...
        TopDocs topDocs = searcher.search(query, start);
        if (topDocs.scoreDocs.length < start) {
            // Not enough hits to reach this page.
            return new ScoreDoc[0];
        }
        // ... grab the last hit of the previous page ...
        ScoreDoc preScore = topDocs.scoreDocs[start - 1];

        // ... and let Lucene collect just the one page that follows it.
        topDocs = searcher.searchAfter(preScore, query, pageSize);
        return topDocs.scoreDocs;
    }

    private static void showSearchResults(IndexSearcher searcher, ScoreDoc[] hits, Query query, Analyzer analyzer) throws Exception {
        System.out.println("Found " + hits.length + " hits.");

        // Highlight the matched keyword inside the "name" field.
        SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<span style='color:red'>", "</span>");
        Highlighter highlighter = new Highlighter(simpleHTMLFormatter, new QueryScorer(query));

        System.out.println("No.\tScore\tResult");
        for (int i = 0; i < hits.length; ++i) {
            ScoreDoc scoreDoc = hits[i];
            int docId = scoreDoc.doc;
            Document d = searcher.doc(docId);
            List<IndexableField> fields = d.getFields();
            System.out.print((i + 1));
            System.out.print("\t" + scoreDoc.score);
            for (IndexableField f : fields) {

                if ("name".equals(f.name())) {
                    TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(d.get(f.name())));
                    String fieldContent = highlighter.getBestFragment(tokenStream, d.get(f.name()));
                    System.out.print("\t" + fieldContent);
                } else {
                    System.out.print("\t" + d.get(f.name()));
                }
            }
            System.out.println("<br>");
        }
    }

    private static Directory createIndex(Analyzer analyzer) throws IOException {
        Directory index = new RAMDirectory();
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        IndexWriter writer = new IndexWriter(index, config);
        String fileName = "140k_products.txt";
        InputStream inputStream = ProductUtil.class.getClassLoader().getResourceAsStream(fileName);
//        InputStream inputStream = ProductUtil.class.getResourceAsStream("/"+fileName);
        List<Product> products = file2list(inputStream);
        int total = products.size();
        int count = 0;
        int per = 0;
        int oldPer = 0;
        for (Product p : products) {
            addDoc(writer, p);
            count++;
            per = count * 100 / total;
            if (per != oldPer) {
                oldPer = per;
                System.out.printf("Indexing: %d records to add in total, current progress: %d%% %n", total, per);
            }

            if (per > 10)
                break;

        }
        writer.close();
        return index;
    }

    private static void addDoc(IndexWriter w, Product p) throws IOException {
        Document doc = new Document();
        doc.add(new TextField("id", String.valueOf(p.getId()), Field.Store.YES));
        doc.add(new TextField("name", p.getName(), Field.Store.YES));
        doc.add(new TextField("category", p.getCategory(), Field.Store.YES));
        doc.add(new TextField("price", String.valueOf(p.getPrice()), Field.Store.YES));
        doc.add(new TextField("place", p.getPlace(), Field.Store.YES));
        doc.add(new TextField("code", p.getCode(), Field.Store.YES));
        w.addDocument(doc);
    }
}
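
One design note: pageSearch2 runs the query twice for every page after the first, once to reach the end of the previous page and once for the page itself. When pages are consumed strictly in order (infinite-scroll style), you can instead keep the last ScoreDoc of each page and hand it straight back to searchAfter. A minimal sketch under that assumption, with query, searcher and pageSize as in main above:

// Walk all matching documents one page at a time, never re-collecting
// earlier pages.
ScoreDoc last = null;
while (true) {
    TopDocs page = (last == null)
            ? searcher.search(query, pageSize)             // first page
            : searcher.searchAfter(last, query, pageSize); // later pages
    if (page.scoreDocs.length == 0)
        break; // no more hits
    // ... render page.scoreDocs here ...
    last = page.scoreDocs[page.scoreDocs.length - 1];
}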

Note these two lines:

            if (per > 10)
                break;

They stop indexing once roughly 10% of the records have been added, because building the complete index over all of 140k_products.txt would take too long for a demo.
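
If you do need the complete index, a common alternative (not from the original post) is to build it on disk once with FSDirectory and reuse it on later runs; "product-index" below is an illustrative path:

    // Sketch: persist the full index under ./product-index so the slow
    // build only ever happens once. Needs java.nio.file.Paths and
    // org.apache.lucene.store.FSDirectory in the imports.
    private static Directory createIndexOnDisk(Analyzer analyzer, List<Product> products) throws IOException {
        Directory index = FSDirectory.open(Paths.get("product-index"));
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        try (IndexWriter writer = new IndexWriter(index, config)) {
            for (Product p : products) {
                addDoc(writer, p); // same addDoc as above, without the early break
            }
        }
        return index;
    }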

Q.E.D.

