Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,9 @@

import io.kafbat.ui.model.InternalTopic;
import io.kafbat.ui.model.InternalTopicConfig;
import io.kafbat.ui.service.index.lucene.IndexedTextField;
import io.kafbat.ui.service.index.lucene.NameDistanceScoringFunction;
import io.kafbat.ui.service.index.lucene.ShortWordAnalyzer;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
Expand All @@ -18,11 +21,11 @@
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
Expand Down Expand Up @@ -59,11 +62,14 @@ public LuceneTopicsIndex(List<InternalTopic> topics) throws IOException {

private Directory build(List<InternalTopic> topics) {
Directory directory = new ByteBuffersDirectory();

try (IndexWriter directoryWriter = new IndexWriter(directory, new IndexWriterConfig(this.analyzer))) {
for (InternalTopic topic : topics) {
Document doc = new Document();


doc.add(new StringField(FIELD_NAME_RAW, topic.getName(), Field.Store.YES));
doc.add(new TextField(FIELD_NAME, topic.getName(), Field.Store.NO));
doc.add(new IndexedTextField(FIELD_NAME, topic.getName(), Field.Store.YES));
doc.add(new IntPoint(FIELD_PARTITIONS, topic.getPartitionCount()));
doc.add(new IntPoint(FIELD_REPLICATION, topic.getReplicationFactor()));
doc.add(new LongPoint(FIELD_SIZE, topic.getSegmentSize()));
Expand Down Expand Up @@ -117,9 +123,9 @@ public List<InternalTopic> find(String search, Boolean showInternal,
closeLock.readLock().lock();
try {

QueryParser queryParser = new PrefixQueryParser(FIELD_NAME, this.analyzer);
PrefixQueryParser queryParser = new PrefixQueryParser(FIELD_NAME, this.analyzer);
queryParser.setDefaultOperator(QueryParser.Operator.AND);
Query nameQuery = queryParser.parse(search);;
Query nameQuery = queryParser.parse(search);

Query internalFilter = new TermQuery(new Term(FIELD_INTERNAL, "true"));

Expand All @@ -129,6 +135,12 @@ public List<InternalTopic> find(String search, Boolean showInternal,
queryBuilder.add(internalFilter, BooleanClause.Occur.MUST_NOT);
}

BooleanQuery combined = queryBuilder.build();
Query wrapped = new FunctionScoreQuery(
combined,
new NameDistanceScoringFunction(FIELD_NAME, queryParser.getPrefixes())
);

List<SortField> sortFields = new ArrayList<>();
sortFields.add(SortField.FIELD_SCORE);
if (!sortField.equals(FIELD_NAME)) {
Expand All @@ -137,7 +149,7 @@ public List<InternalTopic> find(String search, Boolean showInternal,

Sort sort = new Sort(sortFields.toArray(new SortField[0]));

TopDocs result = this.indexSearcher.search(queryBuilder.build(), count != null ? count : this.maxSize, sort);
TopDocs result = this.indexSearcher.search(wrapped, count != null ? count : this.maxSize, sort);

List<String> topics = new ArrayList<>();
for (ScoreDoc scoreDoc : result.scoreDocs) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
import static org.apache.lucene.search.BoostAttribute.DEFAULT_BOOST;

import io.kafbat.ui.service.index.TopicsIndex.FieldType;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.IntPoint;
Expand All @@ -14,10 +16,11 @@
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;

public class PrefixQueryParser extends QueryParser {

private final List<String> prefixes = new ArrayList<>();

/**
 * Creates a parser whose bare terms (no explicit field) are matched as prefixes
 * against {@code field}.
 *
 * @param field    default field for terms without an explicit field prefix
 * @param analyzer analyzer used to tokenize the query text
 */
public PrefixQueryParser(String field, Analyzer analyzer) {
super(field, analyzer);
}
Expand Down Expand Up @@ -60,7 +63,13 @@ protected Query newTermQuery(Term term, float boost) {
.orElse(FieldType.STRING);

Query query = switch (fieldType) {
case STRING -> new PrefixQuery(term);
case STRING -> {
if (Objects.equals(term.field(), field)) {
prefixes.add(term.text());
}

yield new PrefixQuery(term);
}
case INT -> IntPoint.newExactQuery(term.field(), Integer.parseInt(term.text()));
case LONG -> LongPoint.newExactQuery(term.field(), Long.parseLong(term.text()));
case BOOLEAN -> new TermQuery(term);
Expand All @@ -72,4 +81,7 @@ protected Query newTermQuery(Term term, float boost) {
return new BoostQuery(query, boost);
}

/**
 * Prefixes collected for the default field during the most recent {@code parse} call,
 * in query order.
 *
 * @return an immutable snapshot of the collected prefixes; returning a copy keeps
 *     callers from mutating the parser's internal accumulator
 */
public List<String> getPrefixes() {
  // Defensive copy: the original returned the live internal list.
  return List.copyOf(prefixes);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
package io.kafbat.ui.service.index.lucene;


import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredValue;
import org.apache.lucene.index.IndexOptions;

/**
 * A TextField-like field that, in its stored variant, also indexes offsets and stores
 * full term vectors (positions + offsets), so start offsets of matched terms can be
 * read back from postings at search time.
 */
public class IndexedTextField extends Field {

  /** Indexed, tokenized, not stored. */
  public static final FieldType TYPE_NOT_STORED = new FieldType();

  /** Indexed, tokenized, stored, with offsets and full term vectors. */
  public static final FieldType TYPE_STORED = new FieldType();

  static {
    TYPE_NOT_STORED.setTokenized(true);
    TYPE_NOT_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    TYPE_NOT_STORED.freeze();

    TYPE_STORED.setTokenized(true);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    TYPE_STORED.setStoreTermVectors(true);
    TYPE_STORED.setStoreTermVectorPositions(true);
    TYPE_STORED.setStoreTermVectorOffsets(true);
    TYPE_STORED.freeze();
  }

  // Non-null only when the field was created with Store.YES.
  private final StoredValue storedValue;

  /**
   * Creates a new field with a String value.
   *
   * @param name field name
   * @param value string value
   * @param store Store.YES if the content should also be stored
   * @throws IllegalArgumentException if the field name or value is null.
   */
  public IndexedTextField(String name, String value, Store store) {
    super(name, value, store == Store.YES ? TYPE_STORED : TYPE_NOT_STORED);
    storedValue = store == Store.YES ? new StoredValue(value) : null;
  }

  /**
   * Creates a new un-stored field reading its content from a Reader.
   *
   * @param name field name
   * @param reader reader value
   * @throws IllegalArgumentException if the field name is null
   * @throws NullPointerException if the reader is null
   */
  public IndexedTextField(String name, Reader reader) {
    super(name, reader, TYPE_NOT_STORED);
    this.storedValue = null;
  }

  /**
   * Creates a new un-stored field with a TokenStream value.
   *
   * @param name field name
   * @param stream TokenStream value
   * @throws IllegalArgumentException if the field name is null.
   * @throws NullPointerException if the tokenStream is null
   */
  public IndexedTextField(String name, TokenStream stream) {
    super(name, stream, TYPE_NOT_STORED);
    this.storedValue = null;
  }

  @Override
  public void setStringValue(String value) {
    super.setStringValue(value);
    // Keep the stored copy in sync when this field is reused across documents.
    if (storedValue != null) {
      storedValue.setStringValue(value);
    }
  }

  @Override
  public StoredValue storedValue() {
    return storedValue;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
package io.kafbat.ui.service.index.lucene;

import java.io.IOException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;

/**
 * Scores documents by how early in the {@code fieldName} value any of the query
 * prefixes occurs: a hit at the very start scores 1.0, later hits score
 * 1 / (1 + startOffset), and documents with no recorded hit fall back to 1.0.
 *
 * <p>NOTE(review): the 1.0 fallback ties non-matching docs with matches at offset 0 —
 * confirm this is intended; the wrapped query normally filters non-matches out anyway.
 *
 * <p>Assumes the field was indexed with offsets
 * (IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); occurrences indexed without
 * offsets are skipped instead of producing a bogus score.
 */
public class NameDistanceScoringFunction extends DoubleValuesSource {
  private final String fieldName;
  private final List<String> prefixes;

  public NameDistanceScoringFunction(String fieldName, List<String> prefixes) {
    this.fieldName = fieldName;
    this.prefixes = prefixes;
  }

  @Override
  public DoubleValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
    // Smallest start offset per (leaf-local) docId over all terms matching any prefix.
    Map<Integer, Integer> positions = new HashMap<>();

    Terms terms = ctx.reader().terms(fieldName);
    // terms() returns null when this segment has no terms for the field; the original
    // code would NPE here.
    if (terms != null) {
      for (String prefix : prefixes) {
        collectOffsets(terms, new BytesRef(prefix), positions);
      }
    }

    return new DoubleValues() {
      int doc = -1;

      @Override
      public double doubleValue() {
        Integer pos = positions.get(doc);
        if (pos == null) {
          return 1.0; // no prefix hit recorded for this doc
        }
        return 1.0 / (1.0 + pos);
      }

      @Override
      public boolean advanceExact(int target) {
        doc = target;
        return true;
      }
    };
  }

  /**
   * Walks every term that starts with {@code prefix} and records, per document, the
   * smallest start offset among its occurrences.
   */
  private static void collectOffsets(Terms terms, BytesRef prefix,
                                     Map<Integer, Integer> positions) throws IOException {
    TermsEnum iterator = terms.iterator();
    if (iterator.seekCeil(prefix) == TermsEnum.SeekStatus.END) {
      return;
    }
    // seekCeil only positions at the first term >= prefix; the original scored that term
    // even when it did not actually start with the prefix, and ignored every further
    // matching term. Verify the prefix and keep walking while it still matches.
    for (BytesRef term = iterator.term();
         term != null && startsWith(term, prefix);
         term = iterator.next()) {
      PostingsEnum postings = iterator.postings(
          null,
          PostingsEnum.OFFSETS | PostingsEnum.FREQS | PostingsEnum.POSITIONS
      );
      while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        int smallest = Integer.MAX_VALUE;
        for (int i = 0; i < postings.freq(); i++) {
          postings.nextPosition();
          int offset = postings.startOffset(); // -1 when offsets were not indexed
          if (offset >= 0) {
            smallest = Math.min(smallest, offset);
          }
        }
        if (smallest != Integer.MAX_VALUE) {
          positions.merge(postings.docID(), smallest, Math::min);
        }
      }
    }
  }

  /** Byte-wise prefix test; avoids pulling in an extra Lucene utility import. */
  private static boolean startsWith(BytesRef term, BytesRef prefix) {
    if (term.length < prefix.length) {
      return false;
    }
    for (int i = 0; i < prefix.length; i++) {
      if (term.bytes[term.offset + i] != prefix.bytes[prefix.offset + i]) {
        return false;
      }
    }
    return true;
  }

  @Override
  public boolean needsScores() {
    return false;
  }

  @Override
  public DoubleValuesSource rewrite(IndexSearcher searcher) {
    return this;
  }

  // Instances are never equal and never cacheable, so a constant hashCode keeps the
  // equals/hashCode contract trivially.
  @Override
  public int hashCode() {
    return 0;
  }

  @Override
  public boolean equals(Object obj) {
    return false;
  }

  @Override
  public String toString() {
    return "NameDistanceScoringFunction";
  }

  @Override
  public boolean isCacheable(LeafReaderContext ctx) {
    return false;
  }
}
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
package io.kafbat.ui.service.index;
package io.kafbat.ui.service.index.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.standard.StandardTokenizer;

class ShortWordAnalyzer extends Analyzer {
public class ShortWordAnalyzer extends Analyzer {

public ShortWordAnalyzer() {}

Expand All @@ -17,12 +18,13 @@ protected TokenStreamComponents createComponents(String fieldName) {

TokenStream tokenStream = new WordDelimiterGraphFilter(
tokenizer,
true,
WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
WordDelimiterGraphFilter.GENERATE_WORD_PARTS
| WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
| WordDelimiterGraphFilter.PRESERVE_ORIGINAL
| WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
| WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,

null
);

Expand Down
Loading
Loading