本文整理汇总了Java中org.apache.lucene.codecs.TermStats类的典型用法代码示例。如果您正苦于以下问题：Java TermStats类的具体用法？Java TermStats怎么用？Java TermStats使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
TermStats类属于org.apache.lucene.codecs包，在下文中一共展示了TermStats类的21个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: finishTerm
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Completes the current term and adds it, together with its metadata, to the FST builder.
 *
 * @param text  the term that has just been finished
 * @param stats source of the docFreq / totalTermFreq copied into the term state and metadata
 * @throws IOException if one of the underlying writers fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  // Postings-side state carrying the statistics reported for this term.
  final BlockTermState termState = postingsWriter.newTermState();

  // Metadata record that is stored as the FST output for this term.
  final FSTTermOutputs.TermData termData = new FSTTermOutputs.TermData();
  termData.longs = new long[longsSize];
  termData.bytes = null;

  termState.docFreq = stats.docFreq;
  termData.docFreq = termState.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  termData.totalTermFreq = termState.totalTermFreq;

  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termData.longs, metaWriter, fieldInfo, termState, true);

  // Whatever encodeTerm left in metaWriter is moved into the metadata record,
  // and the scratch writer is cleared for the next term.
  final int encodedLength = (int) metaWriter.getFilePointer();
  if (encodedLength > 0) {
    final byte[] encoded = new byte[encodedLength];
    termData.bytes = encoded;
    metaWriter.writeTo(encoded, 0);
    metaWriter.reset();
  }

  builder.add(Util.toIntsRef(text, scratchTerm), termData);
  numTerms++;
}
开发者ID:europeana,
项目名称:search,
代码行数:21,
代码来源:FSTTermsWriter.java
示例2: finishTerm
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Writes one term's worth of postings.
 *
 * <p>The term is pushed, its statistics are copied into a new term state that is handed to the
 * postings writer, and a copy of the term is appended to the pending list.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; {@code docFreq} must be non-zero
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq != 0;
  assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY
      || stats.totalTermFreq >= stats.docFreq : "postingsWriter=" + postingsWriter;

  pushTerm(text);

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  // The pending entry keeps its own copy of the term bytes.
  final PendingTerm finished = new PendingTerm(BytesRef.deepCopyOf(text), termState);
  pending.add(finished);
  numTerms++;

  // Track the first and most recent pending terms.
  if (firstPendingTerm == null) {
    firstPendingTerm = finished;
  }
  lastPendingTerm = finished;
}
开发者ID:europeana,
项目名称:search,
代码行数:23,
代码来源:OrdsBlockTreeTermsWriter.java
示例3: add
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds an index term to the FST, keyed by a prefix of the term and mapped to its position in
 * the terms file.
 *
 * @param text             the index term; its {@code length} is shortened temporarily and restored
 * @param stats            not used by this implementation
 * @param termsFilePointer terms-file pointer stored as the FST output
 * @throws IOException if adding to the FST fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  if (text.length == 0) {
    // We already added empty string in ctor
    assert termsFilePointer == startTermsFilePointer;
    return;
  }

  final int fullLength = text.length;
  final int prefixLength = indexedTermPrefixLength(lastTerm.get(), text);

  // Shorten the term in place for the FST insert, then always restore the caller's length.
  text.length = prefixLength;
  try {
    fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
  } finally {
    text.length = fullLength;
  }

  lastTerm.copyBytes(text);
}
开发者ID:europeana,
项目名称:search,
代码行数:17,
代码来源:VariableGapTermsIndexWriter.java
示例4: finishTerm
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes a term, recording it as pending only when the postings writer's {@code lastDocID}
 * is not -1.
 *
 * <p>NOTE(review): -1 presumably means no document was retained for this term; confirm against
 * IDVersionPostingsWriter.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; {@code docFreq} must be positive
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  if (((IDVersionPostingsWriter) postingsWriter).lastDocID == -1) {
    // Nothing to record for this term.
    return;
  }

  pushTerm(text);

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  // The pending entry keeps its own copy of the term bytes.
  final PendingTerm finished = new PendingTerm(BytesRef.deepCopyOf(text), termState);
  pending.add(finished);
  numTerms++;

  if (firstPendingTerm == null) {
    firstPendingTerm = finished;
  }
  lastPendingTerm = finished;
}
开发者ID:europeana,
项目名称:search,
代码行数:22,
代码来源:VersionBlockTreeTermsWriter.java
示例5: finishTerm
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Checks the consumer state and the reported statistics, accumulates the per-field sums, and
 * then forwards the call to the wrapped consumer.
 *
 * @param text  the term being finished; asserted equal to the last started term
 * @param stats statistics for the term; asserted consistent with the last postings consumer
 * @throws IOException if the wrapped consumer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  // State machine: a term must have been started; afterwards we are back at INITIAL.
  assert state == TermsConsumerState.START;
  state = TermsConsumerState.INITIAL;

  assert text.equals(lastTerm);
  assert stats.docFreq > 0; // otherwise, this method should not be called.
  assert stats.docFreq == lastPostingsConsumer.docFreq;
  sumDocFreq += stats.docFreq;

  final boolean hasFreqs = fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY;
  if (hasFreqs) {
    assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
    sumTotalTermFreq += stats.totalTermFreq;
  } else {
    // DOCS_ONLY fields are expected to report -1 as the total term frequency.
    assert stats.totalTermFreq == -1;
  }

  in.finishTerm(text, stats);
}
开发者ID:europeana,
项目名称:search,
代码行数:17,
代码来源:AssertingPostingsFormat.java
示例6: write
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Feeds this term, its documents and (unless the field omits term frequencies) its positions
 * to the given consumer.
 *
 * @param termsConsumer consumer that receives the term
 * @return the total number of positions written; stays 0 when {@code field.omitTF} is set
 * @throws Throwable anything thrown by the consumer
 */
public long write(final TermsConsumer termsConsumer) throws Throwable {
  final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text);
  long totTF = 0;

  for (int docIndex = 0; docIndex < docs.length; docIndex++) {
    // -1 is passed as the per-document frequency when frequencies are omitted.
    final int termDocFreq = field.omitTF ? -1 : positions[docIndex].length;
    postingsConsumer.startDoc(docs[docIndex], termDocFreq);

    if (!field.omitTF) {
      totTF += positions[docIndex].length;
      for (final PositionData position : positions[docIndex]) {
        postingsConsumer.addPosition(position.pos, position.payload, -1, -1);
      }
    }

    postingsConsumer.finishDoc();
  }

  final long reportedTotalTermFreq;
  if (field.omitTF) {
    reportedTotalTermFreq = -1;
  } else {
    reportedTotalTermFreq = totTF;
  }
  termsConsumer.finishTerm(text, new TermStats(docs.length, reportedTotalTermFreq));
  return totTF;
}
开发者ID:europeana,
项目名称:search,
代码行数:24,
代码来源:TestCodecs.java
示例7: add
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds an index term to the FST, keyed by a prefix of the term and mapped to its position in
 * the terms file.
 *
 * @param text             the index term; its {@code length} is shortened temporarily and restored
 * @param stats            not used by this implementation
 * @param termsFilePointer terms-file pointer stored as the FST output
 * @throws IOException if adding to the FST fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  if (text.length == 0) {
    // We already added empty string in ctor
    assert termsFilePointer == startTermsFilePointer;
    return;
  }

  final int originalLength = text.length;
  final int indexedLength = indexedTermPrefixLength(lastTerm, text);

  // Only the indexed prefix goes into the FST; the caller's length is restored afterwards
  // even if the insert throws.
  text.length = indexedLength;
  try {
    fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
  } finally {
    text.length = originalLength;
  }

  lastTerm.copyBytes(text);
}
开发者ID:pkarmstr,
项目名称:NYBC,
代码行数:17,
代码来源:VariableGapTermsIndexWriter.java
示例8: finishTerm
点赞 3
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Called when we are done adding docs to this term.
 *
 * <p>Queues a pending term with the current freq/prox start pointers and a skip offset, then
 * resets the per-term counters.
 *
 * @param stats statistics for the term; {@code docFreq} must be positive and equal to {@code df}
 * @throws IOException if writing the skip data fails
 */
@Override
public void finishTerm(TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  // TODO: wasteful we are counting this (counting # docs
  // for this term) in two places?
  assert stats.docFreq == df;

  // Skip data is written only once the term has at least skipMinimum docs; -1 otherwise.
  final long skipOffset =
      df >= skipMinimum ? skipListWriter.writeSkip(freqOut) - freqStart : -1;

  pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));

  // Reset per-term state for the next term.
  lastDocID = 0;
  df = 0;
}
开发者ID:pkarmstr,
项目名称:NYBC,
代码行数:24,
代码来源:Lucene40PostingsWriter.java
示例9: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes a term: hands its statistics to the postings writer, adds them to the per-field
 * sums, pushes the term and appends it to the pending list.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; {@code docFreq} must be positive
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;
  assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY
      || stats.totalTermFreq >= stats.docFreq : "postingsWriter=" + postingsWriter;

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  // Sums are taken from the state after the postings writer has processed it.
  sumDocFreq += termState.docFreq;
  sumTotalTermFreq += termState.totalTermFreq;

  pushTerm(text);

  final PendingTerm finished = new PendingTerm(text, termState);
  pending.add(finished);
  numTerms++;

  if (firstPendingTerm == null) {
    firstPendingTerm = finished;
  }
  lastPendingTerm = finished;
}
开发者ID:lamsfoundation,
项目名称:lams,
代码行数:33,
代码来源:BlockTreeTermsWriter.java
示例10: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Assembles the bytes for one term and adds a copy of them to the FST builder.
 *
 * <p>The assembled bytes start with a small stats header (docFreq, plus
 * {@code totalTermFreq - docFreq} unless the field is DOCS_ONLY) followed by the bytes buffered
 * by the postings writer.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; {@code docFreq} must match the postings writer's count
 * @throws IOException if a buffer operation or the FST insert fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert postingsWriter.docCount == stats.docFreq;
  assert buffer2.getFilePointer() == 0;

  // Stats header goes through the scratch buffer first.
  buffer2.writeVInt(stats.docFreq);
  if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
    buffer2.writeVLong(stats.totalTermFreq - stats.docFreq);
  }

  // Copy the header to the front of finalBuffer and clear the scratch buffer.
  final int headerLength = (int) buffer2.getFilePointer();
  buffer2.writeTo(finalBuffer, 0);
  buffer2.reset();

  // Make room for header + postings, then append the postings bytes after the header.
  final int postingsLength = (int) postingsWriter.buffer.getFilePointer();
  final int totalLength = headerLength + postingsLength;
  if (totalLength > finalBuffer.length) {
    finalBuffer = ArrayUtil.grow(finalBuffer, totalLength);
  }
  postingsWriter.buffer.writeTo(finalBuffer, headerLength);
  postingsWriter.buffer.reset();

  // finalBuffer is reused across terms, so the builder receives a deep copy.
  spare.bytes = finalBuffer;
  spare.length = totalLength;
  builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
  termCount++;
}
开发者ID:europeana,
项目名称:search,
代码行数:34,
代码来源:MemoryPostingsFormat.java
示例11: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes a term: writes its statistics and delta-encoded metadata, and maps the term to its
 * ordinal in the FST builder.
 *
 * @param text  the term being finished
 * @param stats statistics for the term
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  // Buffer skip data at every SKIP_INTERVAL-th term, except before the very first one.
  if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
    bufferSkip();
  }

  final long[] metaLongs = new long[longsSize];
  final long delta = stats.totalTermFreq - stats.docFreq;

  // Stats encoding: without a positive totalTermFreq only docFreq is written. Otherwise
  // docFreq is shifted left by one and the low bit flags totalTermFreq == docFreq; when the
  // flag is clear the difference follows as a VLong.
  if (stats.totalTermFreq <= 0) {
    statsOut.writeVInt(stats.docFreq);
  } else if (delta == 0) {
    statsOut.writeVInt((stats.docFreq << 1) | 1);
  } else {
    statsOut.writeVInt(stats.docFreq << 1);
    statsOut.writeVLong(delta);
  }

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(metaLongs, metaBytesOut, fieldInfo, termState, true);

  // Metadata longs are written as deltas against the previous term's values.
  for (int slot = 0; slot < longsSize; slot++) {
    metaLongsOut.writeVLong(metaLongs[slot] - lastLongs[slot]);
    lastLongs[slot] = metaLongs[slot];
  }

  // The metadata-bytes file pointer is delta-encoded as well.
  final long metaBytesFP = metaBytesOut.getFilePointer();
  metaLongsOut.writeVLong(metaBytesFP - lastMetaBytesFP);

  builder.add(Util.toIntsRef(text, scratchTerm), numTerms);
  numTerms++;
  lastMetaBytesFP = metaBytesFP;
}
开发者ID:europeana,
项目名称:search,
代码行数:35,
代码来源:FSTOrdTermsWriter.java
示例12: isIndexTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Decides whether the current term becomes an index term, based purely on a running counter.
 *
 * @param term  not used by this implementation
 * @param stats not used by this implementation
 * @return {@code true} when the counter has reached {@code interval} (the counter is then reset
 *     to 1); {@code false} otherwise (the counter is incremented)
 */
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
  if (count < interval) {
    count++;
    return false;
  }
  count = 1;
  return true;
}
开发者ID:europeana,
项目名称:search,
代码行数:11,
代码来源:VariableGapTermsIndexWriter.java
示例13: checkIndexTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Asks the selection policy whether the term should be indexed; the first term seen is always
 * indexed regardless of the policy's answer.
 *
 * @param text  the candidate term
 * @param stats statistics passed through to the policy
 * @return {@code true} if the term is to be indexed; otherwise {@code false}, in which case the
 *     term is remembered in {@code lastTerm}
 * @throws IOException declared by the interface
 */
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
  // NOTE: we must force the first term per field to be
  // indexed, in case policy doesn't. The policy is consulted first, on every call.
  final boolean indexThisTerm = policy.isIndexTerm(text, stats) || first;

  if (!indexThisTerm) {
    lastTerm.copyBytes(text);
    return false;
  }

  first = false;
  return true;
}
开发者ID:europeana,
项目名称:search,
代码行数:15,
代码来源:VariableGapTermsIndexWriter.java
示例14: checkIndexTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Selects every {@code termIndexInterval}-th term as an index term, starting with the first.
 *
 * @param text  the candidate term
 * @param stats not used by this implementation
 * @return {@code true} if this term is an index term
 * @throws IOException declared by the interface
 */
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
  // Ordinal of this term before the counter is bumped; the first term is the first index term.
  final long termOrdinal = numTerms++;
  if (termOrdinal % termIndexInterval == 0) {
    return true;
  }

  if (numTerms % termIndexInterval == 0) {
    // save last term just before next index term so we
    // can compute wasted suffix
    lastTerm.copyBytes(text);
  }
  return false;
}
开发者ID:europeana,
项目名称:search,
代码行数:16,
代码来源:FixedGapTermsIndexWriter.java
示例15: add
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Records an index term: writes its distinguishing prefix to {@code out} and stores the prefix
 * length and the terms-file pointer delta in the parallel arrays.
 *
 * @param text             the index term
 * @param stats            not used by this implementation
 * @param termsFilePointer position of the term in the terms file
 * @throws IOException if writing the prefix bytes fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  // write only the min prefix that shows the diff
  // against prior term
  final int prefixLength = indexedTermPrefixLength(lastTerm.get(), text);
  out.writeBytes(text.bytes, text.offset, prefixLength);

  // Grow the parallel arrays when the next slot would fall outside them.
  final int slot = numIndexTerms;
  if (slot == termLengths.length) {
    termLengths = ArrayUtil.grow(termLengths);
  }
  if (slot == termsPointerDeltas.length) {
    termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
  }

  // save delta terms pointer
  termsPointerDeltas[slot] = (int) (termsFilePointer - lastTermsPointer);
  lastTermsPointer = termsFilePointer;

  // save term length (in bytes)
  assert prefixLength <= Short.MAX_VALUE;
  termLengths[slot] = (short) prefixLength;
  totTermLength += prefixLength;

  lastTerm.copyBytes(text);
  numIndexTerms++;
}
开发者ID:europeana,
项目名称:search,
代码行数:29,
代码来源:FixedGapTermsIndexWriter.java
示例16: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Buffers a finished term in the pending array; when the term is an index term, the pending
 * block is flushed first and the term is registered with the index writer.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; {@code docFreq} must be positive
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  if (fieldIndexWriter.checkIndexTerm(text, stats)) {
    if (pendingCount > 0) {
      // Instead of writing each term, live, we gather terms
      // in RAM in a pending buffer, and then write the
      // entire block in between index terms:
      flushBlock();
    }
    fieldIndexWriter.add(text, stats, out.getFilePointer());
  }

  // Grow the pending array when full and pre-populate the new slots.
  if (pendingCount == pendingTerms.length) {
    final int grownSize =
        ArrayUtil.oversize(pendingCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
    pendingTerms = Arrays.copyOf(pendingTerms, grownSize);
    for (int slot = pendingCount; slot < pendingTerms.length; slot++) {
      pendingTerms[slot] = new TermEntry();
    }
  }

  // Fill the next free entry with the term bytes and a fresh term state.
  final TermEntry entry = pendingTerms[pendingCount];
  entry.term.copyBytes(text);
  entry.state = postingsWriter.newTermState();
  entry.state.docFreq = stats.docFreq;
  entry.state.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(entry.state);

  pendingCount++;
  numTerms++;
}
开发者ID:europeana,
项目名称:search,
代码行数:36,
代码来源:BlockTermsWriter.java
示例17: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds the term to the bloom filter when it has at least one document, then forwards the call
 * to the delegate consumer.
 *
 * @param text  the term being finished
 * @param stats statistics for the term
 * @throws IOException if the delegate fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final boolean termHasDocs = stats.docFreq > 0;
  if (termHasDocs) {
    // Record this term in our BloomFilter
    bloomFilter.addValue(text);
  }

  delegateTermsConsumer.finishTerm(text, stats);
}
开发者ID:europeana,
项目名称:search,
代码行数:10,
代码来源:BloomFilteringPostingsFormat.java
示例18: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Stores the total term frequency on the current term and registers that term in the field's
 * term map.
 *
 * @param text  not used by this implementation; the current term's own key is used
 * @param stats statistics for the term; {@code docFreq} must be positive and match the number
 *     of docs collected for the current term
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) {
  assert stats.docFreq > 0;
  assert stats.docFreq == current.docs.size();

  final long totalTermFreq = stats.totalTermFreq;
  current.totalTermFreq = totalTermFreq;

  field.termToDocs.put(current.term, current);
}
开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:RAMOnlyPostingsFormat.java
示例19: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Writes the skip data for the term and adds the term to the terms output; does nothing when
 * the term has no documents.
 *
 * @param text  the term being finished
 * @param stats statistics for the term; only {@code docFreq} is read
 * @throws IOException if an underlying writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  if (stats.docFreq <= 0) {
    return;
  }

  final long skipPointer = skipListWriter.writeSkip(freqOut);

  termInfo.docFreq = stats.docFreq;
  // The skip offset is stored relative to the term's freq pointer.
  termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);

  termsOut.add(fieldInfo.number, text, termInfo);
}
开发者ID:europeana,
项目名称:search,
代码行数:13,
代码来源:PreFlexRWFieldsWriter.java
示例20: finishTerm
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Forwards to the delegate, but throws a fake {@link IOException} whenever
 * {@code random.nextInt(10000)} returns 0.
 *
 * @param text  the term being finished
 * @param stats statistics for the term
 * @throws IOException the injected fake failure, or whatever the delegate throws
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final boolean injectFailure = random.nextInt(10000) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from TermsConsumer.finishTerm()");
  }

  delegate.finishTerm(text, stats);
}
开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:CrankyPostingsFormat.java
示例21: merge
点赞 2
import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Delegates to the superclass merge, but throws a fake {@link IOException} whenever
 * {@code random.nextInt(10000)} returns 0.
 *
 * @param mergeState   passed through to the superclass
 * @param indexOptions passed through to the superclass
 * @param postings     passed through to the superclass
 * @param visitedDocs  passed through to the superclass
 * @return the statistics returned by the superclass merge
 * @throws IOException the injected fake failure, or whatever the superclass throws
 */
@Override
public TermStats merge(MergeState mergeState, IndexOptions indexOptions, DocsEnum postings, FixedBitSet visitedDocs) throws IOException {
  final boolean injectFailure = random.nextInt(10000) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from PostingsConsumer.merge()");
  }

  final TermStats merged = super.merge(mergeState, indexOptions, postings, visitedDocs);
  return merged;
}
开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:CrankyPostingsFormat.java