• 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏吧

Java TermStats类的典型用法和代码示例

java 2次浏览

本文整理汇总了Java中org.apache.lucene.codecs.TermStats的典型用法代码示例。如果您正苦于以下问题:Java TermStats类的具体用法?Java TermStats怎么用?Java TermStats使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。

TermStats类属于org.apache.lucene.codecs包,在下文中一共展示了TermStats类的21个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: finishTerm

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term by packaging its statistics and encoded postings
 * metadata into a {@code FSTTermOutputs.TermData} and adding that to the FST
 * builder, keyed by the term bytes.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the postings writer or the metadata buffer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final BlockTermState termState = postingsWriter.newTermState();
  final FSTTermOutputs.TermData termData = new FSTTermOutputs.TermData();
  termData.longs = new long[longsSize];
  termData.bytes = null;

  // The same statistics are recorded on both the postings state and the FST output.
  termState.docFreq = stats.docFreq;
  termData.docFreq = termState.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  termData.totalTermFreq = termState.totalTermFreq;

  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termData.longs, metaWriter, fieldInfo, termState, true);

  // Whatever encodeTerm left in metaWriter becomes this term's byte payload;
  // the buffer is then reset so the next term starts from an empty buffer.
  final int encodedLength = (int) metaWriter.getFilePointer();
  if (encodedLength > 0) {
    termData.bytes = new byte[encodedLength];
    metaWriter.writeTo(termData.bytes, 0);
    metaWriter.reset();
  }

  builder.add(Util.toIntsRef(text, scratchTerm), termData);
  numTerms++;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:21,
代码来源:FSTTermsWriter.java

示例2: finishTerm

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Writes one term's worth of postings.
 *
 * <p>Pushes the term, lets the postings writer finalize a fresh term state
 * carrying the supplied statistics, and queues a {@code PendingTerm} that
 * holds a deep copy of the term bytes.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the postings writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq != 0;
  assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY || stats.totalTermFreq >= stats.docFreq: "postingsWriter=" + postingsWriter;

  pushTerm(text);

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  final PendingTerm queued = new PendingTerm(BytesRef.deepCopyOf(text), termState);
  pending.add(queued);
  numTerms++;

  // The last pending term always advances; the first is only set when none is recorded yet.
  lastPendingTerm = queued;
  if (firstPendingTerm == null) {
    firstPendingTerm = queued;
  }
}
 

开发者ID:europeana,
项目名称:search,
代码行数:23,
代码来源:OrdsBlockTreeTermsWriter.java

示例3: add

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds an index entry that maps a prefix of {@code text} to
 * {@code termsFilePointer} in the FST, then remembers {@code text} as the
 * last term seen.
 *
 * <p>An empty term is ignored because it was already added in the constructor.
 *
 * @param text             term to index; its {@code length} is shortened
 *                         temporarily and always restored before returning
 * @param stats            statistics of the term (not read here)
 * @param termsFilePointer file pointer stored as the FST output for this term
 * @throws IOException if adding to the FST builder fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  if (text.length == 0) {
    // We already added empty string in ctor
    assert termsFilePointer == startTermsFilePointer;
  } else {
    final int fullLength = text.length;
    // Index only the prefix computed against the previous term.
    text.length = indexedTermPrefixLength(lastTerm.get(), text);
    try {
      fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
    } finally {
      // Restore the caller's view of the term even if the add failed.
      text.length = fullLength;
    }
    lastTerm.copyBytes(text);
  }
}
 

开发者ID:europeana,
项目名称:search,
代码行数:17,
代码来源:VariableGapTermsIndexWriter.java

示例4: finishTerm

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term, queuing it as a {@code PendingTerm} unless the
 * postings writer reports a {@code lastDocID} of -1, in which case the term
 * is dropped without being pushed or counted.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the postings writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  // NOTE(review): -1 presumably means no document was recorded for this term; confirm against IDVersionPostingsWriter.
  if (((IDVersionPostingsWriter) postingsWriter).lastDocID == -1) {
    return;
  }

  pushTerm(text);

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  final PendingTerm queued = new PendingTerm(BytesRef.deepCopyOf(text), termState);
  pending.add(queued);
  numTerms++;

  if (firstPendingTerm == null) {
    firstPendingTerm = queued;
  }
  lastPendingTerm = queued;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:22,
代码来源:VersionBlockTreeTermsWriter.java

示例5: finishTerm

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Checks the caller's statistics against what the last postings consumer
 * recorded, accumulates the per-field sums, and forwards the call to the
 * wrapped consumer.
 *
 * @param text  bytes of the term being finished; asserted equal to the last started term
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the wrapped consumer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  // State machine: a term may only be finished from START, and leaves the consumer in INITIAL.
  assert state == TermsConsumerState.START;
  state = TermsConsumerState.INITIAL;

  assert text.equals(lastTerm);
  assert stats.docFreq > 0; // otherwise, this method should not be called.
  assert stats.docFreq == lastPostingsConsumer.docFreq;
  sumDocFreq += stats.docFreq;

  final boolean tracksFreqs = fieldInfo.getIndexOptions() != IndexOptions.DOCS_ONLY;
  if (tracksFreqs) {
    assert stats.totalTermFreq == lastPostingsConsumer.totalTermFreq;
    sumTotalTermFreq += stats.totalTermFreq;
  } else {
    // For DOCS_ONLY fields the total term frequency is expected to be -1.
    assert stats.totalTermFreq == -1;
  }

  in.finishTerm(text, stats);
}
 

开发者ID:europeana,
项目名称:search,
代码行数:17,
代码来源:AssertingPostingsFormat.java

示例6: write

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Feeds this term's documents (and, unless term frequencies are omitted,
 * their positions) to the given consumer, then finishes the term.
 *
 * @param termsConsumer consumer that receives the term and its postings
 * @return the number of positions written across all documents; stays 0 when
 *         {@code field.omitTF} is set
 * @throws Throwable whatever the consumer throws
 */
public long write(final TermsConsumer termsConsumer) throws Throwable {
  final PostingsConsumer consumer = termsConsumer.startTerm(text);
  long positionCount = 0;

  for (int docIdx = 0; docIdx < docs.length; docIdx++) {
    if (field.omitTF) {
      // Frequencies are omitted: report -1 and never touch the positions array.
      consumer.startDoc(docs[docIdx], -1);
    } else {
      final PositionData[] docPositions = positions[docIdx];
      consumer.startDoc(docs[docIdx], docPositions.length);
      positionCount += docPositions.length;
      for (final PositionData position : docPositions) {
        consumer.addPosition(position.pos, position.payload, -1, -1);
      }
    }
    consumer.finishDoc();
  }

  termsConsumer.finishTerm(text, new TermStats(docs.length, field.omitTF ? -1 : positionCount));
  return positionCount;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:24,
代码来源:TestCodecs.java

示例7: add

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds an index entry that maps a prefix of {@code text} to
 * {@code termsFilePointer} in the FST, then copies {@code text} into
 * {@code lastTerm}.
 *
 * <p>An empty term is skipped because it was already added in the constructor.
 *
 * @param text             term to index; its {@code length} is modified
 *                         temporarily and restored before returning
 * @param stats            statistics of the term (not read here)
 * @param termsFilePointer file pointer stored as the FST output for this term
 * @throws IOException if adding to the FST builder fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  final boolean isEmptyTerm = text.length == 0;
  if (isEmptyTerm) {
    // We already added empty string in ctor
    assert termsFilePointer == startTermsFilePointer;
    return;
  }

  final int originalLength = text.length;
  final int prefixLength = indexedTermPrefixLength(lastTerm, text);
  // Truncate in place so only the prefix is fed to the FST.
  text.length = prefixLength;
  try {
    fstBuilder.add(Util.toIntsRef(text, scratchIntsRef), termsFilePointer);
  } finally {
    // Always hand the term back at its full length.
    text.length = originalLength;
  }
  lastTerm.copyBytes(text);
}
 

开发者ID:pkarmstr,
项目名称:NYBC,
代码行数:17,
代码来源:VariableGapTermsIndexWriter.java

示例8: finishTerm

点赞 3

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Called when we are done adding docs to this term.
 *
 * <p>Writes skip data when the term has at least {@code skipMinimum} docs,
 * records a {@code PendingTerm} with the term's start pointers and skip
 * offset, and resets the per-term counters.
 *
 * @param stats statistics of the finished term; {@code docFreq} is asserted to match the local count
 * @throws IOException if writing the skip data fails
 */
@Override
public void finishTerm(TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  // TODO: wasteful we are counting this (counting # docs
  // for this term) in two places?
  assert stats.docFreq == df;

  // The skip offset is stored relative to freqStart; -1 marks "no skip data written".
  final long skipOffset = (df >= skipMinimum)
      ? skipListWriter.writeSkip(freqOut) - freqStart
      : -1;

  pendingTerms.add(new PendingTerm(freqStart, proxStart, skipOffset));

  // Reset the per-term counters for the next term.
  lastDocID = 0;
  df = 0;
}
 

开发者ID:pkarmstr,
项目名称:NYBC,
代码行数:24,
代码来源:Lucene40PostingsWriter.java

示例9: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term: finalizes its state through the postings writer,
 * adds its statistics to the per-field sums, pushes the term, and queues it
 * as a {@code PendingTerm}.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the postings writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;
  assert fieldInfo.getIndexOptions() == IndexOptions.DOCS_ONLY || stats.totalTermFreq >= stats.docFreq: "postingsWriter=" + postingsWriter;

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);

  // The sums are taken from the state after the postings writer has finished with it.
  sumDocFreq += termState.docFreq;
  sumTotalTermFreq += termState.totalTermFreq;
  pushTerm(text);

  // NOTE(review): text is handed over without an explicit copy here; presumably
  // the PendingTerm constructor copies the bytes. Confirm.
  final PendingTerm queued = new PendingTerm(text, termState);
  pending.add(queued);
  numTerms++;

  lastPendingTerm = queued;
  if (firstPendingTerm == null) {
    firstPendingTerm = queued;
  }
}
 

开发者ID:lamsfoundation,
项目名称:lams,
代码行数:33,
代码来源:BlockTreeTermsWriter.java

示例10: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term by serializing its statistics followed by the
 * buffered postings into one byte array, and adding a deep copy of that
 * array to the FST builder keyed by the term bytes.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if writing to or copying from the buffers fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {

  assert postingsWriter.docCount == stats.docFreq;

  // buffer2 is scratch space for the stats header and must start empty.
  assert buffer2.getFilePointer() == 0;

  // Header: docFreq, then (unless the field is DOCS_ONLY) totalTermFreq stored as a delta from docFreq.
  buffer2.writeVInt(stats.docFreq);
  if (field.getIndexOptions() != IndexOptions.DOCS_ONLY) {
    buffer2.writeVLong(stats.totalTermFreq-stats.docFreq);
  }
  // pos = header length, i.e. the offset at which the postings bytes will start.
  int pos = (int) buffer2.getFilePointer();
  // NOTE(review): the header is copied before the capacity check below; presumably
  // finalBuffer is always at least as large as the header. Confirm.
  buffer2.writeTo(finalBuffer, 0);
  buffer2.reset();

  // Grow the shared output array if header + postings do not fit.
  final int totalBytes = pos + (int) postingsWriter.buffer.getFilePointer();
  if (totalBytes > finalBuffer.length) {
    finalBuffer = ArrayUtil.grow(finalBuffer, totalBytes);
  }
  // Append the buffered postings right after the header, then clear the postings buffer.
  postingsWriter.buffer.writeTo(finalBuffer, pos);
  postingsWriter.buffer.reset();

  // spare is a reusable view over finalBuffer; it is deep-copied below before being handed to the builder.
  spare.bytes = finalBuffer;
  spare.length = totalBytes;

  //System.out.println("    finishTerm term=" + text.utf8ToString() + " " + totalBytes + " bytes totalTF=" + stats.totalTermFreq);
  //for(int i=0;i<totalBytes;i++) {
  //  System.out.println("      " + Integer.toHexString(finalBuffer[i]&0xFF));
  //}

  builder.add(Util.toIntsRef(text, scratchIntsRef), BytesRef.deepCopyOf(spare));
  termCount++;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:34,
代码来源:MemoryPostingsFormat.java

示例11: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term: writes its statistics, delta-encodes its
 * postings metadata, and adds the term to the FST with its ordinal
 * ({@code numTerms}) as output.
 *
 * <p>Statistics encoding: when {@code totalTermFreq > 0} the doc frequency is
 * shifted left by one and the low bit is 1 if {@code totalTermFreq == docFreq};
 * otherwise the low bit is 0 and the difference follows as a vLong. When
 * {@code totalTermFreq <= 0} the doc frequency is written unshifted.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if any of the outputs or the postings writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  if (numTerms > 0 && numTerms % SKIP_INTERVAL == 0) {
    bufferSkip();
  }

  // write term meta data into fst
  final long[] termLongs = new long[longsSize];
  final long delta = stats.totalTermFreq - stats.docFreq;
  if (stats.totalTermFreq <= 0) {
    statsOut.writeVInt(stats.docFreq);
  } else if (delta == 0) {
    statsOut.writeVInt((stats.docFreq << 1) | 1);
  } else {
    statsOut.writeVInt(stats.docFreq << 1);
    statsOut.writeVLong(delta);
  }

  final BlockTermState termState = postingsWriter.newTermState();
  termState.docFreq = stats.docFreq;
  termState.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(termState);
  postingsWriter.encodeTerm(termLongs, metaBytesOut, fieldInfo, termState, true);

  // Each long is stored as a delta against the previous term's value.
  for (int slot = 0; slot < longsSize; slot++) {
    final long current = termLongs[slot];
    metaLongsOut.writeVLong(current - lastLongs[slot]);
    lastLongs[slot] = current;
  }
  // The number of metadata bytes this term added, relative to the previous term's end.
  metaLongsOut.writeVLong(metaBytesOut.getFilePointer() - lastMetaBytesFP);

  builder.add(Util.toIntsRef(text, scratchTerm), numTerms);
  numTerms++;

  lastMetaBytesFP = metaBytesOut.getFilePointer();
}
 

开发者ID:europeana,
项目名称:search,
代码行数:35,
代码来源:FSTOrdTermsWriter.java

示例12: isIndexTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Decides whether the current term should be indexed, based only on a running counter.
 *
 * <p>Once {@code count} has reached {@code interval}, the counter is reset to 1
 * and the term is selected; otherwise the counter is incremented and the term
 * is not selected.
 *
 * @param term  the candidate term (not read here)
 * @param stats statistics of the term (not read here)
 * @return {@code true} if the term should be indexed
 */
@Override
public boolean isIndexTerm(BytesRef term, TermStats stats) {
  if (count < interval) {
    count++;
    return false;
  }
  count = 1;
  return true;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:11,
代码来源:VariableGapTermsIndexWriter.java

示例13: checkIndexTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Asks the selection policy whether {@code text} should become an index term.
 *
 * <p>The first term of a field is always indexed, even when the policy says
 * no. When a term is not indexed, its bytes are copied into {@code lastTerm}.
 *
 * @param text  the candidate term
 * @param stats statistics of the term, passed through to the policy
 * @return {@code true} if the term must be added to the index
 * @throws IOException declared by the overridden method; not thrown by the visible code
 */
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
  // The policy is always consulted, even for the first term, so that it can update any state it keeps.
  final boolean chosenByPolicy = policy.isIndexTerm(text, stats);

  // NOTE: we must force the first term per field to be
  // indexed, in case policy doesn't:
  if (!chosenByPolicy && !first) {
    lastTerm.copyBytes(text);
    return false;
  }

  first = false;
  return true;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:15,
代码来源:VariableGapTermsIndexWriter.java

示例14: checkIndexTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Selects every {@code termIndexInterval}-th term as an index term, starting
 * with the first one, and counts every term seen.
 *
 * <p>The term immediately before the next index term is copied into
 * {@code lastTerm}.
 *
 * @param text  the candidate term
 * @param stats statistics of the term (not read here)
 * @return {@code true} if the term must be added to the index
 * @throws IOException declared by the overridden method; not thrown by the visible code
 */
@Override
public boolean checkIndexTerm(BytesRef text, TermStats stats) throws IOException {
  // First term is first indexed term: the test uses the count before it is incremented.
  final boolean isIndexTerm = 0 == (numTerms++ % termIndexInterval);
  if (isIndexTerm) {
    return true;
  }

  // numTerms now counts this term too; if the next term will be indexed,
  // save last term just before next index term so we
  // can compute wasted suffix
  if (0 == numTerms % termIndexInterval) {
    lastTerm.copyBytes(text);
  }
  return false;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:16,
代码来源:FixedGapTermsIndexWriter.java

示例15: add

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Records {@code text} as an index term: writes the prefix computed against
 * the previous term to {@code out}, and stores that prefix length and the
 * terms-file pointer delta in the parallel arrays.
 *
 * @param text             the index term
 * @param stats            statistics of the term (not read here)
 * @param termsFilePointer terms-file position associated with this index term
 * @throws IOException if writing the prefix bytes fails
 */
@Override
public void add(BytesRef text, TermStats stats, long termsFilePointer) throws IOException {
  final int prefixLength = indexedTermPrefixLength(lastTerm.get(), text);

  // write only the min prefix that shows the diff
  // against prior term
  out.writeBytes(text.bytes, text.offset, prefixLength);

  // Make room in both parallel arrays before storing at index numIndexTerms.
  if (termLengths.length == numIndexTerms) {
    termLengths = ArrayUtil.grow(termLengths);
  }
  if (termsPointerDeltas.length == numIndexTerms) {
    termsPointerDeltas = ArrayUtil.grow(termsPointerDeltas);
  }

  // save delta terms pointer
  final long pointerDelta = termsFilePointer - lastTermsPointer;
  termsPointerDeltas[numIndexTerms] = (int) pointerDelta;
  lastTermsPointer = termsFilePointer;

  // save term length (in bytes)
  assert prefixLength <= Short.MAX_VALUE;
  termLengths[numIndexTerms] = (short) prefixLength;
  totTermLength += prefixLength;

  lastTerm.copyBytes(text);
  numIndexTerms++;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:29,
代码来源:FixedGapTermsIndexWriter.java

示例16: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term by buffering it in the pending-terms array.
 *
 * <p>When the index writer selects the term as an index term, any
 * already-buffered terms are flushed as a block first, and the term is then
 * added to the index with the current file pointer of {@code out}.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if flushing, indexing, or the postings writer fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  assert stats.docFreq > 0;

  if (fieldIndexWriter.checkIndexTerm(text, stats)) {
    // Instead of writing each term, live, we gather terms
    // in RAM in a pending buffer, and then write the
    // entire block in between index terms:
    if (pendingCount > 0) {
      flushBlock();
    }
    fieldIndexWriter.add(text, stats, out.getFilePointer());
  }

  // Grow the buffer when full and pre-populate the new slots with entries.
  if (pendingTerms.length == pendingCount) {
    final int grownLength = ArrayUtil.oversize(pendingCount+1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
    pendingTerms = Arrays.copyOf(pendingTerms, grownLength);
    for (int slot = pendingCount; slot < pendingTerms.length; slot++) {
      pendingTerms[slot] = new TermEntry();
    }
  }

  final TermEntry entry = pendingTerms[pendingCount];
  entry.term.copyBytes(text);
  entry.state = postingsWriter.newTermState();
  entry.state.docFreq = stats.docFreq;
  entry.state.totalTermFreq = stats.totalTermFreq;
  postingsWriter.finishTerm(entry.state);

  pendingCount++;
  numTerms++;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:36,
代码来源:BlockTermsWriter.java

示例17: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Adds the term to the Bloom filter when at least one document contains it,
 * then forwards the call to the delegate consumer.
 *
 * @param text  bytes of the term being finished
 * @param stats document frequency and total term frequency of the term
 * @throws IOException if the delegate fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final boolean termHasDocs = stats.docFreq > 0;
  if (termHasDocs) {
    // Record this term in our BloomFilter
    bloomFilter.addValue(text);
  }
  delegateTermsConsumer.finishTerm(text, stats);
}
 

开发者ID:europeana,
项目名称:search,
代码行数:10,
代码来源:BloomFilteringPostingsFormat.java

示例18: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Stores the total term frequency on the term currently being built and
 * registers that term in the field's term-to-docs map.
 *
 * @param text  bytes of the term being finished (not read here; the key is {@code current.term})
 * @param stats document frequency and total term frequency of the term
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) {
  // The reported doc frequency must be positive and match the docs collected for the current term.
  assert stats.docFreq > 0;
  assert stats.docFreq == current.docs.size();

  current.totalTermFreq = stats.totalTermFreq;
  field.termToDocs.put(current.term, current);
}
 

开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:RAMOnlyPostingsFormat.java

示例19: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Finishes the current term: writes its skip data, fills in the doc frequency
 * and skip offset on {@code termInfo}, and adds the term to the terms output.
 *
 * <p>Terms with a doc frequency of zero or less are ignored.
 *
 * @param text  bytes of the term being finished
 * @param stats statistics of the term; only {@code docFreq} is read
 * @throws IOException if writing skip data or the term entry fails
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  if (stats.docFreq <= 0) {
    return;
  }

  final long skipPointer = skipListWriter.writeSkip(freqOut);
  termInfo.docFreq = stats.docFreq;
  // The skip offset is stored relative to the term's freq pointer.
  termInfo.skipOffset = (int) (skipPointer - termInfo.freqPointer);
  termsOut.add(fieldInfo.number, text, termInfo);
}
 

开发者ID:europeana,
项目名称:search,
代码行数:13,
代码来源:PreFlexRWFieldsWriter.java

示例20: finishTerm

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Forwards to the delegate, except that it throws a fake {@link IOException}
 * whenever {@code random.nextInt(10000)} returns 0.
 *
 * @param text  bytes of the term being finished
 * @param stats statistics of the term
 * @throws IOException the injected failure, or whatever the delegate throws
 */
@Override
public void finishTerm(BytesRef text, TermStats stats) throws IOException {
  final boolean injectFailure = random.nextInt(10000) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from TermsConsumer.finishTerm()");
  }
  delegate.finishTerm(text, stats);
}
 

开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:CrankyPostingsFormat.java

示例21: merge

点赞 2

import org.apache.lucene.codecs.TermStats; //导入依赖的package包/类
/**
 * Delegates to the superclass merge, except that it throws a fake
 * {@link IOException} whenever {@code random.nextInt(10000)} returns 0.
 *
 * @param mergeState   merge state passed through to the superclass
 * @param indexOptions index options passed through to the superclass
 * @param postings     postings passed through to the superclass
 * @param visitedDocs  bit set passed through to the superclass
 * @return the statistics returned by the superclass merge
 * @throws IOException the injected failure, or whatever the superclass throws
 */
@Override
public TermStats merge(MergeState mergeState, IndexOptions indexOptions, DocsEnum postings, FixedBitSet visitedDocs) throws IOException {
  final boolean injectFailure = random.nextInt(10000) == 0;
  if (injectFailure) {
    throw new IOException("Fake IOException from PostingsConsumer.merge()");
  }
  final TermStats merged = super.merge(mergeState, indexOptions, postings, visitedDocs);
  return merged;
}
 

开发者ID:europeana,
项目名称:search,
代码行数:8,
代码来源:CrankyPostingsFormat.java


版权声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。
喜欢 (0)