本文整理汇总了Java中edu.berkeley.nlp.lm.NgramLanguageModel类的典型用法代码示例。如果您正苦于以下问题：Java NgramLanguageModel类的具体用法？Java NgramLanguageModel怎么用？Java NgramLanguageModel使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
NgramLanguageModel类属于edu.berkeley.nlp.lm包，在下文中一共展示了NgramLanguageModel类的13个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: main
点赞 3
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Command-line entry point. Expected arguments, in this order:
 * {@code [-o outputFile] [-g vocabFile] binaryFile [textFile ...]}.
 * <p>
 * When {@code -o} is given, the static {@code writer} is opened on that
 * file. When {@code -g} is given, its value is passed to
 * {@code readBinary} as the vocabulary file. When no text file is listed,
 * the single pseudo-file {@code "-"} is scored instead.
 * <p>
 * NOTE(review): {@code usage()} is assumed to terminate the program;
 * confirm against its definition.
 *
 * @param argv command-line arguments
 * @throws IOException if the output file cannot be opened or scoring fails
 */
public static void main(final String[] argv) throws IOException {
    int i = 0;
    if (i >= argv.length) {
        usage();
    }
    String vocabFile = null;
    if (argv[i].equals("-o")) {
        if (++i >= argv.length) {
            usage();
        }
        writer = new PrintStream(new File(argv[i++]));
    }
    try {
        // Bounds check first: "-o file" may have consumed every argument,
        // in which case we must fall through to usage() rather than index
        // past the end of argv.
        if (i < argv.length && argv[i].equals("-g")) {
            if (++i >= argv.length) {
                usage();
            }
            vocabFile = argv[i++];
        }
        if (i >= argv.length) {
            usage();
        }
        String binaryFile = argv[i++];
        List<String> files = Arrays.asList(Arrays.copyOfRange(argv, i, argv.length));
        if (files.isEmpty()) {
            files = Collections.singletonList("-");
        }
        Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));
        NgramLanguageModel<String> lm = readBinary(vocabFile, binaryFile);
        double prob = computeProb(files, lm);
        System.out.println(String.format("Normalized Log probability of text is: %f", prob));
    } finally {
        // Close the per-sentence output even when loading or scoring throws.
        if (writer != null) {
            writer.close();
        }
    }
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:25,
代码来源:ComputeLogProbabilityOfTextStream.java
示例2: computeProb
点赞 3
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Scores every line of the given inputs with the language model and
 * returns the accumulated log probability divided by the accumulated
 * token count.
 * <p>
 * Each line is trimmed and split on runs of whitespace; the resulting
 * tokens are passed to {@code lm.scoreSentence}. A blank line therefore
 * yields one empty token. When the static {@code writer} is non-null, a
 * {@code score;score/tokens} record is printed per line, followed by one
 * final record for the totals.
 *
 * @param files inputs to score; {@code "-"} reads {@code System.in}, names
 *        ending in {@code ".gz"} are read through a {@link GZIPInputStream},
 *        anything else is read as a plain file
 * @param lm language model used to score each line
 * @return total log probability divided by total token count; NaN when no
 *         line was read at all
 * @throws IOException if an input cannot be opened or closed
 */
public static double computeProb(List<String> files, NgramLanguageModel<String> lm) throws IOException {
    double logProb = 0.0;
    long wordCount = 0;
    for (String file : files) {
        Logger.startTrack("Scoring file " + file + "; current log probability is " + logProb);
        final boolean fromStdin = file.equals("-");
        InputStream is;
        if (fromStdin) {
            is = System.in;
        } else {
            final FileInputStream fileIn = new FileInputStream(file);
            if (file.endsWith(".gz")) {
                try {
                    is = new GZIPInputStream(fileIn);
                } catch (IOException e) {
                    // The gzip header is read eagerly; do not leak the file handle if that fails.
                    fileIn.close();
                    throw e;
                }
            } else {
                is = fileIn;
            }
        }
        BufferedReader reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(is)));
        try {
            for (String line : Iterators.able(IOUtils.lineIterator(reader))) {
                List<String> words = Arrays.asList(line.trim().split("\\s+"));
                double sentenceProb = lm.scoreSentence(words);
                if (writer != null) {
                    writer.println(String.format("%f;%f", sentenceProb, sentenceProb / words.size()));
                }
                logProb += sentenceProb;
                wordCount += words.size();
            }
        } finally {
            // Release file handles after each input; System.in is left open for the caller.
            if (!fromStdin) {
                reader.close();
            }
        }
        Logger.endTrack();
    }
    if (writer != null) {
        writer.println(String.format("%f;%f", logProb, logProb / wordCount));
    }
    return logProb / wordCount;
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:28,
代码来源:ComputeLogProbabilityOfTextStream.java
示例3: getLMProb
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Converts the integer ids of a sentence to their decimal string form and
 * asks the language model for the log probability of that token sequence.
 * <p>
 * Element 0 of {@code sent} is not included in the scored sequence.
 * NOTE(review): presumably slot 0 holds something other than a token;
 * confirm with the code that builds the corpus arrays.
 *
 * @param lm   language model to query
 * @param sent integer-encoded sentence; only indices 1..length-1 are used
 * @return the value of {@code lm.getLogProb} for the stringified ids
 */
public static float getLMProb(NgramLanguageModel<String> lm, int sent[]) {
    final List<String> tokens = new ArrayList<String>();
    int position = 1;
    while (position < sent.length) {
        tokens.add(String.valueOf(sent[position]));
        position++;
    }
    return lm.getLogProb(tokens);
}
开发者ID:amirkamran,
项目名称:InvitationModel,
代码行数:8,
代码来源:InvitationModel.java
示例4: createLM
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Submits a task to {@code jobs} that trains a 4-gram Kneser-Ney language
 * model from {@code fileName}, scores every sentence of {@code corpus}
 * with it, and stores the scores in {@code lm[index]}.
 * <p>
 * The task always decrements {@code InvitationModel.latch} when it ends,
 * including when training or scoring throws, so that a thread waiting on
 * the latch is not blocked forever by a failed job. After a failure
 * {@code lm[index]} may be unset or only partially filled.
 *
 * @param fileName path of the training text; {@code fileName + ".lm"} is
 *        handed to the reader as its file argument (presumably the
 *        intermediate model file; confirm against LmReaders)
 * @param lm       output table; row {@code index} is replaced by a new array
 *        of length {@code corpus.length}
 * @param index    row of {@code lm} to fill
 * @param corpus   integer-encoded sentences to score
 */
public static void createLM(final String fileName, final float lm[][],
        final int index, final int corpus[][]) {
    jobs.execute(new Runnable() {
        @Override
        public void run() {
            try {
                log.info("Creating language model");
                final int lmOrder = 4;
                final List<String> inputFiles = new ArrayList<String>();
                inputFiles.add(fileName);
                final StringWordIndexer wordIndexer = new StringWordIndexer();
                wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
                wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
                wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);
                final NgramLanguageModel<String> createdLM = LmReaders
                        .readContextEncodedKneserNeyLmFromTextFile(inputFiles,
                                wordIndexer, lmOrder, new ConfigOptions(),
                                new File(fileName + ".lm"));
                lm[index] = new float[corpus.length];
                for (int i = 0; i < corpus.length; i++) {
                    int sent[] = corpus[i];
                    lm[index][i] = getLMProb(createdLM, sent);
                }
                log.info(".");
            } finally {
                // Must run on every exit path, otherwise a failed job leaves the latch short.
                InvitationModel.latch.countDown();
            }
        }
    });
}
开发者ID:amirkamran,
项目名称:InvitationModel,
代码行数:38,
代码来源:InvitationModel.java
示例5: readLmFromFile
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Loads a language model from {@code lmfile} and stores a caching wrapper
 * around it in the {@code lm} field.
 * <p>
 * The file is read as a serialized binary when its name ends with
 * {@code .b}, {@code .bi}, {@code .bin} or the suffix {@code binary} (the
 * last check does not require a leading dot). Any other name is read as an
 * ARPA file. The loaded model is cast to
 * {@code ArrayEncodedNgramLanguageModel} before wrapping, so a binary that
 * holds a different model type fails with a {@code ClassCastException}.
 *
 * @param lmfile path of the language model file
 */
private void readLmFromFile(String lmfile) {
    System.err.println("Loading language model from " + lmfile);
    final StringWordIndexer indexer = new StringWordIndexer();
    final boolean looksBinary = lmfile.endsWith(".b")
            || lmfile.endsWith(".bi")
            || lmfile.endsWith(".bin")
            || lmfile.endsWith("binary");
    final NgramLanguageModel<String> loaded;
    if (looksBinary) {
        loaded = LmReaders.readLmBinary(lmfile);
    } else {
        loaded = LmReaders.readArrayEncodedLmFromArpa(lmfile, false, indexer);
    }
    final ArrayEncodedNgramLanguageModel<String> arrayEncoded =
            (ArrayEncodedNgramLanguageModel<String>) loaded;
    lm = ArrayEncodedCachingLmWrapper.wrapWithCacheNotThreadSafe(arrayEncoded);
}
开发者ID:siddBanPsu,
项目名称:WikiKreator,
代码行数:21,
代码来源:MyBerkeleyLm.java
示例6: readLmFromFile
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Reads the language model stored in {@code lmfile}, wraps it in a
 * non-thread-safe cache and assigns the result to the {@code lm} field.
 * <p>
 * Names ending in {@code .b}, {@code .bi}, {@code .bin} or {@code binary}
 * are treated as serialized binaries; everything else is parsed as ARPA.
 * The model is cast to {@code ArrayEncodedNgramLanguageModel} before it is
 * wrapped.
 *
 * @param lmfile path of the language model file
 */
private void readLmFromFile(String lmfile) {
    System.err.println("Loading language model from " + lmfile);
    final StringWordIndexer wordIndexer = new StringWordIndexer();
    final boolean isArpa = !(lmfile.endsWith(".b") || lmfile.endsWith(".bi")
            || lmfile.endsWith(".bin") || lmfile.endsWith("binary"));
    NgramLanguageModel<String> model;
    if (isArpa) {
        model = LmReaders.readArrayEncodedLmFromArpa(lmfile, false, wordIndexer);
    } else {
        model = LmReaders.readLmBinary(lmfile);
    }
    lm = ArrayEncodedCachingLmWrapper.wrapWithCacheNotThreadSafe(
            (ArrayEncodedNgramLanguageModel<String>) model);
}
开发者ID:siddBanPsu,
项目名称:WikiKreator,
代码行数:15,
代码来源:LMReader.java
示例7: main
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Command-line entry point: reads a language model from a Google n-gram
 * directory and writes it out as a binary file.
 * <p>
 * Exactly two arguments are expected, the input directory and the output
 * file; any other count triggers {@code usage()}.
 *
 * @param argv {@code argv[0]} is the Google n-gram directory,
 *        {@code argv[1]} the binary file to write
 */
public static void main(final String[] argv) {
    if (argv.length != 2) {
        usage();
    }
    Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));

    final String googleDir = argv[0];
    Logger.startTrack("Reading Lm File " + googleDir + " . . . ");
    final NgramLanguageModel<String> model = LmReaders.readLmFromGoogleNgramDir(googleDir, true, false);
    Logger.endTrack();

    final String target = argv[1];
    Logger.startTrack("Writing to file " + target + " . . . ");
    LmReaders.writeLmBinary(model, target);
    Logger.endTrack();
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:14,
代码来源:MakeLmBinaryFromGoogle.java
示例8: readBinary
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Loads a binary language model, choosing the reader by whether a
 * vocabulary file was supplied. Each load is bracketed by a
 * {@code Logger} track.
 *
 * @param vocabFile  vocabulary file for a Google binary, or {@code null} to
 *        read {@code binaryFile} as a plain LM binary
 * @param binaryFile path of the binary language model
 * @return the loaded language model
 */
public static NgramLanguageModel<String> readBinary(String vocabFile, String binaryFile) {
    if (vocabFile == null) {
        Logger.startTrack("Reading LM Binary " + binaryFile);
        final NgramLanguageModel<String> plain = LmReaders.readLmBinary(binaryFile);
        Logger.endTrack();
        return plain;
    }
    Logger.startTrack("Reading Google Binary " + binaryFile + " with vocab " + vocabFile);
    final NgramLanguageModel<String> google = LmReaders.readGoogleLmBinary(binaryFile, vocabFile);
    Logger.endTrack();
    return google;
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:19,
代码来源:ComputeLogProbabilityOfTextStream.java
示例9: main
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Command-line entry point: reads a language model from an ARPA file and
 * writes it out as a binary file.
 * <p>
 * Arguments starting with {@code "-"} are matched against the string form
 * of each {@code Opts} value. The last one that matches selects how the
 * model is built, with {@code Opts.HASH_OPT} as the default. An
 * unmatched dash argument is reported on {@code System.err} and triggers
 * {@code usage()}. The remaining arguments must be exactly the ARPA input
 * file followed by the output file.
 *
 * @param argv command-line arguments
 */
public static void main(final String[] argv) {
    final List<String> positional = new ArrayList<String>();
    Opts chosen = Opts.HASH_OPT;
    for (final String arg : argv) {
        if (!arg.startsWith("-")) {
            positional.add(arg);
            continue;
        }
        // Look for the first option whose string form equals this argument.
        Opts matched = null;
        for (final Opts candidate : Opts.values()) {
            if (candidate.toString().equals(arg)) {
                matched = candidate;
                break;
            }
        }
        if (matched != null) {
            chosen = matched;
        } else {
            System.err.println("Unrecognized opts: " + arg);
            usage();
        }
    }
    if (positional.size() != 2) {
        usage();
    }
    Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));

    final String arpaPath = positional.get(0);
    Logger.startTrack("Reading Lm File " + arpaPath + " . . . ");
    final NgramLanguageModel<String> model = chosen.makeLm(arpaPath);
    Logger.endTrack();

    final String binaryPath = positional.get(1);
    Logger.startTrack("Writing to file " + binaryPath + " . . . ");
    LmReaders.writeLmBinary(model, binaryPath);
    Logger.endTrack();
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:32,
代码来源:MakeLmBinaryFromArpa.java
示例10: createFileFromText
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Playground routine: estimates a 3-gram Kneser-Ney model from
 * {@code src/test/resources/test.txt}, writes it to
 * {@code _svnignore/test.arpa.gz}, reloads that ARPA file as an
 * array-encoded model and prints the log probability of a few sample
 * n-grams to {@code System.out}.
 * <p>
 * A binary round-trip through {@code LmReaders.writeLmBinary} and
 * {@code LmReaders.readLmBinary} using {@code _svnignore/test.blm.gz}, and
 * the context-encoded and compressed reader variants, were sketched here
 * earlier but are not executed.
 */
public static void createFileFromText() {
    final String corpusPath = "src/test/resources/test.txt";
    final String arpaPath = "_svnignore/test.arpa.gz";
    final int order = 3;

    // Smoothing configuration; the discounts are written as float literals and widened to double.
    final ConfigOptions config = new ConfigOptions();
    config.kneserNeyDiscounts = new double[] { 0.75f, 0.6f, 0.6f };
    config.kneserNeyMinCounts = new double[] { 0, 0, 0, 0, 0, 0, 0 };

    final StringWordIndexer indexer = new StringWordIndexer();
    indexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
    indexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
    indexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

    // First feed the raw text into the Kneser-Ney callback, then have the callback write the ARPA file.
    final TextReader<String> textReader = new TextReader<String>(Arrays.asList(corpusPath), indexer);
    final KneserNeyLmReaderCallback<String> estimator =
            new KneserNeyLmReaderCallback<String>(indexer, order, config);
    textReader.parse(estimator);
    estimator.parse(new KneserNeyFileWritingLmReaderCallback<String>(new File(arpaPath), indexer));

    final NgramLanguageModel<String> model = LmReaders.readArrayEncodedLmFromArpa(arpaPath, false);

    final String[][] samples = {
        { "Hallo" },
        { "schöne", "neue", "welt" },
        { "schöne", "neue", "pups" },
        { "schöne", "neue", "globus" },
        { "schöne", "neue", "erde" },
    };
    for (final String[] ngram : samples) {
        System.out.println(model.getLogProb(Arrays.asList(ngram)));
    }
}
开发者ID:tudarmstadt-lt,
项目名称:topicrawler,
代码行数:48,
代码来源:BerkeleyLmPlayground.java
示例11: readLmBinary
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Deserializes a language model from a binary file. The result usually
 * has to be cast down to either {@link ContextEncodedNgramLanguageModel}
 * or {@link ArrayEncodedNgramLanguageModel} to be useful.
 * <p>
 * The cast to {@code NgramLanguageModel<W>} is unchecked: the word type
 * {@code W} is chosen by the caller and is not verified against the file
 * contents.
 *
 * @param <W>  word type the caller expects the model to use
 * @param file path of the binary file to read
 * @return the object read from {@code file}, viewed as a language model
 */
public static <W> NgramLanguageModel<W> readLmBinary(final String file) {
    final Object deserialized = IOUtils.readObjFileHard(file);
    @SuppressWarnings("unchecked")
    final NgramLanguageModel<W> model = (NgramLanguageModel<W>) deserialized;
    return model;
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:11,
代码来源:LmReaders.java
示例12: makeLm
点赞 2
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Builds the language model for this option by reading the given ARPA
 * file through {@code LmReaders.readArrayEncodedLmFromArpa}, passing
 * {@code false} as the second argument.
 * NOTE(review): the flag presumably disables compression; confirm against
 * the LmReaders signature.
 *
 * @param file path of the ARPA file to read
 * @return the array-encoded language model read from {@code file}
 */
@Override
public NgramLanguageModel<String> makeLm(final String file) {
return LmReaders.readArrayEncodedLmFromArpa(file, false);
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:5,
代码来源:MakeLmBinaryFromArpa.java
示例13: writeLmBinary
点赞 1
import edu.berkeley.nlp.lm.NgramLanguageModel; //导入依赖的package包/类
/**
 * Writes a binary file representing the LM using the built-in
 * serialization. These binaries should load much faster than ARPA files.
 * The work is delegated to {@code IOUtils.writeObjFileHard}.
 *
 * @param <W>  word type of the language model
 * @param lm   language model to serialize
 * @param file path of the binary file to write
 */
public static <W> void writeLmBinary(final NgramLanguageModel<W> lm, final String file) {
IOUtils.writeObjFileHard(file, lm);
}
开发者ID:jasonbaldridge,
项目名称:maul,
代码行数:12,
代码来源:LmReaders.java