
Typical usage and code examples of the Java NgramLanguageModel class


This article collects typical code examples of the Java class edu.berkeley.nlp.lm.NgramLanguageModel. If you are wondering what the NgramLanguageModel class is for or how it is used, the curated class examples below should help.

NgramLanguageModel belongs to the edu.berkeley.nlp.lm package. Thirteen code examples of the class are shown below, sorted by popularity by default. You can upvote the examples you like or find useful; your votes help the site recommend better Java code examples.

Example 1: main

Upvotes: 3

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static void main(final String[] argv) throws IOException {
    int i = 0;
    if (i >= argv.length) usage();
    String vocabFile = null;
    if (argv[i].equals("-o")) {
        if (++i >= argv.length) usage();
        writer = new PrintStream(new File(argv[i++]));
    }
    if (argv[i].equals("-g")) {
        if (++i >= argv.length) usage();
        vocabFile = argv[i++];
    }
    if (i >= argv.length) usage();
    String binaryFile = argv[i++];
    List<String> files = Arrays.asList(Arrays.copyOfRange(argv, i, argv.length));
    if (files.isEmpty()) files = Collections.singletonList("-");
    Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));
    NgramLanguageModel<String> lm = readBinary(vocabFile, binaryFile);
    double prob = computeProb(files, lm);
    System.out.println(String.format("Normalized Log probability of text is: %f", prob));
    if (writer != null) {
        writer.close();
    }
}
 

Developer: jasonbaldridge · Project: maul · Lines: 25 · Source file: ComputeLogProbabilityOfTextStream.java

Example 2: computeProb

Upvotes: 3

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
/**
 * Scores every line of every input file with the given language model and
 * returns the per-word (normalized) log probability over all input.
 * 
 * @param files input file paths; "-" means standard input, and ".gz" files are decompressed
 * @param lm the language model used for scoring
 * @return the total log probability divided by the total word count
 * @throws IOException if an input file cannot be read
 */
public static double computeProb(List<String> files, NgramLanguageModel<String> lm) throws IOException {
    double logProb = 0.0;
    long wordCount = 0;
    for (String file : files) {
        Logger.startTrack("Scoring file " + file + "; current log probability is " + logProb);
        final InputStream is = (file.equals("-")) ? System.in : (file.endsWith(".gz") ? new GZIPInputStream(new FileInputStream(file))
                : new FileInputStream(file));
        BufferedReader reader = new BufferedReader(new InputStreamReader(new BufferedInputStream(is)));
        for (String line : Iterators.able(IOUtils.lineIterator(reader))) {
            List<String> words = Arrays.asList(line.trim().split("\\s+"));
            double sentenceProb = lm.scoreSentence(words);
            if (writer != null)
                writer.println(String.format("%f;%f", sentenceProb, sentenceProb / words.size()));
            logProb += sentenceProb;
            wordCount +=  words.size();
        }
        Logger.endTrack();
    }
    if (writer != null)
        writer.println(String.format("%f;%f", logProb, logProb / wordCount));
    return logProb/wordCount;
}
 

Developer: jasonbaldridge · Project: maul · Lines: 28 · Source file: ComputeLogProbabilityOfTextStream.java
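Example 2 streams whole files through the model. If all you need is the score of a single, already tokenized sentence, a minimal sketch could look like the following; the model path and class name are placeholders, the import paths assume the usual berkeleylm package layout, and readLmBinary and scoreSentence are the same calls used above and in Example 11.

import java.util.Arrays;
import java.util.List;

import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.io.LmReaders;

public class ScoreOneSentenceSketch {
    public static void main(String[] args) {
        // Placeholder path to a previously written binary LM (see Example 13).
        NgramLanguageModel<String> lm = LmReaders.readLmBinary("my.blm.gz");
        // Whitespace-tokenized sentence, scored the same way computeProb scores each line.
        List<String> words = Arrays.asList("this", "is", "a", "test");
        double sentenceProb = lm.scoreSentence(words);
        System.out.println(String.format("%f;%f", sentenceProb, sentenceProb / words.size()));
    }
}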

Example 3: getLMProb

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static float getLMProb(NgramLanguageModel<String> lm, int sent[]) {
	// Convert the sentence's integer word IDs (skipping position 0) to their
	// string form, then score the resulting token list with the language model.
	List<String> words = new ArrayList<String>();
	for (int i = 1; i < sent.length; i++) {
		words.add("" + sent[i]);
	}
	return lm.getLogProb(words);
}
 

Developer: amirkamran · Project: InvitationModel · Lines: 8 · Source file: InvitationModel.java

Example 4: createLM

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static void createLM(final String fileName, final float lm[][],
		final int index, final int corpus[][]) {

	jobs.execute(new Runnable() {

		@Override
		public void run() {
			log.info("Creating language model");

			NgramLanguageModel<String> createdLM = null;
			final int lmOrder = 4;
			final List<String> inputFiles = new ArrayList<String>();
			inputFiles.add(fileName);
			final StringWordIndexer wordIndexer = new StringWordIndexer();
			wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
			wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
			wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);

			createdLM = LmReaders
					.readContextEncodedKneserNeyLmFromTextFile(inputFiles,
							wordIndexer, lmOrder, new ConfigOptions(),
							new File(fileName + ".lm"));

			lm[index] = new float[corpus.length];
			
			for (int i = 0; i < corpus.length; i++) {
				int sent[] = corpus[i];
				lm[index][i] = getLMProb(createdLM, sent);
			}

			log.info(".");

			InvitationModel.latch.countDown();
		}

	});
}
 

Developer: amirkamran · Project: InvitationModel · Lines: 38 · Source file: InvitationModel.java
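createLM submits its work to a shared executor (jobs) and signals completion through InvitationModel.latch, neither of which appears in the excerpt. The sketch below shows the usual shape of that wiring; only the names jobs and latch are taken from the example, and everything else (class name, pool size, driver method) is an assumption.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

// Hypothetical wiring for the `jobs` executor and `latch` that createLM relies on.
public class LmJobWiringSketch {

    static final ExecutorService jobs =
            Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors());
    static CountDownLatch latch;

    static void buildAllModels(final String[] corpusFiles) throws InterruptedException {
        latch = new CountDownLatch(corpusFiles.length);   // one count per model to build
        for (final String file : corpusFiles) {
            jobs.execute(new Runnable() {
                @Override
                public void run() {
                    try {
                        // ... build and score one language model for `file`, as createLM does ...
                    } finally {
                        latch.countDown();                // mirrors InvitationModel.latch.countDown()
                    }
                }
            });
        }
        latch.await();                                    // block until every model is ready
        jobs.shutdown();
    }
}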

Example 5: readLmFromFile

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
/**
 * Decides whether the LM is stored in a binary file (based on the extensions
 * .b, .bi, .bin, .binary) and loads it with the binary reader; otherwise the
 * file is read as an ARPA file. The result is wrapped in a caching wrapper.
 * 
 * @param lmfile path to the language model file (binary or ARPA)
 */
private void readLmFromFile(String lmfile) {
	System.err.println("Loading language model from " + lmfile);
	StringWordIndexer swi = new StringWordIndexer();
	NgramLanguageModel<String> ngramLm;
	if (lmfile.endsWith(".b") || lmfile.endsWith(".bi")
			|| lmfile.endsWith(".bin") || lmfile.endsWith("binary")) {
		ngramLm = LmReaders.readLmBinary(lmfile);
	} else {
		ngramLm = LmReaders
				.readArrayEncodedLmFromArpa(lmfile, false, swi);
	}
	lm = ArrayEncodedCachingLmWrapper
			.wrapWithCacheNotThreadSafe((ArrayEncodedNgramLanguageModel<String>) ngramLm);
}
 

Developer: siddBanPsu · Project: WikiKreator · Lines: 21 · Source file: MyBerkeleyLm.java
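A minimal sketch of querying the cached model that readLmFromFile builds; the class name, model path and query words are placeholders, the import paths assume the usual berkeleylm layout, and the cast mirrors the one in the example (which assumes an array-encoded model was stored). As the method name suggests, a model wrapped with wrapWithCacheNotThreadSafe should only be queried from a single thread.

import java.util.Arrays;

import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.cache.ArrayEncodedCachingLmWrapper;
import edu.berkeley.nlp.lm.io.LmReaders;

public class CachedLmQuerySketch {
    public static void main(String[] args) {
        // Placeholder binary LM path; ".bin" matches one of the extensions checked above.
        NgramLanguageModel<String> ngramLm = LmReaders.readLmBinary("model.bin");
        ArrayEncodedNgramLanguageModel<String> lm = ArrayEncodedCachingLmWrapper
                .wrapWithCacheNotThreadSafe((ArrayEncodedNgramLanguageModel<String>) ngramLm);
        // Repeated queries for the same n-grams can be answered from the cache.
        System.out.println(lm.getLogProb(Arrays.asList("a", "language", "model")));
        System.out.println(lm.getLogProb(Arrays.asList("a", "language", "model"))); // cached lookup
    }
}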

Example 6: readLmFromFile

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
private void readLmFromFile(String lmfile) {
	System.err.println("Loading language model from " + lmfile);
	StringWordIndexer swi = new StringWordIndexer();
	NgramLanguageModel<String> ngramLm;
	if (lmfile.endsWith(".b") || lmfile.endsWith(".bi")
			|| lmfile.endsWith(".bin") || lmfile.endsWith("binary")) {
		ngramLm = LmReaders.readLmBinary(lmfile);
	} else {
		ngramLm = LmReaders
				.readArrayEncodedLmFromArpa(lmfile, false, swi);
	}
	lm = ArrayEncodedCachingLmWrapper
			.wrapWithCacheNotThreadSafe((ArrayEncodedNgramLanguageModel<String>) ngramLm);
}
 

Developer: siddBanPsu · Project: WikiKreator · Lines: 15 · Source file: LMReader.java

Example 7: main

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static void main(final String[] argv) {
	if (argv.length != 2) usage();
	Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));
	Logger.startTrack("Reading Lm File " + argv[0] + " . . . ");
	final String googleDir = argv[0];
	final NgramLanguageModel<String> lm = LmReaders.readLmFromGoogleNgramDir(googleDir, true, false);
	Logger.endTrack();
	final String outFile = argv[1];
	Logger.startTrack("Writing to file " + outFile + " . . . ");
	LmReaders.writeLmBinary(lm, outFile);
	Logger.endTrack();

}
 

Developer: jasonbaldridge · Project: maul · Lines: 14 · Source file: MakeLmBinaryFromGoogle.java

Example 8: readBinary

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
/**
 * Loads a language model from a binary file, using the Google-binary reader
 * when a vocabulary file is given and the standard binary reader otherwise.
 * 
 * @param vocabFile vocabulary file for a Google-format binary, or null for a regular binary LM
 * @param binaryFile path to the binary language model file
 * @return the loaded language model
 */
public static NgramLanguageModel<String> readBinary(String vocabFile, String binaryFile) {
    NgramLanguageModel<String> lm;
    if (vocabFile != null) {
        Logger.startTrack("Reading Google Binary " + binaryFile + " with vocab " + vocabFile);
        lm = LmReaders.readGoogleLmBinary(binaryFile, vocabFile);
        Logger.endTrack();
    } else {
        Logger.startTrack("Reading LM Binary " + binaryFile);
        lm = LmReaders.readLmBinary(binaryFile);
        Logger.endTrack();
    }
    return lm;
}
 

Developer: jasonbaldridge · Project: maul · Lines: 19 · Source file: ComputeLogProbabilityOfTextStream.java

Example 9: main

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static void main(final String[] argv) {
	final List<String> fileArgs = new ArrayList<String>();
	Opts finalOpt = Opts.HASH_OPT;
	OUTER: for (final String arg : argv) {
		if (arg.startsWith("-")) {
			for (final Opts opts : Opts.values()) {
				if (opts.toString().equals(arg)) {
					finalOpt = opts;
					continue OUTER;
				}
			}
			System.err.println("Unrecognized opts: " + arg);
			usage();
		} else
			fileArgs.add(arg);
	}
	if (fileArgs.size() != 2) {
		usage();
	}

	Logger.setGlobalLogger(new Logger.SystemLogger(System.out, System.err));
	final String lmFile = fileArgs.get(0);
	Logger.startTrack("Reading Lm File " + lmFile + " . . . ");
	final NgramLanguageModel<String> lm = finalOpt.makeLm(lmFile);
	Logger.endTrack();
	final String outFile = fileArgs.get(1);
	Logger.startTrack("Writing to file " + outFile + " . . . ");
	LmReaders.writeLmBinary(lm, outFile);
	Logger.endTrack();

}
 

Developer: jasonbaldridge · Project: maul · Lines: 32 · Source file: MakeLmBinaryFromArpa.java
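The Opts enum this main method iterates over is not shown; only HASH_OPT appears in the excerpt, and Example 12 shows one makeLm override. Below is a hedged sketch of what such an enum could look like, reusing only reader calls that appear elsewhere in this article (Examples 10 and 12); the flag strings and the variants other than HASH_OPT are assumptions.

import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.io.LmReaders;

// Hypothetical sketch of the Opts enum used in Example 9; only HASH_OPT is named in the excerpt.
enum Opts {
    HASH_OPT("-h") {
        @Override
        public NgramLanguageModel<String> makeLm(final String file) {
            return LmReaders.readArrayEncodedLmFromArpa(file, false);   // hash-backed array encoding (Example 12)
        }
    },
    COMPRESS_OPT("-c") {
        @Override
        public NgramLanguageModel<String> makeLm(final String file) {
            return LmReaders.readArrayEncodedLmFromArpa(file, true);    // compressed array encoding (Example 10 comment)
        }
    },
    CONTEXT_OPT("-e") {
        @Override
        public NgramLanguageModel<String> makeLm(final String file) {
            return LmReaders.readContextEncodedLmFromArpa(file);        // context encoding (Example 10 comment)
        }
    };

    private final String flag;

    Opts(final String flag) { this.flag = flag; }

    @Override
    public String toString() { return flag; }    // matched against command-line args in Example 9

    public abstract NgramLanguageModel<String> makeLm(String file);
}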

Example 10: createFileFromText

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
public static void createFileFromText() {
	// MakeKneserNeyArpaFromText.main(new String[] { "3", "lmorder3.arpa", "in/1.txt" });
	String txtfile = "src/test/resources/test.txt";
	String arpafile = "_svnignore/test.arpa.gz";
	String binfile = "_svnignore/test.blm.gz";

	// if (!new File(arpafile).exists()) {
	final StringWordIndexer wordIndexer = new StringWordIndexer();
	wordIndexer.setStartSymbol(ArpaLmReader.START_SYMBOL);
	wordIndexer.setEndSymbol(ArpaLmReader.END_SYMBOL);
	wordIndexer.setUnkSymbol(ArpaLmReader.UNK_SYMBOL);
	ConfigOptions opts = new ConfigOptions();
	opts.kneserNeyDiscounts = new double[] { 0.75f, 0.6f, 0.6f };
	opts.kneserNeyMinCounts = new double[] { 0, 0, 0, 0, 0, 0, 0 };

	final TextReader<String> reader = new TextReader<String>(Arrays.asList(txtfile), wordIndexer);
	KneserNeyLmReaderCallback<String> kneserNeyReader = new KneserNeyLmReaderCallback<String>(wordIndexer, 3, opts);
	reader.parse(kneserNeyReader);
	// NgramLanguageModel<String> lm = kneserNeyReader;

	kneserNeyReader.parse(new KneserNeyFileWritingLmReaderCallback<String>(new File(arpafile), wordIndexer));
	//		}
	//		if (!new File(binfile).exists()) {
	//			// HASH OPT
	NgramLanguageModel<String> lm = LmReaders.readArrayEncodedLmFromArpa(arpafile, false);
	//			// CONTEXT OPT
	//			// NgramLanguageModel<String> lm = LmReaders.readContextEncodedLmFromArpa(arpafile);
	//			// HASH COMPRESS OPT
	//			// NgramLanguageModel<String> lm = LmReaders.readArrayEncodedLmFromArpa(arpafile, true);
	//
	//			LmReaders.writeLmBinary(lm, binfile);
	//		}
	//
	//		// NgramLanguageModel<String> lm = LmReaders.readLmBinary(binfile);
	//		NgramLanguageModel<String> lm = LmReaders.readArrayEncodedLmFromArpa(arpafile, false);


	System.out.println(lm.getLogProb(Arrays.asList("Hallo")));
	System.out.println(lm.getLogProb(Arrays.asList("schöne", "neue", "welt")));
	System.out.println(lm.getLogProb(Arrays.asList("schöne", "neue", "pups")));
	System.out.println(lm.getLogProb(Arrays.asList("schöne", "neue", "globus")));
	System.out.println(lm.getLogProb(Arrays.asList("schöne", "neue", "erde")));

	// LmReaders.readn


}
 

Developer: tudarmstadt-lt · Project: topicrawler · Lines: 48 · Source file: BerkeleyLmPlayground.java
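The commented-out section of the example sketches an ARPA-to-binary round trip. In runnable form it might look like this; the paths reuse the placeholders from the example, and writeLmBinary / readLmBinary are the methods shown in Examples 13 and 11.

import java.util.Arrays;

import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.io.LmReaders;

public class BinaryRoundTripSketch {
    public static void main(String[] args) {
        String arpafile = "_svnignore/test.arpa.gz";
        String binfile = "_svnignore/test.blm.gz";
        // Load the ARPA file produced above, write it out as a binary, and read it back.
        NgramLanguageModel<String> lm = LmReaders.readArrayEncodedLmFromArpa(arpafile, false);
        LmReaders.writeLmBinary(lm, binfile);
        NgramLanguageModel<String> reloaded = LmReaders.readLmBinary(binfile);
        // Both models should give the same score for the same n-gram.
        System.out.println(lm.getLogProb(Arrays.asList("schöne", "neue", "welt")));
        System.out.println(reloaded.getLogProb(Arrays.asList("schöne", "neue", "welt")));
    }
}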

Example 11: readLmBinary

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
/**
 * Reads a binary file representing an LM. These will need to be cast down
 * to either {@link ContextEncodedNgramLanguageModel} or
 * {@link ArrayEncodedNgramLanguageModel} to be useful.
 */
public static <W> NgramLanguageModel<W> readLmBinary(final String file) {
	@SuppressWarnings("unchecked")
	final NgramLanguageModel<W> lm = (NgramLanguageModel<W>) IOUtils.readObjFileHard(file);
	return lm;
}
 

Developer: jasonbaldridge · Project: maul · Lines: 11 · Source file: LmReaders.java
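The Javadoc above notes that the returned model usually has to be cast down before the encoding-specific APIs are useful. A minimal sketch of that check-and-cast follows; the class name and model path are placeholders, and the only berkeleylm calls used are ones already shown in this article.

import java.util.Arrays;

import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
import edu.berkeley.nlp.lm.ContextEncodedNgramLanguageModel;
import edu.berkeley.nlp.lm.NgramLanguageModel;
import edu.berkeley.nlp.lm.io.LmReaders;

public class CastDownSketch {
    public static void main(String[] args) {
        // Placeholder binary LM path.
        NgramLanguageModel<String> lm = LmReaders.readLmBinary("model.blm");
        if (lm instanceof ArrayEncodedNgramLanguageModel) {
            // Array-encoded models can also be wrapped with ArrayEncodedCachingLmWrapper (Examples 5 and 6).
            ArrayEncodedNgramLanguageModel<String> arrayLm = (ArrayEncodedNgramLanguageModel<String>) lm;
            System.out.println(arrayLm.getLogProb(Arrays.asList("a", "test")));
        } else if (lm instanceof ContextEncodedNgramLanguageModel) {
            // Context-encoded models expose context-based scoring instead.
            ContextEncodedNgramLanguageModel<String> contextLm = (ContextEncodedNgramLanguageModel<String>) lm;
            System.out.println(contextLm.getLogProb(Arrays.asList("a", "test")));
        }
    }
}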

Example 12: makeLm

Upvotes: 2

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
@Override
public NgramLanguageModel<String> makeLm(final String file) {
	return LmReaders.readArrayEncodedLmFromArpa(file, false);
}
 

Developer: jasonbaldridge · Project: maul · Lines: 5 · Source file: MakeLmBinaryFromArpa.java

Example 13: writeLmBinary

Upvotes: 1

import edu.berkeley.nlp.lm.NgramLanguageModel; // import the required package/class
/**
 * Writes a binary file representing the LM using the built-in
 * serialization. These binaries should load much faster than ARPA files.
 * 
 * @param <W> the word type of the language model
 * @param lm the language model to serialize
 * @param file the output file path
 */
public static <W> void writeLmBinary(final NgramLanguageModel<W> lm, final String file) {
	IOUtils.writeObjFileHard(file, lm);
}
 

Developer: jasonbaldridge · Project: maul · Lines: 12 · Source file: LmReaders.java

