• 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏吧

Java ModelSentence类的典型用法和代码示例

java 3次浏览

本文整理汇总了Java中cmu.arktweetnlp.impl.ModelSentence的典型用法代码示例。如果您正苦于以下问题:Java ModelSentence类的具体用法?Java ModelSentence怎么用?Java ModelSentence使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。

ModelSentence类属于cmu.arktweetnlp.impl包,在下文中一共展示了ModelSentence类的25个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: tag

点赞 3

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tags an already-tokenized sentence using greedy decoding.
 *
 * The working objects are stored in {@code sentence}, {@code tt} and {@code ms},
 * which are declared outside this method, so each call overwrites them.
 *
 * @param tokens the tokens of one sentence
 * @return the label name for every position below {@code sentence.T()}, in order
 */
public List<String> tag(List<String> tokens) {

		// Rebuild the working objects for this call.
		sentence = new Sentence();
		tt = new TaggedToken();
		sentence.tokens = tokens;

		// Extract features, then decode greedily (second argument is false).
		ms = new ModelSentence(sentence.T());
		featureExtractor.computeFeatures(sentence, ms);
		model.greedyDecode(ms, false);

		// Translate each predicted label id into its name.
		final int length = sentence.T();
		ArrayList<String> labelNames = new ArrayList<String>(length);
		int position = 0;
		while (position < length) {
			labelNames.add(model.labelVocab.name(ms.labels[position]));
			position++;
		}

		return labelNames;
	}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:18,
代码来源:ENTweetPOSTagger.java

示例2: tokenizeAndTag

点赞 3

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tokenizes the raw text of a single tweet and tags every resulting token
 * using greedy decoding.
 *
 * @param text raw tweet text
 * @return one TaggedToken (token plus label name) per position, in order
 * @throws RuntimeException if {@code model} is still null
 **/
public List<TaggedToken> tokenizeAndTag(String text) {
	if (model == null) {
		throw new RuntimeException("Must loadModel() first before tagging anything");
	}

	List<String> words = Twokenize.tokenizeRawTweetText(text);
	Sentence sentence = new Sentence();
	sentence.tokens = words;

	// Extract features, then decode greedily (second argument is false).
	ModelSentence decoded = new ModelSentence(sentence.T());
	featureExtractor.computeFeatures(sentence, decoded);
	model.greedyDecode(decoded, false);

	// Pair every token with the name of its predicted label.
	ArrayList<TaggedToken> result = new ArrayList<TaggedToken>();
	final int length = sentence.T();
	int position = 0;
	while (position < length) {
		TaggedToken tagged = new TaggedToken();
		tagged.token = words.get(position);
		tagged.tag = model.labelVocab.name(decoded.labels[position]);
		result.add(tagged);
		position++;
	}

	return result;
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:25,
代码来源:Tagger.java

示例3: outputJustTagging

点赞 3

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Writes the tagging held in mSent's labels to outputStream: one line per token
 * (token, tab, label name, and a tab plus formatted confidence when confidences
 * are present), followed by an empty line.
 *
 * Only the "conll" output format is handled; any other value is passed to die().
 */
public void outputJustTagging(Sentence lSent, ModelSentence mSent) {
	// mSent might be null! NOTE(review): it is dereferenced below without a check.

	if (!outputFormat.equals("conll")) {
		die("bad output format for just tagging: " + outputFormat);
		return;
	}

	for (int i = 0; i < lSent.T(); i++) {
		String token = lSent.tokens.get(i);
		String labelName = tagger.model.labelVocab.name(mSent.labels[i]);
		outputStream.printf("%s\t%s", token, labelName);

		// The confidence column is only written when confidences were stored.
		if (mSent.confidences != null) {
			String confidence = formatConfidence(mSent.confidences[i]);
			outputStream.printf("\t%s", confidence);
		}
		outputStream.printf("\n");
	}
	// Blank line terminates the sentence.
	outputStream.println("");
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:23,
代码来源:RunTagger.java

示例4: getPOStags

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Returns POS tags from a List of tokens using the CMU TweetNLP tool.
 *
 * If tagging fails, the problem is reported on System.err (message, the tokens,
 * and the stack trace) and the result contains exactly one placeholder tag "?"
 * per input token.
 *
 * @param tokens the input tokens 
 * @return the list of POS tags
 */	
public List<String> getPOStags(List<String> tokens) {

	ArrayList<String> tags = new ArrayList<String>();

	try{
		Sentence sentence = new Sentence();
		sentence.tokens = tokens;
		ModelSentence ms = new ModelSentence(sentence.T());
		this.tagger.featureExtractor.computeFeatures(sentence, ms);
		this.tagger.model.greedyDecode(ms, false);

		// Translate each predicted label id into its name.
		for (int t = 0; t < sentence.T(); t++) {
			String tag = this.tagger.model.labelVocab.name(ms.labels[t]);
			tags.add(tag);
		}
	}
	catch(Exception e){
		System.err.println("Tagging Problem");

		// Drop any tags collected before the failure. Otherwise the result would
		// hold the partial tags followed by one "?" per token and no longer line
		// up with the input.
		tags.clear();
		for(int i=0;i<tokens.size();i++){
			tags.add("?");
			System.err.print(tokens.get(i));
		}

		e.printStackTrace(System.err);
	}

	return tags;
}
 

开发者ID:felipebravom,
项目名称:AffectiveTweets,
代码行数:39,
代码来源:TweetToSparseFeatureVector.java

示例5: tag

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tags an already-tokenized sentence using greedy decoding.
 *
 * @param tokens the tokens of one sentence
 * @return one TaggedToken (token plus label name) per position, in order
 */
private List<TaggedToken> tag(List<String> tokens) {
  Sentence sentence = new Sentence();
  sentence.tokens = tokens;

  // Extract features, then decode greedily (second argument is false).
  ModelSentence decoded = new ModelSentence(sentence.T());
  m_featureExtractor.computeFeatures(sentence, decoded);
  m_model.greedyDecode(decoded, false);

  // Pair every token with the name of its predicted label.
  List<TaggedToken> result = new ArrayList<TaggedToken>();
  final int length = sentence.T();
  int position = 0;
  while (position < length) {
    String token = tokens.get(position);
    String labelName = m_model.labelVocab.name(decoded.labels[position]);
    result.add(new TaggedToken(token, labelName));
    position++;
  }
  return result;
}
 

开发者ID:millecker,
项目名称:senti-storm,
代码行数:16,
代码来源:POSTaggerBolt.java

示例6: tag

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tags an already-tokenized sentence using greedy decoding.
 *
 * @param tokens the tokens of one sentence
 * @return one TaggedToken (token plus label name) per position, in order
 */
public List<TaggedToken> tag(List<String> tokens) {
  Sentence sentence = new Sentence();
  sentence.tokens = tokens;

  // Extract features, then decode greedily (second argument is false).
  ModelSentence decoded = new ModelSentence(sentence.T());
  m_featureExtractor.computeFeatures(sentence, decoded);
  m_model.greedyDecode(decoded, false);

  // Pair every token with the name of its predicted label.
  List<TaggedToken> result = new ArrayList<TaggedToken>();
  final int length = sentence.T();
  int position = 0;
  while (position < length) {
    String token = tokens.get(position);
    String labelName = m_model.labelVocab.name(decoded.labels[position]);
    result.add(new TaggedToken(token, labelName));
    position++;
  }
  return result;
}
 

开发者ID:millecker,
项目名称:senti-storm,
代码行数:16,
代码来源:POSTagger.java

示例7: goDecode

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Decodes mSent with the configured decoder (make config option perhaps).
 * GREEDY forwards showConfidence to the decoder; VITERBI does not receive it.
 * Any other decoder value leaves mSent untouched.
 **/
	public void goDecode(ModelSentence mSent) {
		if (decoder == Decoder.GREEDY) {
			tagger.model.greedyDecode(mSent, showConfidence);
			return;
		}
		if (decoder == Decoder.VITERBI) {
			// A guard that rejected showConfidence with this decoder used to sit
			// here but is disabled; showConfidence is simply not passed along.
			tagger.model.viterbiDecode(mSent);
		}
	}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:10,
代码来源:RunTagger.java

示例8: evaluateOOV

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Updates the oovTokens / oovTokensCorrect counters for one sentence.
 * Only tokens contained in wordsInCluster() are counted; such a token is
 * counted as correct when the predicted label id equals the id of lSent's label.
 */
private void evaluateOOV(Sentence lSent, ModelSentence mSent) throws FileNotFoundException, IOException, ClassNotFoundException {
  for (int i = 0; i < mSent.T; i++) {
    final int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
    final int predicted = mSent.labels[i];

    // Tokens outside the cluster word list do not take part in this statistic.
    if (!wordsInCluster().contains(lSent.tokens.get(i))) {
      continue;
    }
    if (gold == predicted) {
      oovTokensCorrect += 1;
    }
    oovTokens += 1;
  }
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:RunTagger.java

示例9: getconfusion

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Adds one sentence to a confusion matrix indexed as confusion[gold][predicted].
 * Positions whose gold label maps to -1 are skipped.
 */
private void getconfusion(Sentence lSent, ModelSentence mSent, int[][] confusion) {
  for (int i = 0; i < mSent.T; i++) {
    final int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
    final int predicted = mSent.labels[i];
    if (gold == -1) {
      // presumably -1 means the label is not in labelVocab -- verify against Vocabulary.num
      continue;
    }
    confusion[gold][predicted] += 1;
  }
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:RunTagger.java

示例10: evaluateSentenceTagging

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Updates the running accuracy counters for one sentence: numTokens grows by one
 * per position, numTokensCorrect by one per position whose predicted label id
 * equals the id of lSent's label.
 */
public void evaluateSentenceTagging(Sentence lSent, ModelSentence mSent) {
	int position = 0;
	while (position < mSent.T) {
		final int gold = tagger.model.labelVocab.num(lSent.labels.get(position));
		final int predicted = mSent.labels[position];
		if (gold == predicted) {
			numTokensCorrect += 1;
		}
		numTokens += 1;
		position++;
	}
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:RunTagger.java

示例11: outputPrependedTagging

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Prints one tab-separated line to outputStream: the joined tokens, then (unless
 * suppressed) the joined tag names, then (when showConfidence is set) the joined
 * formatted confidences, and finally the original input line.
 * Assumes mSent's labels hold the tagging.
 * 
 * @param lSent        sentence supplying the tokens
 * @param mSent        supplies labels and confidences; only read when tags or confidences are emitted
 * @param suppressTags when true the tag column is left out
 * @param inputLine -- assume does NOT have trailing newline.  (default from java's readLine)
 */
public void outputPrependedTagging(Sentence lSent, ModelSentence mSent, 
		boolean suppressTags, String inputLine) {
	// mSent might be null!

	final int length = lSent.T();
	String[] words = new String[length];
	String[] tagNames = new String[length];
	String[] confidences = new String[length];

	// Fill the per-token columns that were requested.
	for (int i = 0; i < length; i++) {
		words[i] = lSent.tokens.get(i);
		if (!suppressTags) {
			tagNames[i] = tagger.model.labelVocab.name(mSent.labels[i]);
		}
		if (showConfidence) {
			confidences[i] = formatConfidence(mSent.confidences[i]);
		}
	}

	// Assemble the columns, each one terminated by a tab.
	StringBuilder line = new StringBuilder();
	line.append(StringUtils.join(words)).append("\t");
	if (!suppressTags) {
		line.append(StringUtils.join(tagNames)).append("\t");
	}
	if (showConfidence) {
		line.append(StringUtils.join(confidences)).append("\t");
	}
	line.append(inputLine);

	outputStream.println(line.toString());
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:41,
代码来源:RunTagger.java

示例12: computeFeatures

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs feature extraction for a single sentence.
 * 
 * Reads the textual sentence and fills modelSentence with numberized observation
 * features. At training time it additionally copies the labels (as vocabulary
 * ids) into modelSentence and computes the edge features from them.
 */
public void computeFeatures(Sentence linguisticSentence, ModelSentence modelSentence) {
	final int length = linguisticSentence.T();
	assert length > 0; //TODO: handle this when assertions are off

	computeObservationFeatures(linguisticSentence, modelSentence);
	if (!isTrainingTime) {
		return;
	}

	// Training only: numberize the labels, then derive edge features from them.
	int position = 0;
	while (position < length) {
		modelSentence.labels[position] = model.labelVocab.num( linguisticSentence.labels.get(position) );
		position++;
	}
	computeCheatingEdgeFeatures(linguisticSentence, modelSentence);
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:18,
代码来源:FeatureExtractor.java

示例13: computeCheatingEdgeFeatures

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Training only: fills the edge features by looking at modelSentence's own labels.
 * Position 0 gets the model's start marker; every later position gets the label
 * of the position before it.
 *
 * @param sentence      supplies the sentence length
 * @param modelSentence labels are read from it, edgeFeatures are written to it
 */
private void computeCheatingEdgeFeatures(Sentence sentence, ModelSentence modelSentence) {
	assert isTrainingTime;
	modelSentence.edgeFeatures[0] = model.startMarker();
	for (int previous = 0; previous + 1 < sentence.T(); previous++) {
		modelSentence.edgeFeatures[previous + 1] = modelSentence.labels[previous];
	}
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:13,
代码来源:FeatureExtractor.java

示例14: computeObservationFeatures

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Collects features from every registered extractor, converts the feature names
 * to ids, and stores (id, value) pairs per position in
 * modelSentence.observationFeatures. In dump mode the feature names of every
 * token are also printed to System.out.
 */
private void computeObservationFeatures(Sentence sentence, ModelSentence modelSentence) {
	// Step 1: gather features in feature-name form.
	PositionFeaturePairs extracted = new PositionFeaturePairs();
	for (FeatureExtractorInterface extractor : allFeatureExtractors) {
		extractor.addFeatures(sentence.tokens, extracted);
	}

	// Step 2: numberize.  This could be merged into step 1 to avoid temporaries
	// that later turn out to be OOV... but is this really an issue?
	final int count = extracted.size();
	for (int k = 0; k < count; k++) {
		int position = extracted.labelIndexes.get(k);
		String featureName = extracted.featureNames.get(k);
		int featureId = model.featureVocab.num(featureName);

		// OOV features are dropped at test time.  Because conjunctions with labels
		// are implicit, these are base features never seen for *any* label during
		// training, so they could not contribute anything.
		boolean unseenAtTestTime = !isTrainingTime && featureId == -1;
		if (!unseenAtTestTime) {
			double value = extracted.featureValues.get(k);
			modelSentence.observationFeatures.get(position).add(new Pair<Integer,Double>(featureId, value));
		}
	}

	if (!dumpMode) {
		return;
	}

	// Dump mode: print each token followed by the names of its features.
	Util.p("");
	final int length = sentence.T();
	for (int position = 0; position < length; position++) {
		System.out.printf("%s\n\t", sentence.tokens.get(position));
		for (Pair<Integer,Double> feature : modelSentence.observationFeatures.get(position)) {
			System.out.printf("%s ", model.featureVocab.name(feature.first));
		}
		System.out.printf("\n");
	}
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:34,
代码来源:FeatureExtractor.java

示例15: dumpFeatures

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs feature extraction over every sentence in lSentences with the extractor's
 * dumpMode switched on. The computed ModelSentence objects are discarded.
 */
public void dumpFeatures() throws IOException {
	// NOTE(review): the second constructor argument presumably selects training mode -- confirm.
	FeatureExtractor dumper = new FeatureExtractor(model, true);
	dumper.dumpMode = true;

	for (Sentence sentence : lSentences) {
		dumper.computeFeatures(sentence, new ModelSentence(sentence.T()));
	}
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:Train.java

示例16: extractFeatures

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Computes features for every sentence in lSentences and appends the resulting
 * ModelSentence objects to mSentences, in the same order.
 */
public void extractFeatures() throws IOException {
	System.out.println("Extracting features");

	// NOTE(review): the second constructor argument presumably selects training mode -- confirm.
	FeatureExtractor extractor = new FeatureExtractor(model, true);
	for (Sentence sentence : lSentences) {
		ModelSentence numberized = new ModelSentence(sentence.T());
		extractor.computeFeatures(sentence, numberized);
		mSentences.add(numberized);
	}
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:Train.java

示例17: valueAt

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Objective value at the given coefficients: the negated sum of
 * model.computeLogLik over mSentences plus regularizerValue(flatCoefs).
 * Installs flatCoefs into the model as a side effect.
 */
@Override
public double valueAt(double[] flatCoefs) {
	model.setCoefsFromFlat(flatCoefs);

	double totalLogLik = 0;
	for (ModelSentence sentence : mSentences) {
		totalLogLik += model.computeLogLik(sentence);
	}

	final double negativeLogLik = -totalLogLik;
	return negativeLogLik + regularizerValue(flatCoefs);
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:10,
代码来源:Train.java

示例18: derivativeAt

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Gradient of the objective at the given coefficients: the per-sentence gradients
 * accumulated by model.computeGradient, multiplied by -1, with the L2 regularizer
 * gradient added on top. Installs flatCoefs into the model as a side effect.
 */
@Override
public double[] derivativeAt(double[] flatCoefs) {
	double[] gradient = new double[model.flatIDsize()];
	model.setCoefsFromFlat(flatCoefs);

	// Accumulate over all sentences into the shared buffer.
	for (ModelSentence sentence : mSentences) {
		model.computeGradient(sentence, gradient);
	}

	// Flip the sign to match the negated objective, then add the regularizer term.
	ArrayMath.multiplyInPlace(gradient, -1);
	addL2regularizerGradient(gradient, flatCoefs);
	return gradient;
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:12,
代码来源:Train.java

示例19: printWeights

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Prints the log-likelihood summed over mSentences, divided by numTokens,
 * to System.out (no trailing newline).
 */
@Override
public void printWeights() {
	double totalLogLik = 0;
	for (ModelSentence sentence : mSentences) {
		totalLogLik += model.computeLogLik(sentence);
	}
	final double perToken = totalLogLik/numTokens;
	System.out.printf("\tTokLL %.6f\t", perToken);
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:Train.java

示例20: goDecode

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Decodes mSent with the configured decoder (make config option perhaps).
 * GREEDY forwards showConfidence to the decoder; VITERBI does not receive it.
 * Any other decoder value leaves mSent untouched.
 **/
public void goDecode(ModelSentence mSent) {
	if (decoder == Decoder.GREEDY) {
		tagger.model.greedyDecode(mSent, showConfidence);
		return;
	}
	if (decoder == Decoder.VITERBI) {
		// A guard that rejected showConfidence with this decoder used to sit
		// here but is disabled; showConfidence is simply not passed along.
		tagger.model.viterbiDecode(mSent);
	}
}
 

开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:10,
代码来源:RunTagger.java

示例21: getconfusion

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Adds one sentence to a confusion matrix indexed as confusion[gold][predicted].
 * Positions whose gold label maps to -1 are skipped.
 */
private void getconfusion(Sentence lSent, ModelSentence mSent, int[][] confusion) {
	for (int i = 0; i < mSent.T; i++) {
		final int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
		final int predicted = mSent.labels[i];
		if (gold == -1) {
			// presumably -1 means the label is not in labelVocab -- verify against Vocabulary.num
			continue;
		}
		confusion[gold][predicted] += 1;
	}
}
 

开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:11,
代码来源:RunTagger.java

示例22: evaluateSentenceTagging

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Updates the running accuracy counters for one sentence: numTokens grows by one
 * per position, numTokensCorrect by one per position whose predicted label id
 * equals the id of lSent's label.
 */
public void evaluateSentenceTagging(Sentence lSent, ModelSentence mSent) {
	int position = 0;
	while (position < mSent.T) {
		final int gold = tagger.model.labelVocab.num(lSent.labels.get(position));
		final int predicted = mSent.labels[position];
		if (gold == predicted) {
			numTokensCorrect += 1;
		}
		numTokens += 1;
		position++;
	}
}
 

开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:9,
代码来源:RunTagger.java

示例23: runTaggerInEvalMode

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Evaluation mode: reads sentences from inputFilename with CoNLLReader, decodes
 * each one, prints the tagging unless noOutput is set, and updates the accuracy
 * counters. Afterwards accuracy and throughput are reported on System.err.
 */
public void runTaggerInEvalMode() throws IOException, ClassNotFoundException {

		final long startMillis = System.currentTimeMillis();

		List<Sentence> examples = CoNLLReader.readFile(inputFilename);
		inputIterable = examples;

		// Only consumed by the disabled getconfusion() diagnostics below.
		int[][] confusion = new int[tagger.model.numLabels][tagger.model.numLabels];

		int sentenceCount = 0;
		for (Sentence example : examples) {
			sentenceCount += 1;

			ModelSentence decoded = new ModelSentence(example.T());
			tagger.featureExtractor.computeFeatures(example, decoded);
			goDecode(decoded);

			if (!noOutput) {
				outputJustTagging(example, decoded);
			}
			evaluateSentenceTagging(example, decoded);
			// Disabled diagnostics:
			//evaluateOOV(example, decoded);
			//getconfusion(example, decoded, confusion);
		}

		// Token-level accuracy and error rate.
		final double accuracy = numTokensCorrect*1.0 / numTokens;
		System.err.printf("%d / %d correct = %.4f acc, %.4f err\n",
				numTokensCorrect, numTokens, accuracy, 1 - accuracy);

		// Throughput in sentences ("tweets") per second.
		final double elapsed = (System.currentTimeMillis() - startMillis) / 1000.0;
		System.err.printf("%d tweets in %.1f seconds, %.1f tweets/sec\n",
				sentenceCount, elapsed, sentenceCount*1.0/elapsed);

		// Disabled diagnostics: accuracy on cluster words.
/*		System.err.printf("%d / %d cluster words correct = %.4f acc, %.4f err\n", 
				oovTokensCorrect, oovTokens,
				oovTokensCorrect*1.0 / oovTokens,
				1 - (oovTokensCorrect*1.0 / oovTokens)
		);	*/
		// Disabled diagnostics: print the confusion matrix row by row.
/*		int i=0;
		System.out.println("\t"+tagger.model.labelVocab.toString().replaceAll(" ", ", "));
		for (int[] row:confusion){
			System.out.println(tagger.model.labelVocab.name(i)+"\t"+Arrays.toString(row));
			i++;
		}		*/
	}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:47,
代码来源:RunTagger.java

示例24: Train

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Creates a trainer with empty sentence lists (textual and numberized)
 * and a fresh Model.
 */
Train() {
	this.lSentences = new ArrayList<Sentence>();
	this.mSentences = new ArrayList<ModelSentence>();
	this.model = new Model();
}
 

开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:6,
代码来源:Train.java

示例25: outputPrependedTagging

点赞 2

import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Builds a "token TAG token TAG ..." string from mSent's tagging.
 * Assumes mSent's labels hold the tagging.
 *
 * Only tokens whose tag is one of V T N O A R ! L E # ^ @ are kept. A kept token
 * is lower-cased (unless its tag is E), passed through normalize(), and written
 * followed by a space, its tag, and another space.
 *
 * @param lSent        sentence supplying the tokens
 * @param mSent        supplies the predicted labels; only read when suppressTags is false
 * @param suppressTags when true no tags are looked up, so no token can pass the
 *                     tag filter and the result is the empty string
 * @param inputLine    not used by this implementation -- assume does NOT have
 *                     trailing newline. (default from java's readLine)
 * @return the assembled string; it ends with a space when at least one token is kept
 */
public String outputPrependedTagging(Sentence lSent, ModelSentence mSent,
		boolean suppressTags, String inputLine) {
	// mSent might be null! It is only dereferenced when tags are requested.

	final int T = lSent.T();
	StringBuilder sb = new StringBuilder();

	for (int t = 0; t < T; t++) {
		// With suppressed tags there is no tag to filter on. The tag stays null and
		// the token is skipped, instead of failing with a NullPointerException.
		String tag = suppressTags ? null : tagger.model.labelVocab.name(mSent.labels[t]);
		if (!isKeptTag(tag)) {
			continue;
		}

		String token = lSent.tokens.get(t);
		if (!"E".equals(tag)) {
			// Locale.ROOT keeps lower-casing independent of the JVM's default locale
			// (e.g. the Turkish dotless i).
			// NOTE(review): E tokens keep their case, presumably because E marks
			// emoticons in the ARK tagset -- confirm.
			token = token.toLowerCase(java.util.Locale.ROOT);
		}
		sb.append(normalize(token));
		sb.append(" ");
		sb.append(tag);
		sb.append(" ");
	}

	return sb.toString();
}

/** Returns true when tag is one of the tags that are written to the output; null is never kept. */
private boolean isKeptTag(String tag) {
	return "V".equals(tag) || "T".equals(tag) || "N".equals(tag) || "O".equals(tag)
			|| "A".equals(tag) || "R".equals(tag) || "!".equals(tag) || "L".equals(tag)
			|| "E".equals(tag) || "#".equals(tag) || "^".equals(tag) || "@".equals(tag);
}
 

开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:65,
代码来源:RunTagger.java


版权声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。
喜欢 (0)