本文整理汇总了Java中cmu.arktweetnlp.impl.ModelSentence类的典型用法代码示例。如果您正苦于以下问题:Java ModelSentence类的具体用法?Java ModelSentence怎么用?Java ModelSentence使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。
ModelSentence类属于cmu.arktweetnlp.impl包,在下文中一共展示了ModelSentence类的25个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: tag
点赞 3
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tags an already-tokenized sentence using the model's greedy decoder.
 *
 * @param tokens the tokens of one sentence, in order
 * @return the decoded label names, one per sentence position, in order
 */
public List<String> tag(List<String> tokens) {
    // sentence, tt and ms are not declared in this method; they are (re)assigned on every call.
    sentence = new Sentence();
    tt = new TaggedToken();
    sentence.tokens = tokens;

    ms = new ModelSentence(sentence.T());
    featureExtractor.computeFeatures(sentence, ms);
    model.greedyDecode(ms, false);

    final int length = sentence.T();
    ArrayList<String> labelNames = new ArrayList<String>(length);
    for (int position = 0; position < length; position++) {
        int labelId = ms.labels[position];
        labelNames.add(model.labelVocab.name(labelId));
    }
    return labelNames;
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:18,
代码来源:ENTweetPOSTagger.java
示例2: tokenizeAndTag
点赞 3
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tokenizes the raw text of one tweet and tags the resulting tokens
 * with the greedy decoder.
 *
 * @param text the raw tweet text
 * @return each token paired with its decoded tag name, in order
 * @throws RuntimeException if no model has been loaded yet
 */
public List<TaggedToken> tokenizeAndTag(String text) {
    if (model == null) {
        throw new RuntimeException("Must loadModel() first before tagging anything");
    }

    List<String> words = Twokenize.tokenizeRawTweetText(text);
    Sentence tweet = new Sentence();
    tweet.tokens = words;

    ModelSentence decoded = new ModelSentence(tweet.T());
    featureExtractor.computeFeatures(tweet, decoded);
    model.greedyDecode(decoded, false);

    ArrayList<TaggedToken> result = new ArrayList<TaggedToken>();
    final int length = tweet.T();
    for (int i = 0; i < length; i++) {
        TaggedToken pair = new TaggedToken();
        pair.token = words.get(i);
        pair.tag = model.labelVocab.name(decoded.labels[i]);
        result.add(pair);
    }
    return result;
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:25,
代码来源:Tagger.java
示例3: outputJustTagging
点赞 3
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Writes one sentence to {@code outputStream} in "conll" format: one line per
 * token holding the token, a tab and its tag name (plus a tab and the formatted
 * confidence when confidences are present), followed by an empty line.
 * Any other output format is reported through {@code die}.
 *
 * Assumes mSent's labels hold the tagging.
 *
 * @param lSent the sentence supplying the tokens
 * @param mSent the model sentence supplying labels and optional confidences
 */
public void outputJustTagging(Sentence lSent, ModelSentence mSent) {
    // mSent might be null!
    if (!outputFormat.equals("conll")) {
        die("bad output format for just tagging: " + outputFormat);
        return;
    }

    for (int i = 0; i < lSent.T(); i++) {
        String word = lSent.tokens.get(i);
        String tagName = tagger.model.labelVocab.name(mSent.labels[i]);
        outputStream.printf("%s\t%s", word, tagName);

        // Checked per token, so mSent is never touched for an empty sentence.
        if (mSent.confidences != null) {
            outputStream.printf("\t%s", formatConfidence(mSent.confidences[i]));
        }
        outputStream.printf("\n");
    }
    // Blank line terminates the sentence.
    outputStream.println("");
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:23,
代码来源:RunTagger.java
示例4: getPOStags
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Returns POS tags from a List of tokens using the CMU TweetNLP tool.
 *
 * <p>If tagging throws, the problem, the tokens and the stack trace are written
 * to {@code System.err}, and the result holds exactly one placeholder tag
 * {@code "?"} per input token.
 *
 * @param tokens the input tokens
 * @return the list of POS tags
 */
public List<String> getPOStags(List<String> tokens) {
    ArrayList<String> tags = new ArrayList<String>();
    try {
        Sentence sentence = new Sentence();
        sentence.tokens = tokens;
        ModelSentence ms = new ModelSentence(sentence.T());
        this.tagger.featureExtractor.computeFeatures(sentence, ms);
        this.tagger.model.greedyDecode(ms, false);
        for (int t = 0; t < sentence.T(); t++) {
            String tag = this.tagger.model.labelVocab.name(ms.labels[t]);
            tags.add(tag);
        }
    } catch (Exception e) {
        System.err.println("Tagging Problem");
        // Drop any tags collected before the failure; otherwise the placeholders
        // would be appended after them and the result would no longer line up
        // one-to-one with the tokens.
        tags.clear();
        for (int i = 0; i < tokens.size(); i++) {
            tags.add("?");
            System.err.print(tokens.get(i));
        }
        e.printStackTrace(System.err);
    }
    return tags;
}
开发者ID:felipebravom,
项目名称:AffectiveTweets,
代码行数:39,
代码来源:TweetToSparseFeatureVector.java
示例5: tag
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tags the given tokens with the greedy decoder.
 *
 * @param tokens the tokens of one sentence, in order
 * @return each token paired with its decoded tag name, in order
 */
private List<TaggedToken> tag(List<String> tokens) {
    Sentence input = new Sentence();
    input.tokens = tokens;

    ModelSentence decoded = new ModelSentence(input.T());
    m_featureExtractor.computeFeatures(input, decoded);
    m_model.greedyDecode(decoded, false);

    List<TaggedToken> result = new ArrayList<TaggedToken>();
    final int length = input.T();
    for (int i = 0; i < length; i++) {
        String word = tokens.get(i);
        String tagName = m_model.labelVocab.name(decoded.labels[i]);
        result.add(new TaggedToken(word, tagName));
    }
    return result;
}
开发者ID:millecker,
项目名称:senti-storm,
代码行数:16,
代码来源:POSTaggerBolt.java
示例6: tag
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs feature extraction and greedy decoding over the given tokens.
 *
 * @param tokens the tokens of one sentence, in order
 * @return a {@code TaggedToken} per position, combining the token and its tag name
 */
public List<TaggedToken> tag(List<String> tokens) {
    Sentence wrapped = new Sentence();
    wrapped.tokens = tokens;

    ModelSentence modelView = new ModelSentence(wrapped.T());
    m_featureExtractor.computeFeatures(wrapped, modelView);
    m_model.greedyDecode(modelView, false);

    List<TaggedToken> tagged = new ArrayList<TaggedToken>();
    int position = 0;
    while (position < wrapped.T()) {
        String word = tokens.get(position);
        String tagName = m_model.labelVocab.name(modelView.labels[position]);
        tagged.add(new TaggedToken(word, tagName));
        position++;
    }
    return tagged;
}
开发者ID:millecker,
项目名称:senti-storm,
代码行数:16,
代码来源:POSTagger.java
示例7: goDecode
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Decodes {@code mSent} with the algorithm selected by {@code decoder}
 * (could perhaps become a config option). Does nothing for any other
 * decoder value.
 *
 * @param mSent the model sentence to decode
 */
public void goDecode(ModelSentence mSent) {
    if (decoder == Decoder.GREEDY) {
        tagger.model.greedyDecode(mSent, showConfidence);
        return;
    }
    if (decoder == Decoder.VITERBI) {
        // showConfidence is not passed on here; a former guard that rejected
        // --confidence with this decoder is disabled.
        tagger.model.viterbiDecode(mSent);
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:10,
代码来源:RunTagger.java
示例8: evaluateOOV
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Updates the {@code oovTokens} / {@code oovTokensCorrect} counters for every
 * position whose token is contained in {@code wordsInCluster()}.
 *
 * @param lSent the sentence supplying tokens and reference labels
 * @param mSent the model sentence supplying predicted labels
 */
private void evaluateOOV(Sentence lSent, ModelSentence mSent) throws FileNotFoundException, IOException, ClassNotFoundException {
    for (int i = 0; i < mSent.T; i++) {
        int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
        int guess = mSent.labels[i];

        // wordsInCluster() is deliberately queried per token, as before.
        if (!wordsInCluster().contains(lSent.tokens.get(i))) {
            continue;
        }
        if (gold == guess) {
            oovTokensCorrect += 1;
        }
        oovTokens += 1;
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:RunTagger.java
示例9: getconfusion
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Adds one sentence's predictions to a confusion matrix indexed as
 * {@code confusion[trueLabel][predictedLabel]}. Positions whose reference
 * label maps to -1 are skipped.
 *
 * @param lSent     the sentence supplying reference labels
 * @param mSent     the model sentence supplying predicted labels
 * @param confusion the matrix to update in place
 */
private void getconfusion(Sentence lSent, ModelSentence mSent, int[][] confusion) {
    for (int i = 0; i < mSent.T; i++) {
        int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
        int guess = mSent.labels[i];
        if (gold == -1) {
            continue;
        }
        confusion[gold][guess]++;
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:RunTagger.java
示例10: evaluateSentenceTagging
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Compares predicted labels with reference labels for one sentence and
 * updates the running {@code numTokens} / {@code numTokensCorrect} counters.
 *
 * @param lSent the sentence supplying reference labels
 * @param mSent the model sentence supplying predicted labels
 */
public void evaluateSentenceTagging(Sentence lSent, ModelSentence mSent) {
    for (int i = 0; i < mSent.T; i++) {
        int gold = tagger.model.labelVocab.num(lSent.labels.get(i));
        int guess = mSent.labels[i];
        if (gold == guess) {
            numTokensCorrect += 1;
        }
        numTokens += 1;
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:RunTagger.java
示例11: outputPrependedTagging
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Prints one tab-separated line to {@code outputStream}: the joined tokens,
 * then (unless suppressed) the joined tag names, then (when
 * {@code showConfidence} is set) the joined confidences, and finally the
 * original input line.
 *
 * Assumes mSent's labels hold the tagging.
 *
 * @param lSent        the sentence supplying the tokens
 * @param mSent        the model sentence supplying labels and confidences
 * @param suppressTags when true, the tag column is left out
 * @param inputLine    assumed NOT to have a trailing newline (the default from java's readLine)
 */
public void outputPrependedTagging(Sentence lSent, ModelSentence mSent,
        boolean suppressTags, String inputLine) {
    // mSent might be null!
    final int length = lSent.T();
    String[] words = new String[length];
    String[] tagNames = new String[length];
    String[] confidenceTexts = new String[length];

    for (int i = 0; i < length; i++) {
        words[i] = lSent.tokens.get(i);
        if (!suppressTags) {
            tagNames[i] = tagger.model.labelVocab.name(mSent.labels[i]);
        }
        if (showConfidence) {
            confidenceTexts[i] = formatConfidence(mSent.confidences[i]);
        }
    }

    StringBuilder line = new StringBuilder();
    line.append(StringUtils.join(words)).append("\t");
    if (!suppressTags) {
        line.append(StringUtils.join(tagNames)).append("\t");
    }
    if (showConfidence) {
        line.append(StringUtils.join(confidenceTexts)).append("\t");
    }
    line.append(inputLine);

    outputStream.println(line.toString());
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:41,
代码来源:RunTagger.java
示例12: computeFeatures
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Does feature extraction on one sentence.
 *
 * Input: textual representation of the sentence.
 * Output: fills up modelSentence with numberized features. At training time
 * the labels are additionally numberized into modelSentence and the
 * label-based edge features are computed.
 *
 * @param linguisticSentence the textual sentence
 * @param modelSentence      the model-side sentence to fill
 */
public void computeFeatures(Sentence linguisticSentence, ModelSentence modelSentence) {
    final int length = linguisticSentence.T();
    assert linguisticSentence.T() > 0; //TODO: handle this when assertions are off

    computeObservationFeatures(linguisticSentence, modelSentence);
    if (!isTrainingTime) {
        return;
    }

    for (int i = 0; i < length; i++) {
        String labelName = linguisticSentence.labels.get(i);
        modelSentence.labels[i] = model.labelVocab.num(labelName);
    }
    computeCheatingEdgeFeatures(linguisticSentence, modelSentence);
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:18,
代码来源:FeatureExtractor.java
示例13: computeCheatingEdgeFeatures
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Peeks at the modelSentence's labels to fill its edge features -- for
 * training only! Position 0 gets the model's start marker; every later
 * position gets the label of the position before it.
 *
 * @param sentence      supplies the sentence length
 * @param modelSentence supplies the labels and receives the edge features
 */
private void computeCheatingEdgeFeatures(Sentence sentence, ModelSentence modelSentence) {
    assert isTrainingTime;
    modelSentence.edgeFeatures[0] = model.startMarker();

    int current = 1;
    while (current < sentence.T()) {
        int previous = current - 1;
        modelSentence.edgeFeatures[current] = modelSentence.labels[previous];
        current++;
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:13,
代码来源:FeatureExtractor.java
示例14: computeObservationFeatures
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs every registered feature extractor over the sentence's tokens and
 * stores the resulting (feature id, value) pairs per position in
 * {@code modelSentence.observationFeatures}. In dump mode the feature names
 * of each token are also printed to standard output.
 *
 * @param sentence      supplies the tokens
 * @param modelSentence receives the numberized observation features
 */
private void computeObservationFeatures(Sentence sentence, ModelSentence modelSentence) {
    PositionFeaturePairs extracted = new PositionFeaturePairs();

    // Extract in featurename form
    for (FeatureExtractorInterface extractor : allFeatureExtractors) {
        extractor.addFeatures(sentence.tokens, extracted);
    }

    // Numberize. This should be melded with the addFeatures() loop above, so no wasteful
    // temporaries that later turn out to be OOV... but is this really an issue?
    final int pairCount = extracted.size();
    for (int k = 0; k < pairCount; k++) {
        int position = extracted.labelIndexes.get(k);
        String featureName = extracted.featureNames.get(k);
        int featureId = model.featureVocab.num(featureName);

        // OOV features (id -1) are skipped at test time. Note we have implicit
        // conjunctions from base features, so these are base features that weren't
        // seen for *any* label at training time -- of course they will be useless for us...
        if (isTrainingTime || featureId != -1) {
            double featureValue = extracted.featureValues.get(k);
            modelSentence.observationFeatures.get(position)
                    .add(new Pair<Integer,Double>(featureId, featureValue));
        }
    }

    if (!dumpMode) {
        return;
    }
    Util.p("");
    for (int position = 0; position < sentence.T(); position++) {
        System.out.printf("%s\n\t", sentence.tokens.get(position));
        for (Pair<Integer,Double> entry : modelSentence.observationFeatures.get(position)) {
            System.out.printf("%s ", model.featureVocab.name(entry.first));
        }
        System.out.printf("\n");
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:34,
代码来源:FeatureExtractor.java
示例15: dumpFeatures
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs feature extraction in dump mode over every loaded sentence. The
 * computed model sentences are discarded; only the extractor's dump
 * output is of interest.
 */
public void dumpFeatures() throws IOException {
    FeatureExtractor dumper = new FeatureExtractor(model, true);
    dumper.dumpMode = true;
    for (Sentence current : lSentences) {
        dumper.computeFeatures(current, new ModelSentence(current.T()));
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:Train.java
示例16: extractFeatures
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Computes features for every loaded sentence and appends the resulting
 * model sentences to {@code mSentences}, in the same order.
 */
public void extractFeatures() throws IOException {
    System.out.println("Extracting features");
    FeatureExtractor extractor = new FeatureExtractor(model, true);
    for (Sentence current : lSentences) {
        ModelSentence numberized = new ModelSentence(current.T());
        extractor.computeFeatures(current, numberized);
        mSentences.add(numberized);
    }
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:11,
代码来源:Train.java
示例17: valueAt
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Objective value at the given coefficients: the negated sum of
 * {@code model.computeLogLik} over all model sentences, plus the
 * regularizer value.
 *
 * @param flatCoefs the flattened coefficient vector; installed into the model first
 * @return the objective value
 */
@Override
public double valueAt(double[] flatCoefs) {
    model.setCoefsFromFlat(flatCoefs);

    double totalLogLik = 0;
    for (ModelSentence example : mSentences) {
        totalLogLik += model.computeLogLik(example);
    }

    double negatedLogLik = -totalLogLik;
    return negatedLogLik + regularizerValue(flatCoefs);
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:10,
代码来源:Train.java
示例18: derivativeAt
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Gradient of the objective at the given coefficients: the per-sentence
 * gradients accumulated by {@code model.computeGradient}, multiplied by -1,
 * with the L2 regularizer gradient added.
 *
 * @param flatCoefs the flattened coefficient vector; installed into the model first
 * @return a freshly allocated gradient array of length {@code model.flatIDsize()}
 */
@Override
public double[] derivativeAt(double[] flatCoefs) {
    double[] gradient = new double[model.flatIDsize()];
    model.setCoefsFromFlat(flatCoefs);

    for (ModelSentence example : mSentences) {
        // Accumulates this sentence's contribution into the shared array.
        model.computeGradient(example, gradient);
    }

    ArrayMath.multiplyInPlace(gradient, -1);
    addL2regularizerGradient(gradient, flatCoefs);
    return gradient;
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:12,
代码来源:Train.java
示例19: printWeights
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Prints the summed log-likelihood of all model sentences divided by
 * {@code numTokens}, labelled "TokLL", to standard output.
 */
@Override
public void printWeights() {
    double totalLogLik = 0;
    for (ModelSentence example : mSentences) {
        totalLogLik += model.computeLogLik(example);
    }
    System.out.printf("\tTokLL %.6f\t", totalLogLik/numTokens);
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:9,
代码来源:Train.java
示例20: goDecode
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Runs the decoding algorithm selected by {@code decoder} on the given
 * model sentence (make this a config option perhaps). Any decoder value
 * other than GREEDY or VITERBI results in no decoding.
 *
 * @param mSent the model sentence to decode
 */
public void goDecode(ModelSentence mSent) {
    final boolean greedy = (decoder == Decoder.GREEDY);
    if (greedy) {
        tagger.model.greedyDecode(mSent, showConfidence);
    } else {
        if (decoder == Decoder.VITERBI) {
            // The guard that used to reject --confidence for this decoder is disabled;
            // showConfidence is simply not forwarded.
            tagger.model.viterbiDecode(mSent);
        }
    }
}
开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:10,
代码来源:RunTagger.java
示例21: getconfusion
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Tallies one sentence into the confusion matrix: row = reference label id,
 * column = predicted label id. A reference label id of -1 is not tallied.
 *
 * @param lSent     the sentence supplying reference labels
 * @param mSent     the model sentence supplying predicted labels
 * @param confusion the matrix to update in place
 */
private void getconfusion(Sentence lSent, ModelSentence mSent, int[][] confusion) {
    int position = 0;
    while (position < mSent.T) {
        int row = tagger.model.labelVocab.num(lSent.labels.get(position));
        int column = mSent.labels[position];
        if (row != -1) {
            confusion[row][column]++;
        }
        position++;
    }
}
开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:11,
代码来源:RunTagger.java
示例22: evaluateSentenceTagging
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Scores one tagged sentence: every position increments {@code numTokens},
 * and positions where the predicted label id equals the reference label id
 * also increment {@code numTokensCorrect}.
 *
 * @param lSent the sentence supplying reference labels
 * @param mSent the model sentence supplying predicted labels
 */
public void evaluateSentenceTagging(Sentence lSent, ModelSentence mSent) {
    int position = 0;
    while (position < mSent.T) {
        int expected = tagger.model.labelVocab.num(lSent.labels.get(position));
        int actual = mSent.labels[position];
        if (expected == actual) {
            numTokensCorrect += 1;
        }
        numTokens += 1;
        position++;
    }
}
开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:9,
代码来源:RunTagger.java
示例23: runTaggerInEvalMode
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Reads the sentences in {@code inputFilename}, tags each one, optionally
 * prints the tagging, scores it against the labels from the file, and
 * finally reports accuracy and throughput on {@code System.err}.
 */
public void runTaggerInEvalMode() throws IOException, ClassNotFoundException {
    final long startMillis = System.currentTimeMillis();
    int tweetCount = 0;

    List<Sentence> examples = CoNLLReader.readFile(inputFilename);
    inputIterable = examples;

    // Only used by the disabled confusion-matrix report below.
    int[][] confusion = new int[tagger.model.numLabels][tagger.model.numLabels];

    for (Sentence example : examples) {
        tweetCount++;

        ModelSentence decoded = new ModelSentence(example.T());
        tagger.featureExtractor.computeFeatures(example, decoded);
        goDecode(decoded);

        if (!noOutput) {
            outputJustTagging(example, decoded);
        }
        evaluateSentenceTagging(example, decoded);
        // Disabled diagnostics:
        //evaluateOOV(example, decoded);
        //getconfusion(example, decoded, confusion);
    }

    double accuracy = numTokensCorrect*1.0 / numTokens;
    System.err.printf("%d / %d correct = %.4f acc, %.4f err\n",
            numTokensCorrect, numTokens, accuracy, 1 - accuracy);

    double elapsedSeconds = ((double) (System.currentTimeMillis() - startMillis)) / 1000.0;
    System.err.printf("%d tweets in %.1f seconds, %.1f tweets/sec\n",
            tweetCount, elapsedSeconds, tweetCount*1.0/elapsedSeconds);

    /* Disabled OOV report:
    System.err.printf("%d / %d cluster words correct = %.4f acc, %.4f err\n",
            oovTokensCorrect, oovTokens,
            oovTokensCorrect*1.0 / oovTokens,
            1 - (oovTokensCorrect*1.0 / oovTokens)
    ); */
    /* Disabled confusion-matrix report:
    int i=0;
    System.out.println("\t"+tagger.model.labelVocab.toString().replaceAll(" ", ", "));
    for (int[] row:confusion){
        System.out.println(tagger.model.labelVocab.name(i)+"\t"+Arrays.toString(row));
        i++;
    } */
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:47,
代码来源:RunTagger.java
示例24: Train
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Creates a trainer with empty sentence lists and a fresh {@code Model}.
 */
Train() {
    this.lSentences = new ArrayList<Sentence>();
    this.mSentences = new ArrayList<ModelSentence>();
    this.model = new Model();
}
开发者ID:weizh,
项目名称:geolocator-3.0,
代码行数:6,
代码来源:Train.java
示例25: outputPrependedTagging
点赞 2
import cmu.arktweetnlp.impl.ModelSentence; //导入依赖的package包/类
/**
 * Builds a "token tag token tag ..." string for one sentence, keeping only
 * tokens whose tag is one of V, T, N, O, A, R, !, L, E, #, ^ or @.
 *
 * <p>A kept token is lower-cased unless its tag is {@code E}, passed through
 * {@code normalize}, and appended followed by a space, its tag and another
 * space. Nothing is written to the output stream; the string is returned.
 *
 * <p>Assumes mSent's labels hold the tagging. When {@code suppressTags} is
 * true no tags are looked up, so no token can pass the tag filter and the
 * result is the empty string.
 *
 * @param lSent        the sentence supplying the tokens
 * @param mSent        the model sentence supplying the labels; only read when
 *                     {@code suppressTags} is false
 * @param suppressTags when true, tags are not computed
 * @param inputLine    not used by this implementation; kept for signature compatibility
 * @return the filtered, space-separated token/tag sequence
 */
public String outputPrependedTagging(Sentence lSent, ModelSentence mSent,
        boolean suppressTags, String inputLine) {
    //mSent might be null!
    final int T = lSent.T();
    StringBuilder sb = new StringBuilder();
    for (int t = 0; t < T; t++) {
        String token = lSent.tokens.get(t);
        // Without tags there is nothing to filter on. Previously the tag stayed null
        // in this case and the comparisons below threw a NullPointerException.
        String tag = suppressTags ? null : tagger.model.labelVocab.name(mSent.labels[t]);

        // Constant-first equals keeps every comparison null-safe.
        boolean kept = "V".equals(tag) || "T".equals(tag) || "N".equals(tag) || "O".equals(tag)
                || "A".equals(tag) || "R".equals(tag) || "!".equals(tag) || "L".equals(tag)
                || "E".equals(tag) || "#".equals(tag) || "^".equals(tag) || "@".equals(tag);
        if (!kept) {
            continue;
        }

        // Tokens tagged E keep their original case.
        if (!"E".equals(tag)) {
            token = token.toLowerCase();
        }
        sb.append(normalize(token));
        sb.append(" ");
        sb.append(tag);
        sb.append(" ");
    }
    return sb.toString();
}
开发者ID:Avlessi,
项目名称:SentimentAnalysis,
代码行数:65,
代码来源:RunTagger.java