本文整理汇总了Java中banner.tagging.TaggedToken.TagFormat类的典型用法代码示例。如果您正苦于以下问题：Java TagFormat类的具体用法？Java TagFormat怎么用？Java TagFormat使用的例子？那么恭喜您，这里精选的类代码示例或许可以为您提供帮助。
TagFormat是banner.tagging包中TaggedToken类的嵌套类型，在下文中一共展示了TagFormat类的16个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: getTokenLabels
点赞 3
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Builds one label per token of this sentence, in token order.
 * <ul>
 * <li>A token without any mention is labelled with the name of {@code TagPosition.O}.</li>
 * <li>A token with exactly one mention is labelled with
 * {@code TagPosition.getPositionText(format, mention, tokenIndex)}.</li>
 * <li>A token with several mentions is labelled with the string forms of those mentions
 * joined by {@code "&"}; {@code format} is not consulted in that case.</li>
 * </ul>
 *
 * @param format
 *            The {@link TagFormat} handed to {@code TagPosition.getPositionText}
 * @return An unmodifiable list containing one label for each token
 */
public List<String> getTokenLabels(TagFormat format)
{
    List<String> result = new ArrayList<String>();
    for (int index = 0; index < tokens.size(); index++)
    {
        List<Mention> mentions = getMentions(tokens.get(index));
        String label;
        if (mentions.isEmpty())
        {
            label = TagPosition.O.name();
        }
        else if (mentions.size() == 1)
        {
            label = TagPosition.getPositionText(format, mentions.get(0), index);
        }
        else
        {
            // Several overlapping mentions: concatenate their string forms with '&'
            StringBuilder joined = new StringBuilder();
            String separator = "";
            for (Mention mention : mentions)
            {
                joined.append(separator).append(mention);
                separator = "&";
            }
            label = joined.toString();
        }
        result.add(label);
    }
    return Collections.unmodifiableList(result);
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:26,
代码来源:Sentence.java
示例2: getSGML
点赞 3
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Renders the tagged sentence as SGML/XML-style text. Every token text is
 * followed by a single space. A token whose {@link TagFormat#IOBEW} position
 * is B or W is preceded by an opening tag carrying the text of its mention
 * type, and a token whose position is E or W is followed by the matching
 * closing tag. The result is trimmed of leading and trailing whitespace.
 * Assumes that each token is tagged either 0 or 1 times.
 *
 * @return A SGML/XML representation of the tagged sentence
 */
public String getSGML()
{
    StringBuilder sgml = new StringBuilder();
    for (TaggedToken tagged : getTaggedTokens())
    {
        TagPosition position = tagged.getPosition(TagFormat.IOBEW);
        // W needs both an opening and a closing tag around the same token
        boolean wholeMention = position == TagPosition.W;
        if (wholeMention || position == TagPosition.B)
        {
            sgml.append("<").append(tagged.getMention().getType().getText()).append("> ");
        }
        sgml.append(tagged.getToken().getText()).append(" ");
        if (wholeMention || position == TagPosition.E)
        {
            sgml.append("</").append(tagged.getMention().getType().getText()).append("> ");
        }
    }
    return sgml.toString().trim();
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:24,
代码来源:Sentence.java
示例3: getSGML
点赞 3
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Produces a SGML/XML-style rendering of the tagged sentence: tokens are written in order, separated by spaces, and tokens that start or end a
 * mention (positions B/W and E/W of {@link TagFormat#IOBEW} respectively) are wrapped with opening and closing tags holding the mention type text.
 * Assumes that each token is tagged either 0 or 1 times.
 *
 * @return A SGML/XML representation of the tagged sentence, without leading or trailing whitespace
 */
public String getSGML()
{
    StringBuilder out = new StringBuilder();
    List<TaggedToken> all = getTaggedTokens();
    for (TaggedToken current : all)
    {
        TagPosition where = current.getPosition(TagFormat.IOBEW);
        boolean opensMention = where == TagPosition.B || where == TagPosition.W;
        boolean closesMention = where == TagPosition.E || where == TagPosition.W;
        if (opensMention)
        {
            out.append("<");
            out.append(current.getMention().getType().getText());
            out.append("> ");
        }
        out.append(current.getToken().getText());
        out.append(" ");
        if (closesMention)
        {
            out.append("</");
            out.append(current.getMention().getType().getText());
            out.append("> ");
        }
    }
    return out.toString().trim();
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:23,
代码来源:Sentence.java
示例4: train
点赞 3
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Trains a single {@link CRF4} on the given sentences. Each sentence contributes one instance built from its training text (in the requested
 * format and direction) and its tag, all run through {@code pipe}.
 *
 * @param sentences
 *            The sentences to build training instances from
 * @param order
 *            1 to add states via {@code addStatesForLabelsConnectedAsIn}, 2 to add them via {@code addStatesForBiLabelsConnectedAsIn}
 * @param useFeatureInduction
 *            Whether to call {@code trainWithFeatureInduction} instead of {@code train}
 * @param format
 *            The {@link TagFormat} used to render each sentence's training text
 * @param pipe
 *            The pipe used for the instance list, the instances and the CRF
 * @param reverse
 *            Whether the training text of each sentence is produced in reverse order
 * @return The trained CRF
 * @throws IllegalArgumentException
 *             If {@code order} is neither 1 nor 2 (raised only after the instances have been built)
 */
private static CRF4 train(List<Sentence> sentences, int order, boolean useFeatureInduction, TagFormat format, Pipe pipe, boolean reverse)
{
    InstanceList trainingData = new InstanceList(pipe);
    for (Sentence sentence : sentences)
    {
        Instance instance = new Instance(sentence.getTrainingText(format, reverse), null, sentence.getTag(), null, pipe);
        trainingData.add(instance);
    }
    CRF4 model = new CRF4(pipe, null);
    switch (order)
    {
        case 1:
            model.addStatesForLabelsConnectedAsIn(trainingData);
            break;
        case 2:
            model.addStatesForBiLabelsConnectedAsIn(trainingData);
            break;
        default:
            throw new IllegalArgumentException("Order must be equal to 1 or 2");
    }
    // Same proportions array is supplied to whichever training routine is used
    double[] proportions = new double[] {.2, .5, .8};
    if (useFeatureInduction)
    {
        model.trainWithFeatureInduction(trainingData, null, null, null, 99999, 100, 10, 1000, 0.5, false, proportions);
    }
    else
    {
        model.train(trainingData, null, null, (MultiSegmentationEvaluator)null, 99999, 10, proportions);
    }
    return model;
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:22,
代码来源:CRFTagger.java
示例5: CRFTagger
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Creates a tagger by storing the supplied models and settings in the corresponding fields. No validation is performed.
 *
 * @param forwardCRF
 *            The forward model; presumably null when the text direction does not include forward tagging (not checked here)
 * @param reverseCRF
 *            The reverse model; presumably null when the text direction does not include reverse tagging (not checked here)
 * @param basePipe
 *            The base pipe to keep
 * @param order
 *            The CRF order to record
 * @param useFeatureInduction
 *            Whether feature induction was requested
 * @param format
 *            The {@link TagFormat} to record
 * @param textDirection
 *            The {@link TextDirection} to record
 */
private CRFTagger(CRF4 forwardCRF, CRF4 reverseCRF, String2TokenSequencePipe basePipe, int order, boolean useFeatureInduction, TagFormat format, TextDirection textDirection)
{
    // TODO Verify crf==null matches textDirection
    // Settings
    this.order = order;
    this.format = format;
    this.textDirection = textDirection;
    this.useFeatureInduction = useFeatureInduction;
    // Models and pipe
    this.basePipe = basePipe;
    this.forwardCRF = forwardCRF;
    this.reverseCRF = reverseCRF;
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:12,
代码来源:CRFTagger.java
示例6: getTrainingText
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Produces the tagging of this {@link Sentence} as one line of text. The text of each tagged token, rendered with the given {@link TagFormat}, is
 * written followed by a space; the final result is trimmed. Assumes that each token is tagged either 0 or 1 times.
 *
 * @param format
 *            The {@link TagFormat} to use
 * @param reverse
 *            Whether to emit the tokens in reverse order
 * @return A text representation of the tagging for this {@link Sentence}, using the specified {@link TagFormat}
 */
public String getTrainingText(TagFormat format, boolean reverse)
{
    // Work on a copy so that reversing does not disturb the list returned by getTaggedTokens()
    List<TaggedToken> ordered = new ArrayList<TaggedToken>(getTaggedTokens());
    if (reverse)
    {
        Collections.reverse(ordered);
    }
    StringBuilder text = new StringBuilder();
    for (TaggedToken tagged : ordered)
    {
        text.append(tagged.getText(format)).append(" ");
    }
    return text.toString().trim();
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:25,
代码来源:Sentence.java
示例7: CRFTagger
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Stores the given models and configuration in this tagger's fields; nothing is validated or copied.
 *
 * @param forwardCRF
 *            The forward model (may presumably be null depending on {@code textDirection}; not verified here)
 * @param reverseCRF
 *            The reverse model (may presumably be null depending on {@code textDirection}; not verified here)
 * @param basePipe
 *            The base pipe to retain
 * @param order
 *            The CRF order
 * @param useFeatureInduction
 *            Whether feature induction is used
 * @param format
 *            The {@link TagFormat} in use
 * @param textDirection
 *            The {@link TextDirection} in use
 */
private CRFTagger(CRF4 forwardCRF, CRF4 reverseCRF, String2TokenSequencePipe basePipe, int order, boolean useFeatureInduction, TagFormat format,
TextDirection textDirection)
{
    // TODO Verify crf==null matches textDirection
    this.textDirection = textDirection;
    this.format = format;
    this.useFeatureInduction = useFeatureInduction;
    this.order = order;
    this.basePipe = basePipe;
    this.reverseCRF = reverseCRF;
    this.forwardCRF = forwardCRF;
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:13,
代码来源:CRFTagger.java
示例8: train
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Trains and returns a {@link CRFTagger} on the specified {@link Sentence}s.
 * This method may take hours or even days to complete. When training, you
 * will likely need to increase the amount of memory available to the Java
 * virtual machine (for example by raising the maximum heap with
 * "-Xmx1024m"; "-Xms1024m" only sets the initial heap size).
 *
 * @param sentences
 *            The {@link Sentence}s to train the tagger on; must not be empty
 * @param order
 *            The CRF order to use
 * @param useFeatureInduction
 *            Whether or not to use feature induction
 * @param format
 *            The {@link TagFormat} to use
 * @param textDirection
 *            The {@link TextDirection} to use; a forward and/or reverse
 *            model is trained according to its {@code doForward()} and
 *            {@code doReverse()} answers
 * @param lemmatiser
 *            The {@link Lemmatiser} to use
 * @param posTagger
 *            The part-of-speech {@link dragon.nlp.tool.Tagger} to use
 * @param useNumericalNormalization
 *            Whether to use numeric normalization
 * @param preTagger
 *            The {@link Tagger} handed to the base pipe together with the
 *            lemmatiser and part-of-speech tagger
 * @param regexFilename
 *            The file name passed on to {@code setupPipes}
 * @return A trained CRFTagger; ready to tag unseen sentences or be output
 *         to disk
 * @throws RuntimeException
 *             If {@code sentences} is empty
 * @throws UnsupportedOperationException
 *             If {@code textDirection} is {@code TextDirection.Intersection}
 */
public static CRFTagger train(List<Sentence> sentences, int order, boolean useFeatureInduction, TagFormat format, TextDirection textDirection, Lemmatiser lemmatiser,
dragon.nlp.tool.Tagger posTagger, boolean useNumericalNormalization, Tagger preTagger, String regexFilename)
{
    if (sentences.isEmpty())
    {
        throw new RuntimeException("Number of sentences must be greater than zero");
    }

    // Assemble the feature pipeline: the base pipe first, then whatever setupPipes contributes
    String2TokenSequencePipe tokenPipe = new String2TokenSequencePipe(lemmatiser, posTagger, useNumericalNormalization, preTagger);
    ArrayList<Pipe> stages = new ArrayList<Pipe>();
    stages.add(tokenPipe);
    setupPipes(stages, regexFilename);
    Pipe pipeline = new SerialPipes(stages);

    if (textDirection == TextDirection.Intersection)
    {
        throw new UnsupportedOperationException("TextDirection.Intersection not yet supported");
    }

    // A direction that is not requested leaves its model null
    CRF4 forward = textDirection.doForward() ? train(sentences, order, useFeatureInduction, format, pipeline, false) : null;
    CRF4 backward = textDirection.doReverse() ? train(sentences, order, useFeatureInduction, format, pipeline, true) : null;
    return new CRFTagger(forward, backward, tokenPipe, order, useFeatureInduction, format, textDirection);
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:46,
代码来源:CRFTagger.java
示例9: getFormat
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Accessor for the tag format held by this tagger.
 *
 * @return The {@link TagFormat} stored in this tagger's {@code format} field
 */
public TagFormat getFormat()
{
    return this.format;
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:8,
代码来源:CRFTagger.java
示例10: load
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Builds a {@link BannerProperties} from already-loaded {@link Properties}, instantiating the objects they describe.
 * <p>
 * Recognised keys: {@code lemmatiserDataDirectory}, {@code posTaggerDataDirectory}, {@code posTagger} (default: the {@code HeppleTagger} class
 * name), {@code tokenizer} (default: the {@code SimpleTokenizer} class name), {@code tagFormat} (default {@code IOB}),
 * {@code useParenthesisPostProcessing} (default {@code true}), {@code useNumericNormalization} (default {@code true}), {@code order} (default
 * {@code 2}), {@code useFeatureInduction} (default {@code false}), {@code textDirection} (default {@code Forward}), {@code dictionary} and
 * {@code regexFilename}.
 *
 * @param properties
 *            The configuration values to interpret
 * @return A populated {@link BannerProperties}
 * @throws IOException
 *             If the dictionary file cannot be opened, read or closed
 * @throws ClassNotFoundException
 *             If the configured tokenizer class cannot be found
 * @throws InstantiationException
 *             If the configured tokenizer class cannot be instantiated
 * @throws IllegalAccessException
 *             If the configured tokenizer's no-argument constructor is not accessible
 * @throws IllegalArgumentException
 *             If the configured POS tagger type is unknown
 */
public static BannerProperties load(Properties properties) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException {
    BannerProperties bannerProperties = new BannerProperties();
    String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
    if (lemmatiserDataDirectory != null)
        bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
    String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
    if (posTaggerDataDirectory != null)
    {
        String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
        if (posTagger.equals(HeppleTagger.class.getName()))
            bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
        else if (posTagger.equals(MedPostTagger.class.getName()))
            bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
        else
            throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
    }
    String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
    // Note assumption that the tokenizer constructor takes no
    // parameters
    bannerProperties.tokenizer = (Tokenizer) Class.forName(tokenizer).newInstance();
    bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
    if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
        bannerProperties.postProcessor = new ParenthesisPostProcessor();
    bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
    bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
    bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
    bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    String dictionaryFileName = properties.getProperty("dictionary");
    if (dictionaryFileName != null)
    {
        // FIXME This is a temporary hack
        DictionaryTagger dictTagger = new GeneDictionaryTagger(bannerProperties.tokenizer, true);
        FileReader reader = new FileReader(dictionaryFileName);
        try
        {
            dictTagger.add(reader, MentionType.getType("GENE"));
        }
        finally
        {
            // Release the file handle even when reading the dictionary fails
            reader.close();
        }
        System.out.println("Dict size - " + dictTagger.size());
        bannerProperties.preTagger = dictTagger;
    }
    bannerProperties.regexFilename = properties.getProperty("regexFilename");
    return bannerProperties;
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:43,
代码来源:BannerProperties.java
示例11: getTagFormat
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Accessor for the configured tag format.
 *
 * @return The {@link TagFormat} (IO/IOB/IOBEW) which should be used for
 *         training and tagging. Default is IOB
 */
public TagFormat getTagFormat()
{
    return this.tagFormat;
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:9,
代码来源:BannerProperties.java
示例12: getFormat
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Returns the tag format this tagger was created with.
 *
 * @return The {@link TagFormat} held in the {@code format} field
 */
public TagFormat getFormat()
{
    return this.format;
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:8,
代码来源:CRFTagger.java
示例13: load
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename, and instantiates any objects to be used, such as the lemmatiser and part-of-speech (pos)
 * tagger. The file is closed again as soon as its contents have been read.
 * <p>
 * Recognised keys: {@code lemmatiserDataDirectory}, {@code posTaggerDataDirectory}, {@code posTagger} (default: the {@code HeppleTagger} class
 * name), {@code tokenizer} (default: the {@code SimpleTokenizer} class name), {@code tagFormat} (default {@code IOB}),
 * {@code useParenthesisPostProcessing} (default {@code true}), {@code useNumericNormalization} (default {@code true}), {@code order} (default
 * {@code 2}), {@code useFeatureInduction} (default {@code false}) and {@code textDirection} (default {@code Forward}).
 *
 * @param filename
 *            Path of the properties file to read
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *             Wrapping any exception raised while reading the file or interpreting its values (including unknown POS tagger or tokenizer types)
 */
public static BannerProperties load(String filename)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // Properties.load leaves the stream open, so close it explicitly to avoid leaking the file handle
        FileInputStream input = new FileInputStream(filename);
        try {
            properties.load(input);
        } finally {
            input.close();
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null)
            bannerProperties.lemmatiser = new EngLemmatiser(lemmatiserDataDirectory, false, true);
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null)
        {
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName()))
                bannerProperties.posTagger = new HeppleTagger(posTaggerDataDirectory);
            else if (posTagger.equals(MedPostTagger.class.getName()))
                bannerProperties.posTagger = new MedPostTagger(posTaggerDataDirectory);
            else
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
        }
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName()))
            bannerProperties.tokenizer = new NaiveTokenizer();
        else if (tokenizer.equals(SimpleTokenizer.class.getName()))
            bannerProperties.tokenizer = new SimpleTokenizer();
        else if (tokenizer.equals(BaseTokenizer.class.getName()))
            bannerProperties.tokenizer = new BaseTokenizer();
        else
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return bannerProperties;
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:50,
代码来源:BannerProperties.java
示例14: getTagFormat
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Accessor for the configured tag format.
 *
 * @return The {@link TagFormat} (IO/IOB/IOBEW) which should be used for training and tagging. Default is IOB
 */
public TagFormat getTagFormat() {
    return this.tagFormat;
}
开发者ID:leebird,
项目名称:legonlp,
代码行数:7,
代码来源:BannerProperties.java
示例15: load
点赞 2
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Loads the properties file from the specified filename and instantiates the objects it describes, resolving the lemmatiser and part-of-speech
 * tagger data directories relative to {@code dataroot}. The file is closed again as soon as its contents have been read.
 * <p>
 * Recognised keys: {@code lemmatiserDataDirectory}, {@code posTaggerDataDirectory}, {@code posTagger} (default: the {@code HeppleTagger} class
 * name), {@code tokenizer} (default: the {@code SimpleTokenizer} class name), {@code tagFormat} (default {@code IOB}),
 * {@code useParenthesisPostProcessing} (default {@code true}), {@code useNumericNormalization} (default {@code true}), {@code order} (default
 * {@code 2}), {@code useFeatureInduction} (default {@code false}) and {@code textDirection} (default {@code Forward}).
 *
 * @param filename
 *            Path of the properties file to read
 * @param dataroot
 *            Prefix prepended by plain string concatenation to the configured data directories; no path separator is inserted, so it presumably
 *            needs to end with one
 * @return An instance of {@link BannerProperties} which can be queried for configuration parameters
 * @throws RuntimeException
 *             Wrapping any exception raised while reading the file or interpreting its values (including unknown POS tagger or tokenizer types)
 */
public static BannerProperties load(String filename, String dataroot)
{
    Properties properties = new Properties();
    BannerProperties bannerProperties = new BannerProperties();
    try {
        // Properties.load leaves the stream open, so close it explicitly to avoid leaking the file handle
        FileInputStream input = new FileInputStream(filename);
        try {
            properties.load(input);
        } finally {
            input.close();
        }
        String lemmatiserDataDirectory = properties.getProperty("lemmatiserDataDirectory");
        if (lemmatiserDataDirectory != null)
            bannerProperties.lemmatiser = new EngLemmatiser(dataroot+lemmatiserDataDirectory, false, true);
        String posTaggerDataDirectory = properties.getProperty("posTaggerDataDirectory");
        if (posTaggerDataDirectory != null)
        {
            String posTagger = properties.getProperty("posTagger", HeppleTagger.class.getName());
            if (posTagger.equals(HeppleTagger.class.getName()))
                bannerProperties.posTagger = new HeppleTagger(dataroot+posTaggerDataDirectory);
            else if (posTagger.equals(MedPostTagger.class.getName()))
                bannerProperties.posTagger = new MedPostTagger(dataroot+posTaggerDataDirectory);
            else
                throw new IllegalArgumentException("Unknown POS tagger type: " + posTagger);
        }
        String tokenizer = properties.getProperty("tokenizer", SimpleTokenizer.class.getName());
        if (tokenizer.equals(NaiveTokenizer.class.getName()))
            bannerProperties.tokenizer = new NaiveTokenizer();
        else if (tokenizer.equals(SimpleTokenizer.class.getName()))
            bannerProperties.tokenizer = new SimpleTokenizer();
        else if (tokenizer.equals(BaseTokenizer.class.getName()))
            bannerProperties.tokenizer = new BaseTokenizer();
        else
            throw new IllegalArgumentException("Unknown tokenizer type: " + tokenizer);
        bannerProperties.tagFormat = TagFormat.valueOf(properties.getProperty("tagFormat", "IOB"));
        if (Boolean.parseBoolean(properties.getProperty("useParenthesisPostProcessing", "true")))
            bannerProperties.postProcessor = new ParenthesisPostProcessor();
        bannerProperties.useNumericNormalization = Boolean.parseBoolean(properties.getProperty("useNumericNormalization", "true"));
        bannerProperties.order = Integer.parseInt(properties.getProperty("order", "2"));
        bannerProperties.useFeatureInduction = Boolean.parseBoolean(properties.getProperty("useFeatureInduction", "false"));
        bannerProperties.textDirection = TextDirection.valueOf(properties.getProperty("textDirection", "Forward"));
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    return bannerProperties;
}
开发者ID:BlueBrain,
项目名称:bluima,
代码行数:43,
代码来源:BannerProperties.java
示例16: getTrainingText
点赞 1
import banner.tagging.TaggedToken.TagFormat; //导入依赖的package包/类
/**
 * Returns the tagging of this {@link Sentence} as a single string. Each
 * tagged token is rendered with the specified {@link TagFormat} and the
 * renderings are separated by single spaces, optionally in reverse token
 * order. Assumes that each token is tagged either 0 or 1 times.
 *
 * @param format
 *            The {@link TagFormat} to use
 * @param reverse
 *            Whether to return the text in reverse order
 * @return A text representation of the tagging for this {@link Sentence},
 *         using the specified {@link TagFormat}, with surrounding
 *         whitespace trimmed
 */
public String getTrainingText(TagFormat format, boolean reverse)
{
    // Copy first: the reversal below must only affect this local list
    List<TaggedToken> sequence = new ArrayList<TaggedToken>(getTaggedTokens());
    if (reverse)
    {
        Collections.reverse(sequence);
    }
    StringBuilder buffer = new StringBuilder();
    for (TaggedToken entry : sequence)
    {
        buffer.append(entry.getText(format));
        buffer.append(' ');
    }
    String joined = buffer.toString();
    return joined.trim();
}
开发者ID:clulab,
项目名称:reach-banner,
代码行数:28,
代码来源:Sentence.java