本文整理汇总了Java中net.htmlparser.jericho.TextExtractor类的典型用法代码示例。如果您正苦于以下问题:Java TextExtractor类的具体用法?Java TextExtractor怎么用?Java TextExtractor使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
TextExtractor类属于net.htmlparser.jericho包,在下文中一共展示了TextExtractor类的6个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。
示例1: extractText
点赞 2
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Produces plain text for a piece of content, chosen by its content type.
 *
 * <ul>
 *   <li>{@code text/plain...}: the content is cast to {@code String} and returned as is.</li>
 *   <li>{@code text/html...}: the content is cast to {@code String}, parsed as HTML and
 *       reduced to text with {@link TextExtractor}.</li>
 *   <li>any other {@code text/...} subtype: {@code EMPTY_STRING} is returned without logging.</li>
 *   <li>anything else: the content type is logged at debug level and {@code EMPTY_STRING}
 *       is returned.</li>
 * </ul>
 *
 * @param contentType the content type string; must not be {@code null}
 * @param content the content object; cast to {@code String} for the two handled types
 * @return the extracted text, or {@code EMPTY_STRING} when the type is not handled
 */
private static String extractText(String contentType, Object content) {
    // Non-text content is reported and skipped up front.
    if (!contentType.startsWith("text/")) {
        log.debug("ignored content type {}", contentType);
        return EMPTY_STRING;
    }

    if (contentType.startsWith("text/plain")) {
        return (String) content;
    }

    if (contentType.startsWith("text/html")) {
        final Source htmlSource = new Source((String) content);
        return new TextExtractor(htmlSource).toString();
    }

    // Remaining text/* subtypes are neither extracted nor logged.
    return EMPTY_STRING;
}
开发者ID:mrbald,
项目名称:ownmail,
代码行数:14,
代码来源:Indexer.java
示例2: main
点赞 2
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Demo entry point: fetches one page into a local directory, then prints the text
 * extracted from every {@code txt} file found under that directory.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if fetching, URL conversion or parsing fails
 */
public static void main(String[] args) throws Exception {
    final File downloadDir = new File("bobah.net/d4d");
    downloadDir.mkdirs();

    final URL pageUrl = new URL("http://www.bobah.net/book/export/html/13");
    final File pageFile = new File(downloadDir, "bobah.txt");
    fetch(pageUrl, pageFile);

    final Collection<File> textFiles = Utils.findFiles(downloadDir, "txt");
    for (final File textFile : textFiles) {
        // Parse each file through its file: URL and print the extracted text.
        final Source parsed = new Source(textFile.toURI().toURL());
        final String plainText = new TextExtractor(parsed).toString();
        System.out.println(plainText);
    }

    shutdownLogger();
}
开发者ID:mrbald,
项目名称:ownmail,
代码行数:12,
代码来源:Wget.java
示例3: displayJustificationNode
点赞 2
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Prints a single proof-view node to standard output as one line of text.
 *
 * <p>The line is indented by one space per level of the node's depth. Nodes that are not
 * expanded initially are wrapped in parentheses. The node's HTML, converted to text with
 * {@link TextExtractor}, is used when it is non-empty; otherwise the node's label is used
 * if it is not {@code null}.
 *
 * @param node the node to render
 * @throws IOException declared because {@code TextExtractor.appendTo} is invoked
 */
public static void displayJustificationNode(final ProofViewNode node) throws IOException {
    final StringBuilder line = new StringBuilder();

    // One space of indentation per depth level.
    int level = 0;
    while (level < node.getDepth()) {
        line.append(' ');
        level++;
    }

    // Nodes that start out hidden are shown in parentheses.
    final boolean hiddenInitially = !node.isExpandInitially();
    if (hiddenInitially) {
        line.append('(');
    }

    final String markup = node.getHTML();
    if (markup != null && !markup.isEmpty()) {
        // HTML is available: flatten it to text directly into the line buffer.
        final TextExtractor extractor = new TextExtractor(new Source(markup));
        extractor.appendTo(line);
    } else {
        // No HTML: fall back to the label when there is one.
        final String label = node.getLabel();
        if (label != null) {
            line.append(label);
        }
    }

    if (hiddenInitially) {
        line.append(')');
    }

    System.out.println(line);
}
开发者ID:cycorp,
项目名称:example-code,
代码行数:28,
代码来源:QueryAnswerJustifying.java
示例4: htmlToText
点赞 2
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Converts HTML markup to plain text.
 *
 * <p>Before extraction, a {@code ", "} separator is inserted just inside every literal
 * {@code <li>}, {@code </li>}, {@code <dd>} and {@code </dd>} tag so that neighbouring list
 * items and definitions remain delimited once the markup is removed. Only these exact
 * tag spellings are matched; tags written in another case or carrying attributes are
 * left untouched.
 *
 * @param html the markup to convert; must not be {@code null}
 * @return the text produced by {@link TextExtractor} with
 *         {@code setConvertNonBreakingSpaces(true)} applied
 */
public static synchronized String htmlToText(String html) {
    LOG.info("extractFromHTML ... ");

    // Insert comma separators to keep list structure visible in the flattened text.
    // The patterns are plain literals, so String.replace is used instead of the
    // regex-based replaceAll; the resulting string is the same.
    final String delimited = html
        .replace("<li>", "<li>, ")
        .replace("</li>", ", </li>")
        .replace("<dd>", "<dd>, ")
        .replace("</dd>", ", </dd>");

    // A Source is itself the segment spanning the whole document, so it can be handed
    // to the extractor directly.
    final Source src = new Source(delimited);
    return new TextExtractor(src)
        .setConvertNonBreakingSpaces(true)
        .toString();
}
开发者ID:dice-group,
项目名称:FOX,
代码行数:17,
代码来源:FoxTextUtil.java
示例5: html2text
点赞 2
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Converts HTML markup to plain text, with the extractor configured via
 * {@code setExcludeNonHTMLElements(true)}.
 *
 * @param html the markup to convert
 * @return the text produced by the configured {@link TextExtractor}
 */
public static String html2text(String html) {
    Source source = new Source(html);
    TextExtractor extractor = source.getTextExtractor();
    extractor.setExcludeNonHTMLElements(true);
    // Render with the extractor that was just configured. Asking the source for another
    // extractor here would produce a fresh, unconfigured instance and lose the setting.
    return extractor.toString();
}
开发者ID:macc704,
项目名称:KBDeX,
代码行数:8,
代码来源:KHtmlConverter.java
示例6: removeHtml
点赞 1
import net.htmlparser.jericho.TextExtractor; //导入依赖的package包/类
/**
 * Strips HTML markup from a string by running it through {@link TextExtractor}.
 *
 * <p>Input for which {@code StringUtils.isNotBlank} is {@code false} is returned as is,
 * without being parsed.
 *
 * @param sourceString
 *            The string to strip HTML from.
 * @return The extracted text, or {@code sourceString} itself when it is blank.
 * @since 1.0.0
 */
public static String removeHtml(final String sourceString) {
    // Nothing to extract from blank input; hand it back untouched.
    if (!StringUtils.isNotBlank(sourceString)) {
        return sourceString;
    }

    final Source parsed = new Source(sourceString);
    final Segment wholeInput = new Segment(parsed, 0, sourceString.length());
    return new TextExtractor(wholeInput).toString();
}
开发者ID:stevensimmons,
项目名称:restalm,
代码行数:18,
代码来源:ConversionUtils.java