• 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏吧

Java BaseSearcher类的典型用法和代码示例

java 1次浏览

本文整理汇总了Java中com.hankcs.hanlp.dictionary.BaseSearcher的典型用法代码示例。如果您正苦于以下问题:Java BaseSearcher类的具体用法?Java BaseSearcher怎么用?Java BaseSearcher使用的例子?那么恭喜您,这里精选的类代码示例或许可以为您提供帮助。

BaseSearcher类属于com.hankcs.hanlp.dictionary包,在下文中一共展示了BaseSearcher类的12个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Java代码示例。

示例1: segLongest

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Rewrites {@code charArray} by substituting every entry reported by the searcher with
 * that entry's value, copying all characters not covered by an entry through unchanged.
 *
 * @param charArray the text to process
 * @param trie      the dictionary handed to {@code getSearcher} together with the text
 * @return the rewritten text
 */
protected static String segLongest(char[] charArray, DoubleArrayTrie<String> trie)
{
    final StringBuilder out = new StringBuilder(charArray.length);
    final BaseSearcher searcher = getSearcher(charArray, trie);
    int cursor = 0;  // index of the next input character that has not been emitted yet
    for (Map.Entry<String, String> hit = searcher.next(); hit != null; hit = searcher.next())
    {
        final int hitStart = searcher.getOffset();
        // Copy the characters between the previous hit and this one verbatim
        for (; cursor < hitStart; ++cursor)
        {
            out.append(charArray[cursor]);
        }
        out.append(hit.getValue());
        // Resume right after the matched key
        cursor = hitStart + hit.getKey().length();
    }
    // Copy whatever follows the last hit verbatim
    for (; cursor < charArray.length; ++cursor)
    {
        out.append(charArray[cursor]);
    }
    return out.toString();
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:28,
代码来源:BaseChineseDictionary.java

示例2: main

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Demo: edits the custom dictionary at runtime, then prints the matches found in a
 * sample sentence by three different routes.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    // Add a word dynamically
    CustomDictionary.add("攻城狮");
    // Force-insert a word
    CustomDictionary.insert("白富美", "nz 1024");
    // Remove a word (uncomment to try it): CustomDictionary.remove("攻城狮");
    System.out.println(CustomDictionary.add("单身狗", "nz 1024 n 1"));
    System.out.println(CustomDictionary.get("单身狗"));

    // Tongue-in-cheek sample sentence
    final String sentence = "攻城狮逆袭单身狗,迎娶白富美,走上人生巅峰";

    // DoubleArrayTrie segmentation: print every hit with its [begin:end] span
    final char[] chars = sentence.toCharArray();
    final AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute> printHit =
            new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>()
            {
                @Override
                public void hit(int begin, int end, CoreDictionary.Attribute value)
                {
                    System.out.printf("[%d:%d]=%s %s\n", begin, end, new String(chars, begin, end - begin), value);
                }
            };
    CustomDictionary.parseText(chars, printHit);

    // Trie segmentation (first-character hash followed by binary search)
    final BaseSearcher searcher = CustomDictionary.getSearcher(sentence);
    for (Map.Entry hit = searcher.next(); hit != null; hit = searcher.next())
    {
        System.out.println(hit);
    }

    // Standard segmentation
    System.out.println(HanLP.segment(sentence));

    // Note: dynamic additions/removals do not affect the dictionary file.
    // CustomDictionary currently keeps the words from the dictionary file in a DAT and the
    // dynamically added words in a BinTrie; the former is fast, the latter is slow.
    // Dynamic add/remove is kept partly as a legacy feature and partly for debugging;
    // it may be removed in the future.
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:39,
代码来源:DemoCustomDictionary.java

示例3: testNext

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Prints every entry the custom-dictionary searcher yields for a fixed sample string,
 * each prefixed with its offset plus one.
 */
public void testNext() throws Exception
{
    final BaseSearcher searcher = CustomDictionary.getSearcher("都要亲口");
    for (Map.Entry<String, CoreDictionary.Attribute> hit = searcher.next(); hit != null; hit = searcher.next())
    {
        final int position = searcher.getOffset() + 1;
        System.out.println(position + " " + hit);
    }
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:11,
代码来源:TestCustomDictionary.java

示例4: main

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Prints every entry the custom-dictionary searcher yields for a fixed sample sentence.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    final BaseSearcher searcher = CustomDictionary.getSearcher("我是一个码农");
    for (Map.Entry hit = searcher.next(); hit != null; hit = searcher.next())
    {
        System.out.println(hit);
    }
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:10,
代码来源:TestCustomDictionary.java

示例5: testLoad

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Loads the person-name dictionary from its text file, prints the value stored for a
 * sample key, then prints every entry the dictionary's searcher yields for that key.
 */
public void testLoad() throws Exception
{
    final NRDictionary nrDictionary = new NRDictionary();
    nrDictionary.load("data/dictionary/person/nr.txt");
    System.out.println(nrDictionary.get("为"));

    final BaseSearcher searcher = nrDictionary.getSearcher("为");
    for (Map.Entry<String, String> hit = searcher.next(); hit != null; hit = searcher.next())
    {
        System.out.println(hit);
    }
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:13,
代码来源:TestNRDictionary.java

示例6: main

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Demo: edits the custom dictionary at runtime, then prints the matches found in a
 * sample sentence by three different routes.
 *
 * @param args unused
 */
public static void main(String[] args)
{
    // Add a word dynamically
    CustomDictionary.add("攻城狮");
    // Force-insert a word
    CustomDictionary.insert("白富美", "nz 1024");
    // Remove a word (uncomment to try it): CustomDictionary.remove("攻城狮");
    System.out.println(CustomDictionary.add("单身狗", "nz 1024 n 1"));
    System.out.println(CustomDictionary.get("单身狗"));

    // Tongue-in-cheek sample sentence
    final String sentence = "攻城狮逆袭单身狗,迎娶白富美,走上人生巅峰";

    // AhoCorasickDoubleArrayTrie automaton segmentation: print every hit with its span
    final char[] chars = sentence.toCharArray();
    final AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute> printHit =
            new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>()
            {
                @Override
                public void hit(int begin, int end, CoreDictionary.Attribute value)
                {
                    System.out.printf("[%d:%d]=%s %s\n", begin, end, new String(chars, begin, end - begin), value);
                }
            };
    CustomDictionary.parseText(chars, printHit);

    // Trie segmentation
    final BaseSearcher searcher = CustomDictionary.getSearcher(sentence);
    for (Map.Entry hit = searcher.next(); hit != null; hit = searcher.next())
    {
        System.out.println(hit);
    }

    // Standard segmentation
    System.out.println(HanLP.segment(sentence));
}
 

开发者ID:East196,
项目名称:maker,
代码行数:35,
代码来源:DemoCustomDictionary.java

示例7: getSearcher

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Creates a {@code Searcher} over the given text and trie.
 *
 * @param charArray the text to search
 * @param trie      the trie passed to the {@code Searcher}
 * @return the newly created searcher
 */
public static BaseSearcher getSearcher(char[] charArray, DoubleArrayTrie<String> trie)
{
    final BaseSearcher searcher = new Searcher(charArray, trie);
    return searcher;
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:5,
代码来源:BaseChineseDictionary.java

示例8: getSearcher

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Creates a {@code Searcher} over the given text, backed by this class's {@code trie}.
 *
 * @param charArray the text to search
 * @return the newly created searcher
 */
public static BaseSearcher getSearcher(char[] charArray)
{
    final BaseSearcher searcher = new Searcher(charArray, trie);
    return searcher;
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:5,
代码来源:JapanesePersonDictionary.java

示例9: getSearcher

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Creates a {@code Searcher} over the given text.
 *
 * @param text the text to search
 * @return the newly created searcher
 */
public BaseSearcher getSearcher(String text)
{
    final BaseSearcher searcher = new Searcher(text);
    return searcher;
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:5,
代码来源:CommonDictionary.java

示例10: getSearcher

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Creates a {@code Searcher} over the given text and pinyin trie.
 *
 * @param charArray the text to search
 * @param trie      the trie passed to the {@code Searcher}
 * @return the newly created searcher
 */
public static BaseSearcher getSearcher(char[] charArray, DoubleArrayTrie<Pinyin[]> trie)
{
    final BaseSearcher searcher = new Searcher(charArray, trie);
    return searcher;
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:5,
代码来源:PinyinDictionary.java

示例11: Recognition

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Runs Japanese person-name recognition.
 * <p>
 * Scans {@code wordNetAll.charArray} with the Japanese person dictionary searcher and
 * stitches adjacent hits into a candidate name: a hit labelled
 * {@code JapanesePersonDictionary.X} opens a candidate and each directly following hit
 * labelled {@code JapanesePersonDictionary.M} extends it. A candidate is handed to
 * {@code insertName} only when it was built from more than one hit and is at least
 * three characters long.
 *
 * @param segResult      coarse segmentation result (not read by this method)
 * @param wordNetOptimum word net corresponding to the coarse segmentation; forwarded to {@code insertName}
 * @param wordNetAll     full word net; supplies the character array that is scanned
 */
public static void Recognition(List<Vertex> segResult, WordNet wordNetOptimum, WordNet wordNetAll)
{
    StringBuilder sbName = new StringBuilder();
    int appendTimes = 0;   // number of dictionary hits merged into the current candidate
    char[] charArray = wordNetAll.charArray;
    BaseSearcher searcher = JapanesePersonDictionary.getSearcher(charArray);
    Map.Entry<String, Character> entry;
    int activeLine = 1;    // offset of the candidate's first hit plus one; passed to insertName
    int preOffset = 0;     // end position (exclusive) of the previous hit
    while ((entry = searcher.next()) != null)
    {
        Character label = entry.getValue();
        String key = entry.getKey();
        int offset = searcher.getOffset();
        if (preOffset != offset)
        {
            // This hit does not start where the previous one ended: close the running candidate
            if (appendTimes > 1 && sbName.length() > 2) // Japanese names are at least 3 characters
            {
                insertName(sbName.toString(), activeLine, wordNetOptimum, wordNetAll);
            }
            sbName.setLength(0);
            appendTimes = 0;
        }
        if (appendTimes == 0)
        {
            // No candidate in progress: only an X-labelled hit may open one
            if (label == JapanesePersonDictionary.X)
            {
                sbName.append(key);
                ++appendTimes;
                activeLine = offset + 1;
            }
        }
        else
        {
            if (label == JapanesePersonDictionary.M)
            {
                // An M-labelled hit extends the candidate
                sbName.append(key);
                ++appendTimes;
            }
            else
            {
                // Any other label ends the candidate
                if (appendTimes > 1 && sbName.length() > 2)
                {
                    insertName(sbName.toString(), activeLine, wordNetOptimum, wordNetAll);
                }
                sbName.setLength(0);
                appendTimes = 0;
            }
        }
        preOffset = offset + key.length();
    }
    // Flush the candidate still open at end of input. This uses the same acceptance rule
    // as the in-loop flushes, so a two-character candidate is rejected regardless of
    // whether it sits in the middle or at the very end of the text.
    if (appendTimes > 1 && sbName.length() > 2)
    {
        insertName(sbName.toString(), activeLine, wordNetOptimum, wordNetAll);
    }
}
 

开发者ID:priester,
项目名称:hanlpStudy,
代码行数:67,
代码来源:JapanesePersonRecognition.java

示例12: Recognition

点赞 2

import com.hankcs.hanlp.dictionary.BaseSearcher; //导入依赖的package包/类
/**
 * Runs Japanese person-name recognition.
 * <p>
 * Scans {@code wordNetAll.charArray} with the Japanese person dictionary searcher and
 * stitches adjacent hits into a candidate name: a hit labelled
 * {@code JapanesePersonDictionary.X} opens a candidate and each directly following hit
 * labelled {@code JapanesePersonDictionary.M} extends it. A candidate is inserted into
 * {@code wordNetOptimum} as a {@code Nature.nrj} vertex only when it was built from more
 * than one hit and is at least three characters long.
 *
 * @param segResult      coarse segmentation result (not read by this method)
 * @param wordNetOptimum word net corresponding to the coarse segmentation; receives the recognised names
 * @param wordNetAll     full word net; supplies the character array that is scanned
 */
public static void Recognition(List<Vertex> segResult, WordNet wordNetOptimum, WordNet wordNetAll)
{
    StringBuilder sbName = new StringBuilder();
    int appendTimes = 0;   // number of dictionary hits merged into the current candidate
    char[] charArray = wordNetAll.charArray;
    BaseSearcher searcher = JapanesePersonDictionary.getSearcher(charArray);
    Map.Entry<String, Character> entry;
    int activeLine = 1;    // offset of the candidate's first hit plus one; used as the insert line
    int preOffset = 0;     // end position (exclusive) of the previous hit
    while ((entry = searcher.next()) != null)
    {
        Character label = entry.getValue();
        String key = entry.getKey();
        int offset = searcher.getOffset();
        if (preOffset != offset)
        {
            // This hit does not start where the previous one ended: close the running candidate
            if (appendTimes > 1 && sbName.length() > 2) // Japanese names are at least 3 characters
            {
                wordNetOptimum.insert(activeLine, new Vertex(Predefine.TAG_PEOPLE, sbName.toString(), new CoreDictionary.Attribute(Nature.nrj), WORD_ID), wordNetAll);
            }
            sbName.setLength(0);
            appendTimes = 0;
        }
        if (appendTimes == 0)
        {
            // No candidate in progress: only an X-labelled hit may open one
            if (label == JapanesePersonDictionary.X)
            {
                sbName.append(key);
                ++appendTimes;
                activeLine = offset + 1;
            }
        }
        else
        {
            if (label == JapanesePersonDictionary.M)
            {
                // An M-labelled hit extends the candidate
                sbName.append(key);
                ++appendTimes;
            }
            else
            {
                // Any other label ends the candidate
                if (appendTimes > 1 && sbName.length() > 2)
                {
                    wordNetOptimum.insert(activeLine, new Vertex(Predefine.TAG_PEOPLE, sbName.toString(), new CoreDictionary.Attribute(Nature.nrj), WORD_ID), wordNetAll);
                }
                sbName.setLength(0);
                appendTimes = 0;
            }
        }
        preOffset = offset + key.length();
    }
    // Flush the candidate still open at end of input. This uses the same acceptance rule
    // as the in-loop flushes, so a two-character candidate is rejected regardless of
    // whether it sits in the middle or at the very end of the text.
    if (appendTimes > 1 && sbName.length() > 2)
    {
        wordNetOptimum.insert(activeLine, new Vertex(Predefine.TAG_PEOPLE, sbName.toString(), new CoreDictionary.Attribute(Nature.nrj), WORD_ID), wordNetAll);
    }
}
 

开发者ID:ml-distribution,
项目名称:HanLP,
代码行数:67,
代码来源:JapanesePersonRecognition.java


版权声明:本文转自网络文章,转载此文章仅为分享知识,如有侵权,请联系管理员进行删除。
喜欢 (0)