Segmenting Text and Counting Word Frequency with the IKAnalyzer Tokenizer


# Add the ikanalyzer dependency to pom.xml

<dependency>
   <groupId>com.janeluo</groupId>
   <artifactId>ikanalyzer</artifactId>
   <version>2012_u6</version>
</dependency>
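
If the project uses Gradle rather than Maven, the same coordinates (taken from the dependency above, assuming an otherwise standard Gradle setup) would be declared as:

implementation 'com.janeluo:ikanalyzer:2012_u6'

The complete example: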
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

/**
 * @Description: Segment text with IKAnalyzer and count word frequency
 * @Author: lizhang
 * @CreateDate: 2018/7/31 22:35
 * @UpdateDate: 2018/7/31 22:35
 * @Version: 1.0
 */
public class Test {

    /**
     * Segment a sentence into words.
     *
     * @param text the sentence to segment
     * @return a map from each word to its frequency
     * @throws IOException if the segmenter fails to read the input
     */
    private static Map<String, Integer> segment(String text) throws IOException {
        Map<String, Integer> map = new HashMap<String, Integer>();
        StringReader re = new StringReader(text);
        IKSegmenter ik = new IKSegmenter(re, false); // true: smart segmentation; false: finest-grained segmentation
        Lexeme lex;
        while ((lex = ik.next()) != null) {
            String word = lex.getLexemeText();
            // skip single-character tokens, count everything else
            if (word.length() > 1) {
                if (map.containsKey(word)) {
                    map.put(word, map.get(word) + 1);
                } else {
                    map.put(word, 1);
                }
            }
        }
        return map;
    }

    public static void main(String[] args) throws IOException {
        Map<String, Integer> map = segment("中国,中国,我爱你");
        System.out.println(map.toString());
    }
}
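
On Java 8 and later, the containsKey/put branch above can be condensed with Map.merge, which inserts the value 1 when the key is absent and otherwise applies Integer::sum to increment the count. A minimal sketch of the same loop with this change:

while ((lex = ik.next()) != null) {
    String word = lex.getLexemeText();
    if (word.length() > 1) {
        // insert 1 on first sight, otherwise add 1 to the existing count
        map.merge(word, 1, Integer::sum);
    }
}

The behavior is identical; merge simply folds the lookup and the update into a single call.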

Output

With the sample sentence, the expected output is along the lines of {中国=2}: 中国 is counted twice, while single-character tokens such as 我 are dropped by the length check (the exact tokens depend on the IK dictionary).
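
A common follow-up, not covered above, is ranking the counted words by frequency. A minimal sketch, assuming segment is widened to public so it can be called from another class:

import java.io.IOException;
import java.util.Map;

public class TopWords {
    public static void main(String[] args) throws IOException {
        // word -> frequency, as produced by segment above
        Map<String, Integer> map = Test.segment("中国,中国,我爱你");
        // sort entries by count, highest first, and print the top 10
        map.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(10)
                .forEach(e -> System.out.println(e.getKey() + " -> " + e.getValue()));
    }
}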