1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
/**
 * @Description: Counts word frequencies in a sentence using the IK Analyzer segmenter.
 * @Author: lizhang
 * @CreateDate: 2018/7/31 22:35
 * @UpdateDate: 2018/7/31 22:35
 * @Version: 1.0
 */
import org.wltea.analyzer.core.IKSegmenter;
import org.wltea.analyzer.core.Lexeme;
import java.io.IOException; import java.io.StringReader; import java.util.*;
public class Test { /** * 对语句进行分词 * * @param text 语句 * @return 分词后的集合 * @throws IOException */ private static Map segment(String text) throws IOException { Map<String, Integer> map = new HashMap<String, Integer>(); StringReader re = new StringReader(text); IKSegmenter ik = new IKSegmenter(re, false);//true 使用smart分词,false使用最小颗粒分词 Lexeme lex; while ((lex = ik.next()) != null) { if (lex.getLexemeText().length() > 1) { if (map.containsKey(lex.getLexemeText())) { map.put(lex.getLexemeText(), map.get(lex.getLexemeText()) + 1); } else { map.put(lex.getLexemeText(), 1); } } } return map; }
public static void main(String[] args) throws IOException { Map<String, Integer> map = segment("中国,中国,我爱你"); System.out.println(map.toString()); } }
|