"; System.out.println(s); System.out.println(StringEscapeUtils.unescapeHtml(s)); String temp =s.replaceAll(" ", " "); temp = temp.replaceAll("\s ", " "); temp = StringEscapeUtils.unescapeHtml(temp); temp = langs(temp, false); System.out.println(temp);// // String str = "/hao.html";// String regex = "\w .html";//// // 编译正则表达式// Pattern pattern = Pattern.compile(regex);//// // 指定要匹配的字符串// Matcher matcher = pattern.matcher(str);// // boolean find();// 尝试查找与该模式匹配的输入序列的下一个子序列// boolean result = matcher.find();// System.out.println(result);//// System.out.println(matcher.group()); } public static void readFiles(String path) { File file = new File(path); BufferedReader reader = null; StringBuffer buffer = new StringBuffer(); try { reader = new BufferedReader(new FileReader(file)); String tempStr = ""; while ((tempStr = reader.readLine()) != null) { buffer.append(tempStr); buffer.append("
"); } reader.close(); parseStr(buffer.toString()); } catch (IOException e) { e.printStackTrace(); } } public static void parseStr(String str) { String regex = ".*?
"; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(str); // boolean find();// 尝试查找与该模式匹配的输入序列的下一个子序列 while (matcher.find()) { //String str1 = matcher.group(); //System.out.println(str1); //parse1(str1); } } public static String parse1(String str) { String regex = "<.*?>"; Pattern pattern = Pattern.compile(regex); Matcher matcher = pattern.matcher(str); String res = matcher.replaceAll(""); regex = " "; pattern = Pattern.compile(regex); matcher = pattern.matcher(res); res = matcher.replaceAll(""); regex = "\s"; pattern = Pattern.compile(regex); matcher = pattern.matcher(res); res = matcher.replaceAll(""); return res; }}