• 欢迎访问开心洋葱网站,在线教程,推荐使用最新版火狐浏览器和Chrome浏览器访问本网站,欢迎加入开心洋葱 QQ群
  • 为方便开心洋葱网用户,开心洋葱官网已经开启复制功能!
  • 欢迎访问开心洋葱网站,手机也能访问哦~欢迎加入开心洋葱多维思维学习平台 QQ群
  • 如果您觉得本站非常有看点,那么赶紧使用Ctrl+D 收藏开心洋葱吧~~~~~~~~~~~~~!
  • 由于近期流量激增,小站的ECS没能经得起亲们的访问,本站依然没有盈利,如果各位看官觉着文字不错,还请看官给小站打个赏~~~~~~~~~~~~~!

Java读取Html文本解析email地址的代码

JAVA相关 水墨上仙 2288次浏览

Java读取Html文本解析email地址的代码
代码来自:http://blog.csdn.net/javaalpha/article/details/8332587

package com.alpha.test;import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.Writer;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.regex.Matcher;
import java.util.regex.Pattern;/**
 * 读取html页面文件解析邮箱地址
 * 
 * @author JavaAlpha 2012-12-19 13:45:11
 */
public class ReadHtmlToTxt {

    /**
     * Half-width of the slice cut around the first '@': the slice starts this many
     * characters before the '@' and ends (exclusive) this many characters after the
     * index of the '@'.
     */
    private static final int EMAIL_WINDOW = 10;

    /** Only addresses ending in this suffix are trimmed to an exact end. */
    private static final String DOMAIN_SUFFIX = ".com";

    /** Matches a single character in the range U+4E00..U+9FA5; compiled once and reused. */
    private static final Pattern CHINESE_CHAR = Pattern.compile("[\\u4e00-\\u9fa5]");

    /**
     * Reads a file in 4096-character chunks and concatenates whatever
     * {@link #checkEmail(String)} extracts from each chunk.
     *
     * <p>Each chunk is examined on its own, so at most one address is collected per
     * chunk and an address that straddles a chunk boundary may be cut. The file is
     * decoded with the platform default charset.
     *
     * @param path path of the file to read; may be {@code null}
     * @return the concatenated extraction results, or an empty string when
     *         {@code path} is {@code null}, the file is missing, is not a regular
     *         readable file, or nothing was extracted
     */
    public static String readHtml(String path) {
        if (path == null) {
            return "";
        }
        File htmlFile = new File(path);
        if (!(htmlFile.exists() && htmlFile.isFile() && htmlFile.canRead())) {
            return "";
        }

        StringBuilder emailCont = new StringBuilder();
        Reader in = null;
        try {
            in = new FileReader(htmlFile);
            char[] buff = new char[4096];
            int nch;
            while ((nch = in.read(buff, 0, buff.length)) != -1) {
                emailCont.append(checkEmail(new String(buff, 0, nch)));
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException; keep whatever was collected so far.
            e.printStackTrace();
        } finally {
            // The reader used to be left open on every path.
            closeQuietly(in);
        }
        return emailCont.toString();
    }

    /**
     * Cuts a candidate e-mail address out of the text surrounding the first '@'.
     *
     * <p>The slice around the '@' is clamped to the bounds of {@code str}, then
     * cleaned: '&gt;' and '&lt;' are removed (and '/' too, but only when an angle
     * bracket was present), and ',' and the ideographic full stop are removed. If the
     * result contains ".com" it is truncated right after it; otherwise the cleaned
     * slice is returned untouched. The result is also printed to standard output
     * whenever an '@' was found.
     *
     * @param str text to search; may be {@code null}
     * @return the extracted candidate, or an empty string when {@code str} is
     *         {@code null} or contains no '@'
     */
    public static String checkEmail(String str) {
        String postCont = "";
        if (str == null) {
            return postCont;
        }

        int at = str.indexOf('@');
        if (at > -1) {
            // Clamp the slice: an '@' near either end of the input used to make
            // substring() throw StringIndexOutOfBoundsException.
            int from = Math.max(0, at - EMAIL_WINDOW);
            int to = Math.min(str.length(), at + EMAIL_WINDOW);
            postCont = str.substring(from, to);

            // '/' is stripped only together with angle brackets (markup residue).
            if (postCont.indexOf('>') > -1 || postCont.indexOf('<') > -1) {
                postCont = postCont.replace(">", "").replace("<", "").replace("/", "");
            }

            // \u3002 is the ideographic full stop.
            postCont = postCont.replace(",", "").replace("\u3002", "");

            // Truncate only when the suffix exists; indexOf() == -1 used to produce a
            // meaningless 3-character prefix (or an exception on short strings).
            int suffixAt = postCont.indexOf(DOMAIN_SUFFIX);
            if (suffixAt > -1) {
                postCont = postCont.substring(0, suffixAt + DOMAIN_SUFFIX.length());
            }

            System.out.println(postCont);
        }
        return postCont;
    }

    /**
     * Tells whether the text contains at least one character in U+4E00..U+9FA5.
     *
     * @param str text to inspect; must not be {@code null}
     * @return {@code true} if such a character is found, {@code false} otherwise
     */
    public static boolean checkChinese(String str) {
        return CHINESE_CHAR.matcher(str).find();
    }

    /**
     * Writes the given text to a file, creating it if necessary and replacing any
     * previous content. I/O failures are printed to standard error and not rethrown.
     *
     * @param cont text to write; {@code null} is written as an empty string
     * @param path path of the destination file
     */
    public static void writerFile(String cont, String path) {
        File emailFile = new File(path);
        Writer out = null;
        try {
            // FileWriter creates the file when it is missing, so no separate
            // createNewFile() step is needed.
            out = new FileWriter(emailFile);
            out.write(cont == null ? "" : cont);
            out.flush();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close even when write() fails; the writer used to leak in that case.
            closeQuietly(out);
        }
    }

    /**
     * Downloads the resource at the given URL line by line, appends {@code "</br>"}
     * after every line and prints the result to standard output. Failures are printed
     * to standard error; whatever was read before the failure is still printed.
     *
     * @param strUrl URL to read
     */
    public static void readUrlCont(String strUrl) {
        StringBuilder cont = new StringBuilder();
        BufferedReader reader = null;
        try {
            URL url = new URL(strUrl);
            URLConnection conn = url.openConnection();
            reader = new BufferedReader(new InputStreamReader(conn.getInputStream()));
            String lineCont;
            while ((lineCont = reader.readLine()) != null) {
                cont.append(lineCont).append("</br>");
            }
        } catch (IOException e) {
            // MalformedURLException is an IOException, so one handler covers both.
            e.printStackTrace();
        } finally {
            // Previously the reader was closed only on the success path.
            closeQuietly(reader);
        }

        System.out.println(cont.toString());
    }

    /** Closes a resource if it is non-null, ignoring any failure from close(). */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if close() itself fails.
        }
    }

    public static void main(String[] args) {

        // Usage examples:
        // String cont = readHtml("e://test.htm"); // read a file
        // writerFile(cont, "e://test.txt");       // write the result
        // checkChinese("qwe123");

        readUrlCont("http://www.163.com");
    }
}


开心洋葱 , 版权所有丨如未注明 , 均为原创丨未经授权请勿修改 , 转载请注明Java读取Html文本解析email地址的代码
喜欢 (0)
加载中……