java抓取网页

import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;

public class Test {

    /** Page fetched when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://jj.24365pt.com/index.jhtml";

    /** File that {@link #pipe} writes the downloaded text to, relative to the working directory. */
    private static final String OUTPUT_PATH = "../index.html";

    /**
     * Downloads a web page, saves a copy via {@link #pipe}, and prints the text to standard output.
     *
     * @param args optional; {@code args[0]}, when present, replaces the default URL
     */
    public static void main(String[] args) {
        String address = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        try {
            URL url = new URL(address);
            URLConnection conn = url.openConnection();
            // Read from the connection that was just opened. Calling url.openStream()
            // here would open a second, independent connection and leave this one unused.
            // setDoOutput(true) is not needed either: nothing is sent in a request body.
            String content = pipe(conn.getInputStream(), "utf-8");
            System.out.println(content);
        } catch (IOException e) {
            // Covers malformed URLs, connection failures and file-write failures.
            e.printStackTrace();
        }
    }

    /**
     * Reads {@code in} line by line, writes every non-empty line to {@code ../index.html}
     * (encoded as UTF-8, one line per row) and returns the same lines concatenated.
     *
     * <p>Empty lines are skipped, and the returned string contains no line separators
     * between the lines that were kept. The input stream is always closed before this
     * method returns, whether it completes normally or throws.
     *
     * @param in      stream to read; closed by this method
     * @param charset name of the charset used to decode {@code in}; {@code null} or
     *                {@code ""} means {@code "utf-8"}
     * @return all non-empty lines of the input, concatenated without separators
     * @throws IOException if the charset is unsupported, reading fails, or the output
     *                     file cannot be opened or written
     */
    static String pipe(InputStream in, String charset) throws IOException {
        String encoding = (charset == null || "".equals(charset)) ? "utf-8" : charset;
        StringBuilder text = new StringBuilder();

        // Resources are declared one by one so that each is closed even if creating a
        // later one fails (e.g. the input stream when the charset name is unsupported).
        try (InputStream source = in;
             BufferedReader reader = new BufferedReader(new InputStreamReader(source, encoding));
             FileOutputStream file = new FileOutputStream(OUTPUT_PATH);
             PrintWriter out = new PrintWriter(new OutputStreamWriter(file, "utf-8"))) {

            String line;
            while ((line = reader.readLine()) != null) {
                if (line.length() > 0) {
                    text.append(line);
                    out.println(line);
                }
            }

            // PrintWriter never throws on write errors; checkError() flushes and
            // reports whether any write failed, so a broken save is not silent.
            if (out.checkError()) {
                throw new IOException("Failed to write " + OUTPUT_PATH);
            }
        }
        return text.toString();
    }
}

lunzi   2007-12-26 16:33:35 评论:1   阅读:10990   引用:0
无题 @2012-04-11 18:26:30  
代码中的 in = url.openStream(); 应该改为
in = conn.getInputStream();
否则前面通过 url.openConnection() 得到的 conn 完全没有被使用,url.openStream() 还会另外再打开一个新的连接。

发表评论>>

署名发表(评论可管理,不必输入下面的姓名)

姓名:

主题:

内容: 最少15个,最长1000个字符

验证码: (如不清楚,请刷新)

Copyright@2004-2010 powered by YuLog