JAVA抓取网页内容:生成静态页面

来源:考试大 发布时间:2009-12-29

  private static String getStaticPage(String surl) {

             String htmlContent = "";

             try {

                java.io.InputStream inputStream;

                java.net.URL url = new java.net.URL(surl);

                java.net.HttpURLConnection connection = (java.net.HttpURLConnection) url.openConnection();

                connection.connect();

                inputStream = connection.getInputStream();

                byte bytes[] = new byte[1024 * 2000];

                int index = 0;

                int count = inputStream.read(bytes, index, 1024 * 2000);

                while (count != -1) {

                    index += count;

                    count = inputStream.read(bytes, index, 1);

                }

                htmlContent = new String(bytes, "UTF-8");

                connection.disconnect();

             } catch (Exception ex) {

                ex.printStackTrace();

             }

             return htmlContent.trim();

         }

  
     // Fetch the page and save its content as a static HTML file.
     String src = getStaticPage("http://www.google.com");

     // The backslash must be escaped inside a Java string literal.
     File file = new File("d:\\aa.html");

     // Write the file as UTF-8, the same charset getStaticPage used to decode the
     // page, instead of the platform default. try-with-resources flushes and
     // closes the writer even if writing fails.
     try (PrintWriter myFile = new PrintWriter(file, "UTF-8")) {

         myFile.println(src);

     }