![]() |
The Java Developers Almanac 1.4 | Order this book from Amazon. |
e1018. Getting the Text in an HTML Document// This method takes a URI which can be either a filename (e.g. file://c:/dir/file.html) // or a URL (e.g. http://host.com/page.html) and returns all text in the document. public static String getText(String uriStr) { final StringBuffer buf = new StringBuffer(1000); try { // Create an HTML document that appends all text to buf HTMLDocument doc = new HTMLDocument() { public HTMLEditorKit.ParserCallback getReader(int pos) { return new HTMLEditorKit.ParserCallback() { // This method is whenever text is encountered in the HTML file public void handleText(char[] data, int pos) { buf.append(data); buf.append('\n'); } }; } }; // Create a reader on the HTML content URL url = new URI(uriStr).toURL(); URLConnection conn = url.openConnection(); Reader rd = new InputStreamReader(conn.getInputStream()); // Parse the HTML EditorKit kit = new HTMLEditorKit(); kit.read(rd, doc, 0); } catch (MalformedURLException e) { } catch (URISyntaxException e) { } catch (BadLocationException e) { } catch (IOException e) { } // Return the text return buf.toString(); }
© 2002 Addison-Wesley. |