The Java Developers Almanac 1.4


Order this book from Amazon.

   
Home > List of Packages > javax.swing.text.html  [2 examples]

e1017. Getting the Links in an HTML Document

    // This method takes a URI which can be either a filename (e.g. file://c:/dir/file.html)
    // or a URL (e.g. http://host.com/page.html) and returns all HREF links in the document.
    public static String[] getLinks(String uriStr) {
        List result = new ArrayList();
    
        try {
            // Create a reader on the HTML content
            URL url = new URI(uriStr).toURL();
            URLConnection conn = url.openConnection();
            Reader rd = new InputStreamReader(conn.getInputStream());
    
            // Parse the HTML
            EditorKit kit = new HTMLEditorKit();
            HTMLDocument doc = (HTMLDocument)kit.createDefaultDocument();
            kit.read(rd, doc, 0);
    
            // Find all the A elements in the HTML document
            HTMLDocument.Iterator it = doc.getIterator(HTML.Tag.A);
            while (it.isValid()) {
                SimpleAttributeSet s = (SimpleAttributeSet)it.getAttributes();
    
                String link = (String)s.getAttribute(HTML.Attribute.HREF);
                if (link != null) {
                    // Add the link to the result list
                    result.add(link);
                }
                it.next();
            }
        } catch (MalformedURLException e) {
        } catch (URISyntaxException e) {
        } catch (BadLocationException e) {
        } catch (IOException e) {
        }
    
        // Return all found links
        return (String[])result.toArray(new String[result.size()]);
    }

 Related Examples
e1018. Getting the Text in an HTML Document


© 2002 Addison-Wesley.