info.bliki.htmlcleaner
Class HtmlCleaner

java.lang.Object
  extended by info.bliki.htmlcleaner.HtmlCleaner

public class HtmlCleaner
extends java.lang.Object

Main HtmlCleaner class.

It represents the public interface to the user. Its task is to call the tokenizer with the specified source HTML, traverse the list of produced tokens and create an internal object model. It also offers a set of methods to write the resulting XML to a string, file or any output stream.

Typical usage is the following:

HtmlCleaner cleaner = new HtmlCleaner(...);   // one of several constructors
cleaner.setXXX(...)                           // optionally, set cleaner's behaviour
cleaner.clean();                              // calls cleaning process
cleaner.writeXmlXXX(...)                      // writes resulting XML to string, file or any output stream

Created by: Vladimir Nikic
Date: November, 2006


Field Summary
static java.lang.String DEFAULT_CHARSET
           
 
Constructor Summary
HtmlCleaner(java.io.File file)
          Constructor - creates the instance for the specified file.
HtmlCleaner(java.io.File file, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance for the specified file.
HtmlCleaner(java.io.File file, java.lang.String charset)
          Constructor - creates the instance for the specified file and charset.
HtmlCleaner(java.io.File file, java.lang.String charset, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance for the specified file and charset.
HtmlCleaner(java.io.InputStream in)
          Constructor - creates the instance for the specified input stream.
HtmlCleaner(java.io.InputStream in, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance for the specified input stream.
HtmlCleaner(java.io.InputStream in, java.lang.String charset)
          Constructor - creates the instance for the specified input stream and charset.
HtmlCleaner(java.lang.String htmlContent)
          Constructor - creates the instance with specified html content as String.
HtmlCleaner(java.lang.String htmlContent, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance with specified html content as String.
HtmlCleaner(java.net.URL url)
          Constructor - creates the instance for the specified URL.
HtmlCleaner(java.net.URL url, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance for the specified URL.
HtmlCleaner(java.net.URL url, java.lang.String charset)
          Constructor - creates the instance for specified URL and charset.
HtmlCleaner(java.net.URL url, java.lang.String charset, ITagInfoProvider tagInfoProvider)
          Constructor - creates the instance for specified URL and charset.
 
Method Summary
 void clean()
           
 java.util.Set getAllTags()
           
 TagNode getBodyNode()
           
 java.lang.String getCompactXmlAsString()
           
 java.lang.String getHyphenReplacementInComment()
           
 java.util.List getNodeList()
           
 java.lang.String getPrettyXmlAsString()
           
 java.lang.String getXmlAsString()
           
 boolean isAdvancedXmlEscape()
           
 boolean isOmitComments()
           
 boolean isOmitDeprecatedTags()
           
 boolean isOmitDoctypeDeclaration()
           
 boolean isOmitUnknownTags()
           
 boolean isOmitXmlDeclaration()
           
 boolean isOmitXmlnsAttributes()
           
 boolean isRecognizeUnicodeChars()
           
 boolean isTranslateSpecialEntities()
           
 boolean isUseCdataForScriptAndStyle()
           
 void setAdvancedXmlEscape(boolean advancedXmlEscape)
           
 void setBodyNode(TagNode bodyNode)
           
 void setHyphenReplacementInComment(java.lang.String hyphenReplacementInComment)
           
 void setOmitComments(boolean omitComments)
           
 void setOmitDeprecatedTags(boolean omitDeprecatedTags)
           
 void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration)
           
 void setOmitUnknownTags(boolean omitUnknownTags)
           
 void setOmitXmlDeclaration(boolean omitXmlDeclaration)
           
 void setOmitXmlnsAttributes(boolean omitXmlnsAttributes)
           
 void setRecognizeUnicodeChars(boolean recognizeUnicodeChars)
           
 void setTranslateSpecialEntities(boolean translateSpecialEntities)
           
 void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle)
           
 void writeCompactXmlToFile(java.lang.String fileName)
           
 void writeCompactXmlToFile(java.lang.String fileName, java.lang.String charset)
           
 void writeCompactXmlToStream(java.io.OutputStream out)
           
 void writeCompactXmlToStream(java.io.OutputStream out, java.lang.String charset)
           
 void writePrettyXmlToFile(java.lang.String fileName)
           
 void writePrettyXmlToFile(java.lang.String fileName, java.lang.String charset)
           
 void writePrettyXmlToStream(java.io.OutputStream out)
           
 void writePrettyXmlToStream(java.io.OutputStream out, java.lang.String charset)
           
 void writeXml(XmlSerializer xmlSerializer)
          The most general way to serialize resulting XML.
 void writeXmlToFile(java.lang.String fileName)
           
 void writeXmlToFile(java.lang.String fileName, java.lang.String charset)
           
 void writeXmlToStream(java.io.OutputStream out)
           
 void writeXmlToStream(java.io.OutputStream out, java.lang.String charset)
           
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

DEFAULT_CHARSET

public static final java.lang.String DEFAULT_CHARSET
Constructor Detail

HtmlCleaner

public HtmlCleaner(java.lang.String htmlContent,
                   ITagInfoProvider tagInfoProvider)
Constructor - creates the instance with specified html content as String.

Parameters:
htmlContent -

HtmlCleaner

public HtmlCleaner(java.lang.String htmlContent)
Constructor - creates the instance with specified html content as String.

Parameters:
htmlContent -

HtmlCleaner

public HtmlCleaner(java.io.File file,
                   java.lang.String charset,
                   ITagInfoProvider tagInfoProvider)
            throws java.io.IOException
Constructor - creates the instance for the specified file and charset.

Parameters:
file -
charset -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.io.File file,
                   java.lang.String charset)
            throws java.io.IOException
Constructor - creates the instance for the specified file and charset.

Parameters:
file -
charset -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.io.File file,
                   ITagInfoProvider tagInfoProvider)
            throws java.io.IOException
Constructor - creates the instance for the specified file.

Parameters:
file -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.io.File file)
            throws java.io.IOException
Constructor - creates the instance for the specified file.

Parameters:
file -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.net.URL url,
                   java.lang.String charset,
                   ITagInfoProvider tagInfoProvider)
            throws java.io.IOException
Constructor - creates the instance for specified URL and charset.

Parameters:
url -
charset -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.net.URL url,
                   ITagInfoProvider tagInfoProvider)
            throws java.io.IOException
Constructor - creates the instance for the specified URL.

Parameters:
url -
tagInfoProvider -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.net.URL url,
                   java.lang.String charset)
            throws java.io.IOException
Constructor - creates the instance for specified URL and charset.

Parameters:
url -
charset -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.net.URL url)
            throws java.io.IOException
Constructor - creates the instance for the specified URL.

Parameters:
url -
Throws:
java.io.IOException

HtmlCleaner

public HtmlCleaner(java.io.InputStream in,
                   ITagInfoProvider tagInfoProvider)
Constructor - creates the instance for the specified input stream.

Parameters:
in -
tagInfoProvider -

HtmlCleaner

public HtmlCleaner(java.io.InputStream in)
Constructor - creates the instance for the specified input stream.

Parameters:
in -

HtmlCleaner

public HtmlCleaner(java.io.InputStream in,
                   java.lang.String charset)
            throws java.io.IOException
Constructor - creates the instance for the specified input stream and charset.

Parameters:
in -
charset -
Throws:
java.io.IOException
Method Detail

clean

public void clean()
           throws java.io.IOException
Throws:
java.io.IOException

getNodeList

public java.util.List getNodeList()
                           throws java.io.IOException
Throws:
java.io.IOException

isOmitUnknownTags

public boolean isOmitUnknownTags()

setOmitUnknownTags

public void setOmitUnknownTags(boolean omitUnknownTags)

isOmitDeprecatedTags

public boolean isOmitDeprecatedTags()

setOmitDeprecatedTags

public void setOmitDeprecatedTags(boolean omitDeprecatedTags)

isAdvancedXmlEscape

public boolean isAdvancedXmlEscape()

setAdvancedXmlEscape

public void setAdvancedXmlEscape(boolean advancedXmlEscape)

isUseCdataForScriptAndStyle

public boolean isUseCdataForScriptAndStyle()

setUseCdataForScriptAndStyle

public void setUseCdataForScriptAndStyle(boolean useCdataForScriptAndStyle)

isTranslateSpecialEntities

public boolean isTranslateSpecialEntities()

setTranslateSpecialEntities

public void setTranslateSpecialEntities(boolean translateSpecialEntities)

isRecognizeUnicodeChars

public boolean isRecognizeUnicodeChars()

setRecognizeUnicodeChars

public void setRecognizeUnicodeChars(boolean recognizeUnicodeChars)

isOmitComments

public boolean isOmitComments()

setOmitComments

public void setOmitComments(boolean omitComments)

isOmitXmlDeclaration

public boolean isOmitXmlDeclaration()

setOmitXmlDeclaration

public void setOmitXmlDeclaration(boolean omitXmlDeclaration)

isOmitDoctypeDeclaration

public boolean isOmitDoctypeDeclaration()

setOmitDoctypeDeclaration

public void setOmitDoctypeDeclaration(boolean omitDoctypeDeclaration)

isOmitXmlnsAttributes

public boolean isOmitXmlnsAttributes()

setOmitXmlnsAttributes

public void setOmitXmlnsAttributes(boolean omitXmlnsAttributes)

getHyphenReplacementInComment

public java.lang.String getHyphenReplacementInComment()

setHyphenReplacementInComment

public void setHyphenReplacementInComment(java.lang.String hyphenReplacementInComment)

getAllTags

public java.util.Set getAllTags()

writeXml

public void writeXml(XmlSerializer xmlSerializer)
              throws java.io.IOException
The most general way to serialize resulting XML.

Parameters:
xmlSerializer -
Throws:
java.io.IOException

writeXmlToStream

public void writeXmlToStream(java.io.OutputStream out)
                      throws java.io.IOException
Throws:
java.io.IOException

writeXmlToStream

public void writeXmlToStream(java.io.OutputStream out,
                             java.lang.String charset)
                      throws java.io.IOException
Throws:
java.io.IOException

writeCompactXmlToStream

public void writeCompactXmlToStream(java.io.OutputStream out)
                             throws java.io.IOException
Throws:
java.io.IOException

writeCompactXmlToStream

public void writeCompactXmlToStream(java.io.OutputStream out,
                                    java.lang.String charset)
                             throws java.io.IOException
Throws:
java.io.IOException

writePrettyXmlToStream

public void writePrettyXmlToStream(java.io.OutputStream out)
                            throws java.io.IOException
Throws:
java.io.IOException

writePrettyXmlToStream

public void writePrettyXmlToStream(java.io.OutputStream out,
                                   java.lang.String charset)
                            throws java.io.IOException
Throws:
java.io.IOException

writeXmlToFile

public void writeXmlToFile(java.lang.String fileName)
                    throws java.io.IOException
Throws:
java.io.IOException

writeXmlToFile

public void writeXmlToFile(java.lang.String fileName,
                           java.lang.String charset)
                    throws java.io.IOException
Throws:
java.io.IOException

writeCompactXmlToFile

public void writeCompactXmlToFile(java.lang.String fileName)
                           throws java.io.IOException
Throws:
java.io.IOException

writeCompactXmlToFile

public void writeCompactXmlToFile(java.lang.String fileName,
                                  java.lang.String charset)
                           throws java.io.IOException
Throws:
java.io.IOException

writePrettyXmlToFile

public void writePrettyXmlToFile(java.lang.String fileName)
                          throws java.io.IOException
Throws:
java.io.IOException

writePrettyXmlToFile

public void writePrettyXmlToFile(java.lang.String fileName,
                                 java.lang.String charset)
                          throws java.io.IOException
Throws:
java.io.IOException

getXmlAsString

public java.lang.String getXmlAsString()
                                throws java.io.IOException
Throws:
java.io.IOException

getCompactXmlAsString

public java.lang.String getCompactXmlAsString()
                                       throws java.io.IOException
Throws:
java.io.IOException

getPrettyXmlAsString

public java.lang.String getPrettyXmlAsString()
                                      throws java.io.IOException
Throws:
java.io.IOException

getBodyNode

public TagNode getBodyNode()

setBodyNode

public void setBodyNode(TagNode bodyNode)


Copyright © 2012 Java Wikipedia API (Bliki engine). All Rights Reserved.