|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object com.cc.framework.util.parser.HtmlParser
A very simple HTML parser
Nested Class Summary | |
private static class |
HtmlParser.Attribute
A single Attribute definition |
private static class |
HtmlParser.AttributesImpl
The attribute collection of an HTML tag |
Field Summary | |
private java.util.Stack |
elements
The element stack the parser uses to track well-formed syntax (Stack<String>) |
private EntityMapper |
entityMapper
The object to map entities |
private HtmlHandler |
handler
The handler that implements the callback methods the parser will call during processing of the document |
protected int |
pos
The current processing position (index) |
protected int |
processed
The position (index) to which the document is processed |
private char[] |
source
The document's source code |
private boolean |
validate
This flag tells the parser to check if the document is well formed |
Constructor Summary | |
HtmlParser()
Constructor |
Method Summary | |
protected boolean |
eos()
|
protected HtmlHandler |
getHandler()
|
protected char[] |
getSource()
|
protected boolean |
isIdentifierChar(char c)
Returns true when the given character is a valid
identifier character |
boolean |
isValidating()
|
protected boolean |
isWhitespaceChar(char c)
Returns true when the given character is a valid
whitespace character |
protected boolean |
match(char value)
|
protected boolean |
match(java.lang.String value)
|
HtmlHandler |
parse(java.lang.String html,
HtmlHandler handler)
parses the given HTML code |
protected HtmlAttributes |
parseAttributes()
Parses the attributes of a tag |
protected java.lang.String |
parseAttributeValue()
Parses an attribute value. |
protected java.lang.String |
parseIdentifier(boolean namespacePrefix)
Parses an identifier |
protected void |
process()
|
protected void |
processChars()
Process CDATA |
protected void |
processComment()
Process a Comment |
protected void |
processElement()
Valid formats are <_name_attr_=_"value"_[/]> <_name_attr_=_'value'_[/]> <_name_attr_=_value_[/]> <_name_attr_[/]> <_/_name_attr_> |
protected void |
processEntity()
Processes an HTML entity. |
protected void |
processWhitespace()
|
protected void |
reset()
resets the internal state of the parser |
void |
setValidating(boolean validate)
|
Methods inherited from class java.lang.Object |
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
private EntityMapper entityMapper
private boolean validate
private char[] source
protected int processed
protected int pos
private HtmlHandler handler
private java.util.Stack elements
Constructor Detail |
public HtmlParser()
Method Detail |
protected void reset()
protected HtmlHandler getHandler()
protected char[] getSource()
protected boolean eos()
protected boolean match(char value)
protected boolean match(java.lang.String value)
public HtmlHandler parse(java.lang.String html, HtmlHandler handler)
html
- the HTML code to parse
handler
- The handler that implements the callback methods the parser
will call during processing of the document
protected void process()
protected boolean isWhitespaceChar(char c)
true
when the given character is a valid
whitespace character
c
- the character to test
protected boolean isIdentifierChar(char c)
true
when the given character is a valid
identifier character
c
- the character to test
public boolean isValidating()
public void setValidating(boolean validate)
validate
- the validation flag to set
protected void processEntity()
protected void processChars()
protected void processComment()
protected void processElement()
protected HtmlAttributes parseAttributes()
protected java.lang.String parseAttributeValue()
null
when no identifier could be
found
protected java.lang.String parseIdentifier(boolean namespacePrefix)
namespacePrefix
- indicates that there may be a namespace prefix
null
when no identifier could be
found
protected void processWhitespace()
|
|||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | ||||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |