1 | |
package net.sf.jolene.dom; |
2 | |
|
3 | |
import net.sf.jolene.constants.Elements; |
4 | |
import net.sf.jolene.constants.Tags; |
5 | |
import net.sf.jolene.html.Attributes; |
6 | |
import org.apache.log4j.LogManager; |
7 | |
import org.apache.log4j.Logger; |
8 | |
|
9 | |
import java.io.BufferedWriter; |
10 | |
import java.io.FileWriter; |
11 | |
import java.io.IOException; |
12 | |
import java.util.Iterator; |
13 | |
import java.util.StringTokenizer; |
14 | |
|
15 | |
|
16 | |
|
17 | |
|
18 | |
|
19 | |
|
20 | |
|
21 | |
final class Parser { |
22 | |
|
23 | 1 | private static final Logger log = LogManager.getLogger(Parser.class); |
24 | |
|
25 | |
|
26 | |
String workBuffer; |
27 | |
Document document; |
28 | |
|
29 | 34 | Parser(String workBuffer, Document document) throws MalformedHTMLException { |
30 | 34 | this.workBuffer = workBuffer; |
31 | 34 | this.document = document; |
32 | 34 | parse(); |
33 | 33 | } |
34 | |
|
35 | |
void parse() throws MalformedHTMLException { |
36 | |
|
37 | |
long startTime; |
38 | |
|
39 | 34 | startTime = System.currentTimeMillis(); |
40 | |
|
41 | |
|
42 | |
int n; |
43 | 34 | n = workBuffer.toUpperCase().indexOf("<HTML>"); |
44 | 34 | if (n > -1) { |
45 | |
|
46 | 32 | document.doctype = workBuffer.substring(0, n).trim(); |
47 | |
} |
48 | |
|
49 | 34 | if (!parseHead()) { |
50 | 0 | return; |
51 | |
} |
52 | |
|
53 | 34 | if (!parseBody()) { |
54 | 0 | return; |
55 | |
} |
56 | |
|
57 | 33 | log.info("time to parse doc: " + document + " " + (System.currentTimeMillis() - startTime)); |
58 | 33 | } |
59 | |
|
60 | |
private boolean parseHead() { |
61 | |
|
62 | |
String head, uhead; |
63 | |
int n1, n2; |
64 | 34 | head = workBuffer; |
65 | 34 | uhead = head.toUpperCase(); |
66 | |
|
67 | 34 | n1 = uhead.indexOf("<HEAD"); |
68 | 34 | n2 = uhead.indexOf("</HEAD"); |
69 | 34 | if (n1 == -1 || n2 == -1) { |
70 | 0 | return true; |
71 | |
} |
72 | |
|
73 | |
|
74 | 34 | head = head.substring(n1 + 6, n2); |
75 | 34 | uhead = head.toUpperCase(); |
76 | |
|
77 | |
|
78 | |
int t1, t2; |
79 | 34 | t1 = uhead.indexOf("<TITLE"); |
80 | 34 | t2 = uhead.indexOf("</TITLE"); |
81 | |
|
82 | |
|
83 | 34 | if (t1 == -1) { |
84 | 9 | document.setTitle(""); |
85 | |
} else { |
86 | |
|
87 | |
|
88 | |
|
89 | 25 | document.setTitle(head.substring(t1 + 7, t2)); |
90 | |
|
91 | |
|
92 | 25 | StringBuffer s = new StringBuffer(head); |
93 | 25 | s.delete(t1, t2 + 8); |
94 | 25 | head = s.toString(); |
95 | |
} |
96 | |
|
97 | 34 | StringTokenizer st = new StringTokenizer(head, "<"); |
98 | |
String cc; |
99 | |
int pos, end; |
100 | |
|
101 | 34 | Header scriptElement = null; |
102 | 34 | String scriptContent = ""; |
103 | |
|
104 | 34 | Header styleElement = null; |
105 | 34 | String styleContent = ""; |
106 | |
|
107 | 191 | while (st.hasMoreTokens()) { |
108 | 157 | cc = st.nextToken(); |
109 | 157 | log.debug("RAW head: " + cc); |
110 | |
|
111 | |
|
112 | |
|
113 | |
|
114 | |
|
115 | 157 | if (cc.length() > 4 && cc.toUpperCase().startsWith("META")) { |
116 | 33 | pos = head.indexOf(cc) - 1; |
117 | 33 | end = cc.indexOf(">"); |
118 | 33 | if (end > -1) { |
119 | 33 | end = pos + end + 1; |
120 | 33 | cc = head.substring(pos, end); |
121 | 33 | log.debug(cc); |
122 | |
} else { |
123 | |
|
124 | 0 | throw new MalformedHTMLException("No > found in HEAD tag - position: " + pos + " " + cc); |
125 | |
} |
126 | |
|
127 | 33 | log.debug("head tag META: " + cc); |
128 | 33 | cc = cc.substring(5); |
129 | 33 | Attributes a = parseAttributes(cc); |
130 | 33 | addHeader(a, Tags.meta); |
131 | |
|
132 | |
} |
133 | |
|
134 | 157 | if (cc.length() > 4 && cc.toUpperCase().startsWith("LINK")) { |
135 | 14 | pos = head.indexOf(cc) - 1; |
136 | 14 | end = cc.indexOf(">"); |
137 | 14 | if (end > -1) { |
138 | 14 | end = pos + end + 1; |
139 | 14 | cc = head.substring(pos, end); |
140 | 14 | log.debug(cc); |
141 | |
} else { |
142 | |
|
143 | 0 | throw new MalformedHTMLException("No > found in HEAD tag - position: " + pos + " " + cc); |
144 | |
} |
145 | |
|
146 | 14 | log.debug("head tag LINK: " + cc); |
147 | 14 | cc = cc.substring(5); |
148 | 14 | Attributes a = parseAttributes(cc); |
149 | 14 | addHeader(a, Tags.link); |
150 | |
|
151 | |
} |
152 | |
|
153 | |
|
154 | 157 | if (cc.length() > 6 && cc.toUpperCase().startsWith("/SCRIPT")) { |
155 | 13 | scriptElement = null; |
156 | 13 | scriptContent = ""; |
157 | |
} |
158 | |
|
159 | 157 | if (scriptElement != null) { |
160 | |
|
161 | |
|
162 | |
|
163 | 13 | String script = scriptElement.getValue(); |
164 | 13 | scriptElement.setValue(script + '<' + cc); |
165 | |
} |
166 | |
|
167 | 157 | if (cc.length() > 6 && cc.toUpperCase().startsWith("SCRIPT")) { |
168 | 13 | pos = head.indexOf(cc) - 1; |
169 | 13 | end = cc.indexOf(">"); |
170 | 13 | if (end > -1) { |
171 | |
|
172 | 13 | if (cc.length() > end + 1) { |
173 | 12 | scriptContent = cc.substring(end + 1); |
174 | |
} else { |
175 | 1 | scriptContent = ""; |
176 | |
} |
177 | 13 | cc = cc.substring(0, end); |
178 | 13 | log.debug(cc); |
179 | 13 | log.debug("script:" + scriptContent); |
180 | |
} else { |
181 | |
|
182 | 0 | throw new MalformedHTMLException("No > found in HEAD tag - position: " + pos + " " + cc); |
183 | |
} |
184 | |
|
185 | 13 | log.debug("head tag SCRIPT: " + cc + " len: " + cc.length()); |
186 | |
Attributes a; |
187 | 13 | if (cc.length() > 7) { |
188 | 10 | cc = cc.substring(7); |
189 | 10 | a = parseAttributes(cc); |
190 | |
} else { |
191 | 3 | a = new Attributes(); |
192 | |
} |
193 | 13 | scriptElement = addHeader(a, Tags.script); |
194 | 13 | scriptElement.setValue(scriptContent); |
195 | |
} |
196 | |
|
197 | |
|
198 | 157 | if (cc.length() > 5 && cc.toUpperCase().startsWith("/STYLE")) { |
199 | 13 | styleElement = null; |
200 | 13 | styleContent = ""; |
201 | |
} |
202 | |
|
203 | 157 | if (styleElement != null) { |
204 | |
|
205 | 7 | String style = styleElement.getValue(); |
206 | 7 | styleElement.setValue(style + '<' + cc); |
207 | |
} |
208 | |
|
209 | 157 | if (cc.length() > 5 && cc.toUpperCase().startsWith("STYLE")) { |
210 | 13 | pos = head.indexOf(cc) - 1; |
211 | 13 | end = cc.indexOf(">"); |
212 | 13 | if (end > -1) { |
213 | |
|
214 | 13 | if (cc.length() > end + 1) { |
215 | 13 | styleContent = cc.substring(end + 1); |
216 | |
} else { |
217 | 0 | styleContent = ""; |
218 | |
} |
219 | 13 | cc = cc.substring(0, end); |
220 | 13 | log.debug(cc); |
221 | 13 | log.debug("style:" + styleContent); |
222 | |
} else { |
223 | |
|
224 | 0 | throw new MalformedHTMLException("No > found in HEAD tag - position: " + pos + " " + cc); |
225 | |
} |
226 | |
|
227 | 13 | log.debug("head tag STYLE: " + cc + " len: " + cc.length()); |
228 | |
Attributes a; |
229 | 13 | if (cc.length() > 6) { |
230 | 8 | cc = cc.substring(6); |
231 | 8 | a = parseAttributes(cc); |
232 | |
} else { |
233 | 5 | a = new Attributes(); |
234 | |
} |
235 | 13 | styleElement = addHeader(a, Tags.style); |
236 | 13 | styleElement.setValue(styleContent); |
237 | 13 | } |
238 | |
|
239 | |
|
240 | |
} |
241 | |
|
242 | 34 | return true; |
243 | |
} |
244 | |
|
245 | |
private Header addHeader(Attributes attributes, Tags tag) { |
246 | 73 | Iterator i = attributes.keySet().iterator(); |
247 | 73 | Header element = new Header(); |
248 | 73 | element.tag = tag; |
249 | |
String key, value; |
250 | 214 | while (i.hasNext()) { |
251 | 141 | key = (String) i.next(); |
252 | 141 | value = attributes.getAttribute(key); |
253 | 141 | element.setAttribute(key, value); |
254 | |
} |
255 | |
|
256 | 73 | document.head.add(element); |
257 | 73 | return element; |
258 | |
} |
259 | |
|
260 | |
private boolean parseBody() throws MalformedHTMLException { |
261 | |
String bodyProperties; |
262 | |
|
263 | |
|
264 | |
|
265 | |
int n1, n2, n; |
266 | |
|
267 | |
String c; |
268 | 34 | c = workBuffer.toUpperCase(); |
269 | 34 | n1 = c.indexOf("<BODY"); |
270 | 34 | n2 = c.indexOf("</BODY"); |
271 | 34 | if (n1 == -1 || n2 == -1) { |
272 | 0 | return false; |
273 | |
} |
274 | |
|
275 | |
|
276 | 34 | document.bodyText = workBuffer.substring(n1 + 5, n2); |
277 | |
|
278 | |
|
279 | |
|
280 | |
|
281 | |
|
282 | |
|
283 | |
|
284 | 34 | if (!document.bodyText.trim().startsWith(">")) { |
285 | 5 | n = document.bodyText.indexOf(">"); |
286 | 5 | bodyProperties = document.bodyText.trim().substring(0, n - 1); |
287 | |
|
288 | |
|
289 | 5 | log.debug("about to open body"); |
290 | 5 | document.body = parseAttributes(bodyProperties); |
291 | 5 | document.bodyText = document.bodyText.substring(n + 1).trim(); |
292 | |
|
293 | |
} else { |
294 | 29 | document.body = new Attributes(); |
295 | 29 | document.bodyText = document.bodyText.substring(2).trim(); |
296 | |
} |
297 | |
|
298 | 34 | if (log.isDebugEnabled()) { |
299 | 0 | BufferedWriter out = null; |
300 | |
try { |
301 | 0 | out = new BufferedWriter(new FileWriter("c:\\temp\\body.txt")); |
302 | 0 | out.write(document.bodyText); |
303 | 0 | } catch (IOException e) { |
304 | 0 | log.error(e.getMessage(), e); |
305 | |
} finally { |
306 | 0 | if (out != null) { |
307 | |
try { |
308 | 0 | out.close(); |
309 | 0 | } catch (IOException e) { |
310 | 0 | } |
311 | |
} |
312 | |
} |
313 | |
|
314 | |
} |
315 | 34 | return parseForms(); |
316 | |
} |
317 | |
|
318 | |
private boolean parseForms() throws MalformedHTMLException { |
319 | |
|
320 | |
|
321 | |
|
322 | |
|
323 | |
|
324 | |
|
325 | |
|
326 | |
|
327 | |
|
328 | |
|
329 | |
|
330 | |
|
331 | |
|
332 | |
|
333 | |
|
334 | |
|
335 | |
|
336 | |
|
337 | |
|
338 | 34 | StringTokenizer st = new StringTokenizer(document.bodyText, "<"); |
339 | |
String cc; |
340 | |
|
341 | 34 | int n = -1; |
342 | |
int nn; |
343 | |
Form f; |
344 | 34 | int start = 0; |
345 | 34 | int end = 0; |
346 | 34 | boolean inForm = false, inComments = false; |
347 | |
|
348 | 2497 | while (st.hasMoreTokens()) { |
349 | 2464 | cc = st.nextToken(); |
350 | |
|
351 | 2464 | log.debug(cc); |
352 | |
|
353 | 2464 | if (cc.length() > 5 |
354 | |
&& cc.toUpperCase().substring(0, 5).equals("/FORM")) { |
355 | 42 | inForm = false; |
356 | |
} |
357 | |
|
358 | 2464 | if (cc.length() > 4 |
359 | |
&& cc.toUpperCase().substring(0, 4).equals("FORM")) { |
360 | |
|
361 | 43 | inForm = true; |
362 | |
|
363 | 43 | start = document.bodyText.indexOf(cc, end) - 1; |
364 | 43 | nn = cc.indexOf(">"); |
365 | 43 | if (nn > -1) { |
366 | 42 | end = start + nn + 1; |
367 | 42 | cc = cc.substring(0, nn); |
368 | |
} else { |
369 | |
|
370 | 1 | throw new MalformedHTMLException("No > found in FORM tag - position: " + start); |
371 | |
} |
372 | |
|
373 | |
|
374 | |
|
375 | |
|
376 | |
|
377 | |
|
378 | 42 | cc = cc.substring(4); |
379 | |
|
380 | 42 | f = new Form(); |
381 | 42 | f.setStartPoint(start); |
382 | 42 | f.setEndPoint(end); |
383 | 42 | log.debug("about to open form " + (n + 1)); |
384 | |
|
385 | 42 | f.setAttributes(parseAttributes(cc)); |
386 | |
|
387 | 42 | if (f.hasAttribute("name")) { |
388 | 13 | f.setName(f.getAttribute("name")); |
389 | |
} |
390 | |
|
391 | 42 | document.forms.add(f); |
392 | 42 | n++; |
393 | |
|
394 | |
|
395 | |
|
396 | |
|
397 | |
|
398 | |
|
399 | |
|
400 | |
|
401 | |
|
402 | |
|
403 | |
|
404 | |
|
405 | |
|
406 | |
} |
407 | |
|
408 | |
|
409 | |
|
410 | 2463 | if (cc.length() > 5 |
411 | |
&& cc.toUpperCase().substring(0, 5).equals("INPUT") && n > -1) { |
412 | |
|
413 | |
|
414 | 161 | start = document.bodyText.indexOf(cc, end) - 1; |
415 | |
|
416 | 161 | nn = cc.indexOf(">"); |
417 | 161 | if (nn > -1) { |
418 | 161 | end = start + nn + 1; |
419 | 161 | cc = cc.substring(0, nn); |
420 | |
} else { |
421 | |
|
422 | 0 | throw new MalformedHTMLException("No > found in INPUT tag - position: " + start); |
423 | |
} |
424 | |
|
425 | 161 | cc = cc.substring(5); |
426 | |
|
427 | 161 | log.debug("about to open input"); |
428 | |
|
429 | 161 | if (inForm) { |
430 | 161 | Attributes a = parseAttributes(cc); |
431 | 161 | if (a.hasAttribute("type") && a.getAttribute("type").equalsIgnoreCase("radio")) { |
432 | 18 | document.forms(n).addObject(Elements.radio, a, start, end); |
433 | 143 | } else if (a.hasAttribute("type") && a.getAttribute("type").equalsIgnoreCase("checkbox")) { |
434 | 14 | document.forms(n).addObject(Elements.checkbox, a, start, end); |
435 | |
} else { |
436 | 129 | document.forms(n).addObject(Elements.input, a, start, end); |
437 | |
} |
438 | |
} |
439 | |
|
440 | |
} |
441 | |
|
442 | |
|
443 | 2463 | if (cc.length() > 6 |
444 | |
&& cc.toUpperCase().substring(0, 6).equals("SELECT") && n > -1) { |
445 | |
|
446 | |
|
447 | 30 | start = document.bodyText.indexOf(cc, end) - 1; |
448 | 30 | nn = cc.indexOf(">"); |
449 | 30 | if (nn > -1) { |
450 | |
|
451 | 30 | cc = cc.substring(0, nn); |
452 | |
} else { |
453 | |
|
454 | 0 | throw new MalformedHTMLException("No > found in SELECT tag - position: " + start); |
455 | |
} |
456 | |
|
457 | 30 | end = document.bodyText.toUpperCase().indexOf("</SELECT>", start); |
458 | 30 | if (end > -1) { |
459 | 30 | end += "</SELECT>".length() - 1; |
460 | |
} else { |
461 | |
|
462 | 0 | throw new MalformedHTMLException("No </SELECT> found in SELECT tag - position: " |
463 | |
+ start); |
464 | |
} |
465 | |
|
466 | 30 | cc = cc.substring(6); |
467 | |
|
468 | 30 | log.debug("about to open select"); |
469 | |
|
470 | 30 | Attributes a = parseAttributes(cc); |
471 | 30 | Select selectObject = null; |
472 | |
|
473 | 30 | if (inForm) { |
474 | 30 | selectObject = (Select) document.forms(n).addObject(Elements.select, a, start, end); |
475 | |
} |
476 | |
|
477 | 30 | if (selectObject.hasAttribute("multiple")) { |
478 | 4 | selectObject.setMultiple(true); |
479 | |
} |
480 | |
|
481 | |
|
482 | |
|
483 | |
|
484 | |
|
485 | |
|
486 | |
|
487 | |
|
488 | |
|
489 | |
Option option; |
490 | |
|
491 | 30 | int index = -1; |
492 | |
|
493 | |
while (true) { |
494 | |
|
495 | |
|
496 | |
|
497 | 2538 | cc = st.nextToken(); |
498 | 2538 | if (cc.toUpperCase().startsWith("/SELECT>")) { |
499 | 30 | break; |
500 | |
} |
501 | |
|
502 | |
|
503 | |
|
504 | |
|
505 | 2508 | if (cc.length() > 6 |
506 | |
&& cc.toUpperCase().substring(0, 6).equals("OPTION")) { |
507 | |
|
508 | 1254 | index++; |
509 | |
|
510 | |
|
511 | |
|
512 | |
|
513 | |
|
514 | 1254 | option = new Option(); |
515 | 1254 | option.index = index; |
516 | 1254 | log.debug("about to open options"); |
517 | |
|
518 | 1254 | a = parseAttributes(cc); |
519 | 1254 | nn = cc.toUpperCase().indexOf("VALUE"); |
520 | 1254 | if (nn > -1) { |
521 | 1246 | nn = cc.indexOf(">"); |
522 | 1246 | option.value = a.getAttribute("VALUE"); |
523 | 1246 | option.text = cc.substring(nn + 1); |
524 | 1246 | option.selected = a.hasAttribute("SELECTED"); |
525 | |
} else { |
526 | 8 | nn = cc.indexOf(">"); |
527 | 8 | option.value = cc.substring(nn + 1); |
528 | 8 | option.text = cc.substring(nn + 1); |
529 | 8 | option.selected = a.hasAttribute("SELECTED"); |
530 | |
} |
531 | |
|
532 | 1254 | if (selectObject != null) { |
533 | 1254 | selectObject.addOption(option); |
534 | |
} |
535 | |
} |
536 | |
|
537 | |
} |
538 | |
|
539 | |
} |
540 | |
|
541 | 2463 | if (cc.length() > 8 |
542 | |
&& cc.toUpperCase().substring(0, 8).equals("TEXTAREA") && n > -1) { |
543 | |
|
544 | 7 | log.debug("textarea RAW:" + cc); |
545 | |
|
546 | |
|
547 | |
|
548 | 7 | start = document.bodyText.indexOf(cc, end) - 1; |
549 | 7 | nn = cc.indexOf(">"); |
550 | 7 | String text = ""; |
551 | 7 | if (nn > -1) { |
552 | 7 | end = start + nn + 1; |
553 | 7 | text = cc.substring(nn + 1); |
554 | 7 | cc = cc.substring(0, nn); |
555 | |
} else { |
556 | |
|
557 | 0 | throw new MalformedHTMLException("No > found in TEXTAREA tag - position: " + start); |
558 | |
} |
559 | |
|
560 | 7 | end += text.length() + "</TEXTAREA>".length(); |
561 | 7 | cc = cc.substring(8); |
562 | |
|
563 | 7 | log.debug("about to open textarea"); |
564 | |
|
565 | 7 | if (inForm) { |
566 | 7 | Attributes a = parseAttributes(cc); |
567 | |
TextArea o; |
568 | |
|
569 | 7 | o = (TextArea) document.forms(n).addObject(Elements.textarea, a, start, end); |
570 | 7 | o.setValue(text); |
571 | |
} |
572 | |
|
573 | |
|
574 | 7 | cc = st.nextToken(); |
575 | |
} |
576 | |
|
577 | 2463 | if (cc.length() > 5 |
578 | |
&& cc.toUpperCase().substring(0, 5).equals("LABEL") && n > -1) { |
579 | |
|
580 | |
|
581 | |
|
582 | |
|
583 | |
|
584 | |
|
585 | |
|
586 | |
|
587 | |
|
588 | |
|
589 | |
|
590 | |
|
591 | |
|
592 | |
|
593 | |
|
594 | |
|
595 | 18 | start = document.bodyText.indexOf(cc, end) - 1; |
596 | 18 | nn = cc.indexOf(">"); |
597 | 18 | if (nn > -1) { |
598 | 18 | end = start + nn + 1; |
599 | 18 | cc = cc.substring(0, nn); |
600 | |
|
601 | |
} else { |
602 | |
|
603 | 0 | throw new MalformedHTMLException("No > found in LABEL tag - position: " + start); |
604 | |
} |
605 | 18 | cc = cc.substring(5); |
606 | |
|
607 | |
|
608 | |
|
609 | 18 | log.debug("about to open label"); |
610 | |
|
611 | 18 | Attributes a = parseAttributes(cc); |
612 | |
|
613 | |
|
614 | |
int labelEnd; |
615 | 18 | labelEnd = document.bodyText.toUpperCase().indexOf("</LABEL>", end); |
616 | 18 | String text = ""; |
617 | 18 | text = document.bodyText.substring(end + 1, labelEnd); |
618 | 18 | end += text.length() + "</LABEL>".length(); |
619 | |
|
620 | |
|
621 | |
|
622 | |
|
623 | |
|
624 | 18 | int innerTagCount = 0; |
625 | 42 | while (!cc.toUpperCase().startsWith("/LABEL>")) { |
626 | 24 | cc = st.nextToken(); |
627 | 24 | innerTagCount++; |
628 | |
} |
629 | |
|
630 | 18 | if (inForm) { |
631 | |
HTMLElement element; |
632 | 18 | if (innerTagCount > 1) { |
633 | 3 | element = document.forms(n).addObject(Elements.text, a, start, end); |
634 | |
} else { |
635 | 15 | element = document.forms(n).addObject(Elements.label, a, start, end); |
636 | |
} |
637 | 18 | element.setValue(text); |
638 | |
|
639 | |
} |
640 | |
|
641 | |
} |
642 | |
|
643 | 2463 | if (cc.length() > 6 |
644 | |
&& cc.toUpperCase().substring(0, 6).equals("BUTTON") && n > -1) { |
645 | |
|
646 | |
|
647 | |
|
648 | |
|
649 | |
|
650 | |
|
651 | |
|
652 | |
|
653 | |
|
654 | |
|
655 | |
|
656 | |
|
657 | |
|
658 | |
|
659 | |
|
660 | 4 | start = document.bodyText.indexOf(cc, end) - 1; |
661 | 4 | nn = cc.indexOf(">"); |
662 | 4 | if (nn > -1) { |
663 | 4 | end = start + nn + 1; |
664 | 4 | cc = cc.substring(0, nn); |
665 | |
|
666 | |
} else { |
667 | |
|
668 | 0 | throw new MalformedHTMLException("No > found in BUTTON tag - position: " + start); |
669 | |
} |
670 | 4 | cc = cc.substring(6); |
671 | |
|
672 | |
|
673 | |
|
674 | 4 | log.debug("about to open button"); |
675 | |
|
676 | 4 | Attributes a = parseAttributes(cc); |
677 | |
|
678 | |
|
679 | |
int buttonEnd; |
680 | 4 | buttonEnd = document.bodyText.toUpperCase().indexOf("</BUTTON>", end); |
681 | 4 | String text = ""; |
682 | 4 | text = document.bodyText.substring(end + 1, buttonEnd); |
683 | 4 | end += text.length() + "</BUTTON>".length(); |
684 | |
|
685 | 4 | if (inForm) { |
686 | 4 | Button button = (Button) document.forms(n).addObject(Elements.button, a, start, end); |
687 | 4 | button.setContent(text); |
688 | |
} |
689 | |
|
690 | |
|
691 | |
|
692 | 8 | while (!cc.toUpperCase().startsWith("/BUTTON>")) { |
693 | 4 | cc = st.nextToken(); |
694 | |
} |
695 | |
|
696 | |
} |
697 | |
|
698 | 2463 | if (cc.length() > 3 |
699 | |
&& cc.toUpperCase().substring(0, 3).equals("IMG") && n > -1) { |
700 | |
|
701 | |
|
702 | 93 | start = document.bodyText.indexOf(cc, end) - 1; |
703 | |
|
704 | 93 | nn = cc.indexOf(">"); |
705 | 93 | if (nn > -1) { |
706 | 93 | end = start + nn + 1; |
707 | 93 | cc = cc.substring(0, nn); |
708 | |
} else { |
709 | |
|
710 | 0 | throw new MalformedHTMLException("No > found in IMG tag - position: " + start); |
711 | |
} |
712 | |
|
713 | 93 | cc = cc.substring(3); |
714 | |
|
715 | 93 | log.debug("object: " + cc); |
716 | 93 | log.debug("about to open img"); |
717 | |
|
718 | 93 | if (inForm) { |
719 | 78 | Attributes a = parseAttributes(cc); |
720 | 78 | document.forms(n).addObject(Elements.image, a, start, end); |
721 | |
} |
722 | |
} |
723 | |
|
724 | |
|
725 | 2463 | if (cc.length() > 5 && cc.toUpperCase().substring(0, 5).equals("TABLE") && n > -1) { |
726 | |
int saveStart, saveEnd; |
727 | 66 | saveStart = start; |
728 | 66 | saveEnd = end; |
729 | |
|
730 | 66 | start = document.bodyText.indexOf(cc, end) - 1; |
731 | |
|
732 | 66 | nn = cc.indexOf(">"); |
733 | 66 | if (nn > -1) { |
734 | |
|
735 | 66 | cc = cc.substring(0, nn); |
736 | |
} else { |
737 | |
|
738 | 0 | throw new MalformedHTMLException("No > found in TABLE tag - position: " + start); |
739 | |
} |
740 | |
|
741 | 66 | end = document.bodyText.toUpperCase().indexOf("</TABLE>", start); |
742 | 66 | if (end > -1) { |
743 | 66 | end += "</TABLE>".length() - 1; |
744 | |
} else { |
745 | |
|
746 | 0 | throw new MalformedHTMLException("No </TABKE> found in TABLE tag - position: " |
747 | |
+ start); |
748 | |
} |
749 | |
|
750 | |
|
751 | 66 | cc = cc.substring(5); |
752 | |
|
753 | 66 | log.debug("object: " + cc); |
754 | 66 | log.debug("about to open table"); |
755 | |
|
756 | 66 | Attributes a = parseAttributes(cc); |
757 | 66 | if (a.hasAttribute("id") && "GRID".equalsIgnoreCase(a.getAttribute("id").substring(0, 4)) && inForm) { |
758 | |
Grid grid; |
759 | 11 | grid = (Grid) document.forms(n).addObject(Elements.grid, a, start, end); |
760 | 11 | parseTable(grid, st); |
761 | |
|
762 | 11 | } else { |
763 | |
|
764 | 55 | start = saveStart; |
765 | 55 | end = saveEnd; |
766 | |
} |
767 | |
|
768 | |
|
769 | 66 | } |
770 | |
|
771 | |
|
772 | |
} |
773 | 33 | return true; |
774 | |
} |
775 | |
|
776 | |
private void parseTable(Grid grid, StringTokenizer st) { |
777 | |
while (true) { |
778 | |
String cc; |
779 | |
|
780 | |
|
781 | |
|
782 | |
|
783 | 43 | cc = st.nextToken(); |
784 | 43 | if (cc.toUpperCase().startsWith("/TABLE>")) { |
785 | 11 | break; |
786 | |
} |
787 | |
|
788 | 32 | if (cc.length() > 2 |
789 | |
&& cc.toUpperCase().substring(0, 2).equals("TR")) { |
790 | |
|
791 | 28 | cc = cc.substring(2); |
792 | 28 | log.debug("parseTable cc: " + cc); |
793 | |
|
794 | 28 | GridRow row = new GridRow(); |
795 | |
|
796 | |
|
797 | 28 | if (cc.startsWith(">")) { |
798 | 22 | row.setAttributes(new Attributes()); |
799 | |
} else { |
800 | 6 | row.setAttributes(parseAttributes(cc)); |
801 | |
} |
802 | |
|
803 | |
while (true) { |
804 | 276 | cc = st.nextToken(); |
805 | 276 | if (cc.toUpperCase().startsWith("/TR") || |
806 | |
cc.toUpperCase().startsWith("TR") || |
807 | |
cc.toUpperCase().startsWith("/TABLE")) { |
808 | 0 | break; |
809 | |
} |
810 | 248 | if (cc.length() > 2 |
811 | |
&& (cc.toUpperCase().substring(0, 2).equals("TD") || cc.toUpperCase().substring(0, 2).equals("TH"))) { |
812 | |
|
813 | 117 | cc = cc.substring(2); |
814 | |
|
815 | 117 | GridCell cell = new GridCell(); |
816 | |
|
817 | |
|
818 | 117 | if (cc.startsWith(">")) { |
819 | 24 | cell.setAttributes(new Attributes()); |
820 | |
} else { |
821 | 93 | cell.setAttributes(parseAttributes(cc)); |
822 | |
} |
823 | |
|
824 | |
|
825 | |
int n; |
826 | 117 | n = cc.indexOf(">"); |
827 | 117 | if (n > -1) { |
828 | 117 | cell.setValue(cc.substring(n + 1)); |
829 | |
} |
830 | 117 | row.cells.add(cell); |
831 | |
|
832 | |
|
833 | 117 | } |
834 | |
|
835 | |
|
836 | |
} |
837 | 28 | grid.rows.add(row); |
838 | |
|
839 | |
} |
840 | |
|
841 | 32 | } |
842 | |
|
843 | 11 | } |
844 | |
|
845 | |
private Attributes parseAttributes(String c) { |
846 | |
|
847 | |
|
848 | |
|
849 | |
|
850 | |
|
851 | 1829 | log.debug("RAW: " + c); |
852 | |
|
853 | |
|
854 | |
Attributes a; |
855 | |
String key, value; |
856 | |
int n; |
857 | 1829 | key = ""; |
858 | 1829 | value = ""; |
859 | |
|
860 | 1829 | a = new Attributes(); |
861 | 1829 | if (c.trim().length() == 0) { |
862 | |
|
863 | 25 | return a; |
864 | |
} |
865 | |
|
866 | |
|
867 | 1806 | while (c.indexOf(" =") > 0) { |
868 | 2 | c = c.replaceAll(" =", "="); |
869 | |
} |
870 | |
|
871 | 1806 | while (c.indexOf("= ") > 0) { |
872 | 2 | c = c.replaceAll("= ", "="); |
873 | |
} |
874 | |
|
875 | |
|
876 | 1804 | n = c.indexOf(">"); |
877 | 1804 | if (n > -1) { |
878 | 1353 | c = c.substring(0, n); |
879 | |
} |
880 | |
|
881 | |
|
882 | |
|
883 | |
|
884 | |
|
885 | |
|
886 | 1804 | if (c.charAt(c.length() - 1) == '/') { |
887 | 40 | c = c.substring(0, c.length() - 1); |
888 | |
} |
889 | |
|
890 | |
|
891 | 1804 | log.debug("string to tokenize: " + c); |
892 | 1804 | StringTokenizer st = new StringTokenizer(c); |
893 | |
|
894 | |
|
895 | |
String cc; |
896 | 1804 | int tokenCount = st.countTokens(); |
897 | 1804 | int currentToken = 0; |
898 | |
|
899 | 5776 | while (st.hasMoreTokens()) { |
900 | 3972 | cc = st.nextToken(); |
901 | 3972 | n = cc.indexOf("="); |
902 | |
|
903 | 3972 | if (n > -1) { |
904 | |
|
905 | 2698 | key = cc.substring(0, n); |
906 | 2698 | value = cc.substring(n + 1); |
907 | 2698 | log.debug("token: " + cc); |
908 | 2698 | log.debug("parseAttributes: key: " + key + " value: " + value); |
909 | 2698 | currentToken = 0; |
910 | |
while ((value.startsWith("\"") && !value.endsWith("\"")) |
911 | 3226 | || (value.startsWith("'") && !value.endsWith("'"))) { |
912 | |
|
913 | 528 | cc = st.nextToken(); |
914 | 528 | currentToken++; |
915 | |
|
916 | 528 | value += " " + cc; |
917 | |
|
918 | 528 | if (currentToken > tokenCount) { |
919 | |
|
920 | |
|
921 | 0 | break; |
922 | |
} |
923 | |
} |
924 | |
|
925 | 2698 | if (currentToken > tokenCount) { |
926 | 0 | throw new MalformedHTMLException("BAD HTML string parsing attributes: " + c); |
927 | |
} |
928 | |
} else { |
929 | |
|
930 | |
|
931 | 1274 | key = cc; |
932 | 1274 | value = key; |
933 | |
} |
934 | |
|
935 | 3972 | if (key.length() != 0) { |
936 | |
|
937 | |
|
938 | |
|
939 | |
|
940 | |
|
941 | 3972 | if (value.startsWith("'") || value.startsWith("\"")) { |
942 | 2692 | if (value.length() > 3) { |
943 | 2216 | value = value.substring(1, value.length() - 1); |
944 | 476 | } else if (value.length() == 3) { |
945 | 402 | value = value.substring(1, 2); |
946 | |
} else { |
947 | 74 | value = ""; |
948 | |
} |
949 | |
} |
950 | |
|
951 | 3972 | a.setAttribute(key, value); |
952 | |
|
953 | |
} else { |
954 | |
|
955 | 0 | throw new MalformedHTMLException("BAD HTML string parsing attributes - key length 0?"); |
956 | |
|
957 | |
} |
958 | |
|
959 | |
} |
960 | |
|
961 | 1804 | return a; |
962 | |
} |
963 | |
|
964 | |
|
965 | |
} |