/*
* Copyright 2000-2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
*
*
* COMPATIBILITY
*
* [28.01.2001, RammerI] Tested on W2K, with J2SE, JDK 1.3
* [29.01.2001, RammerI] Tested on W2K, with JDK 1.2.2
*
*
*
* FEATURES
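* = Rewriting of URLs found in HREF, SRC and CODEBASE attributes
* = Removal of SCRIPT, STYLE, NOSCRIPT, META, APPLET, OBJECT and HEAD
*   elements, and of on... event-handler attributes
*/
// While inside a multi-part ignored element (one with separate start and
// end tags), a counter for the nesting-level will be
// kept here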
// Output is discarded by addToResult() while this is greater than zero;
// handleEndTag() decrements it when an element selected for removal closes.
private int ignoreLevel = 0;
// remove* flags: when set, handleEndTag() decrements ignoreLevel as the
// corresponding element (SCRIPT, STYLE, NOSCRIPT, APPLET, OBJECT, HEAD) closes.
private boolean removeScript = true;
private boolean removeStyle = true;
private boolean removeNoScript = true;
// when set, handleSimpleTag() drops META tags without writing them
private boolean removeMeta = true;
private boolean removeApplet = true;
private boolean removeObject = true;
private boolean removeHead = true;
// NOTE(review): convertURLS() reads an openInNewWindow flag through "cb" to add
// TARGET="_BLANK" to A and AREA tags lacking a TARGET - confirm it is this field
private boolean openInNewWindow = false;
// remove the onClick=, onBlur=, etc. - Attributes
// (appendTagToResult() skips attributes longer than two characters whose name starts with "on")
private boolean removeOnSomething = true;
// handleComment() only writes while one of these two flags is set; handleEndTag()
// clears them when a SCRIPT/STYLE element that is not being removed closes.
private boolean inScript = false;
private boolean inStyle = false;
// accumulates the rewritten HTML; read back through getResult()
private StringWriter result = new StringWriter();
/** Creates a callback; the constructor body is intentionally empty. */
private Callback() {
    // nothing to do
}
/**
 * Appends the string form of {@code txt} to the result buffer, unless output
 * is currently suppressed (ignoreLevel greater than zero).
 *
 * Writing goes through this method so the buffer implementation
 * (StringBuffer vs. StringWriter) can be swapped in one place.
 *
 * @param txt object whose toString() value is appended
 * @return this callback, so calls can be chained
 */
private Callback addToResult(Object txt)
{
    if (ignoreLevel <= 0) {
        try {
            String text = txt.toString();
            result.write(text);
        } catch (Exception ignored) { /* ignore */ }
    }
    return this;
}
/**
 * Appends raw characters to the result buffer, unless output is currently
 * suppressed (ignoreLevel greater than zero).
 *
 * @param txt characters to append
 * @return this callback, so calls can be chained
 */
private Callback addToResult(char[] txt)
{
    if (ignoreLevel <= 0) {
        try {
            result.write(txt);
        } catch (Exception ignored) { /* ignore */ }
    }
    return this;
}
/**
 * Returns everything written to the result buffer so far.
 *
 * @return the cleaned and rewritten HTML content, prefixed with one space
 */
public String getResult() {
    try {
        result.flush();
    } catch (Exception ignored) { /* ignore */ }
    // WARNING (original author): the leading " " must stay - removing it
    // breaks things for a reason that was never tracked down.
    return " " + result.toString();
}
/**
 * No-op: this method has an empty body.
 *
 * @throws javax.swing.text.BadLocationException declared in the signature, never thrown here
 */
public void flush() throws javax.swing.text.BadLocationException {
    // intentionally empty
}
/**
 * Because scripts and styles are sometimes wrapped in HTML comments, a
 * comment met while inScript or inStyle is set is written to the result,
 * re-wrapped in its comment delimiters. All other comments are removed.
 *
 * The comment is written straight to the buffer (not via addToResult),
 * so ignoreLevel is not consulted here.
 *
 * @param values the comment text handed over by the parser
 * @param param  position argument from the parser; not used
 */
public void handleComment(char[] values,int param) {
    if (!inStyle && !inScript) {
        // an ordinary comment: drop it
        return;
    }
    try {
        result.write("<!--");
        if (values != null) {
            result.write(values);
        }
        result.write("-->");
    } catch (Exception e) { /* ignore */ }
}
/**
 * Adds a newline to the result when this callback method is invoked.
 *
 * @param str argument passed to this callback; not used
 */
public void handleEndOfLineString(java.lang.String str) {
    final String lineBreak = "\n";
    addToResult(lineBreak);
}
/**
 * Error callback with an empty body: errors are silently ignored.
 *
 * @param str   argument passed to this callback; not used
 * @param param argument passed to this callback; not used
 */
public void handleError(java.lang.String str,int param) {
    // errors are deliberately ignored
}
/**
 * Handles a tag without a separate end tag: META tags are dropped when
 * removeMeta is set, everything else is appended to the result.
 *
 * @param tag   the tag reported by the parser
 * @param attrs the tag's attributes
 * @param param position argument from the parser; not used
 */
public void handleSimpleTag(HTML.Tag tag,MutableAttributeSet attrs,int param) {
    boolean droppedMeta = removeMeta && (tag == HTML.Tag.META);
    if (!droppedMeta) {
        appendTagToResult(tag, attrs);
    }
}
/**
 * Handles an opening tag by appending it (with its attributes) to the result.
 *
 * @param tag      the tag reported by the parser
 * @param attrs    the tag's attributes
 * @param position position argument from the parser; not used
 */
public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrs, int position) {
    appendTagToResult(tag, attrs);
}
/**
 * Handles a closing tag: writes it to the result as {@code </tag>} and
 * updates the script/style/ignore bookkeeping.
 *
 * A closing FORM tag is written only while inForm is set, and clears that
 * flag; a closing FORM tag arriving when inForm is not set is skipped.
 *
 * @param tag      the tag being closed
 * @param position position argument from the parser; not used
 */
public void handleEndTag(HTML.Tag tag, int position) {
    boolean isForm = (tag == HTML.Tag.FORM);
    // form handling seems to be buggy: presumably the closing FORM tag can be
    // reported while no form is open, so it is only written when inForm is set
    if (!isForm || inForm) {
        addToResult("</").addToResult(tag).addToResult(">");
        if (isForm) {
            inForm = false;
        }
    }

    // leaving a SCRIPT/STYLE element that is kept in the output
    if (!removeScript && (tag == HTML.Tag.SCRIPT)) {
        inScript = false;
    } else if (!removeStyle && (tag == HTML.Tag.STYLE)) {
        inStyle = false;
    }

    // Leaving an element that is being removed: step one level out of the
    // ignored region. This runs after the write above, so the closing tag
    // of a removed element is still subject to the old ignoreLevel.
    boolean closesRemovedElement =
           (removeScript && (tag == HTML.Tag.SCRIPT))
        || (removeStyle && (tag == HTML.Tag.STYLE))
        || (removeHead && (tag == HTML.Tag.HEAD))
        || (removeApplet && (tag == HTML.Tag.APPLET))
        || (removeObject && (tag == HTML.Tag.OBJECT))
        || (removeNoScript && tag.toString().equalsIgnoreCase("NOSCRIPT"));
    if (closesRemovedElement) {
        ignoreLevel--;
    }
}
/**
 * Writes an opening tag and its attributes to the result.
 *
 * The tag is first passed to convertURLS(). Parser-internal pseudo tags
 * and the BASE tag are never written. Attributes whose name starts with
 * "on" (and is longer than two characters) are left out when
 * removeOnSomething is set.
 *
 * @param tag   the tag to write
 * @param attrs the tag's attributes
 */
private void appendTagToResult(HTML.Tag tag, MutableAttributeSet attrs) {
    String tagName = tag.toString();
    // jdk 1.2.2 places a tag <__ENDOFLINETAG__> in the result and
    // jdk 1.3 places a tag <__IMPLIED__> there ... we want neither
    if (tagName.equalsIgnoreCase("__ENDOFLINETAG__")
            || tagName.equalsIgnoreCase("__IMPLIED__")) {
        return;
    }

    convertURLS(tag, attrs);
    Enumeration names = attrs.getAttributeNames();
    if (tag == HTML.Tag.BASE) {
        return;
    }

    addToResult("<").addToResult(tag);
    while (names.hasMoreElements()) {
        Object key = names.nextElement();
        String keyText = key.toString();
        String valueText = attrs.getAttribute(key).toString();
        boolean looksLikeHandler =
            (keyText.length() > 2) && keyText.toLowerCase().startsWith("on");
        if (removeOnSomething && looksLikeHandler) {
            // event-handler attribute: leave it out
            continue;
        }
        addToResult(" ").addToResult(key).addToResult("=\"")
            .addToResult(valueText).addToResult("\"");
    }
    addToResult(">");
}
/** Here the magic happens.
*
* If someone wants new types of URLs to be rewritten, add them here
* @param tag TAG from the Callback-Interface
* @param attrs Attribute-Set from the Callback-Interface
*/
private void convertURLS( HTML.Tag tag, MutableAttributeSet attrs ) {
// first we do an URL-rewrite on different tags
if (tag == HTML.Tag.A) {
if (attrs.getAttribute(HTML.Attribute.HREF) != null) {
// ---- CHECKING addConvertedAttribute( HTML.Attribute.HREF,
attrs );
}
if ((attrs.getAttribute(HTML.Attribute.TARGET) == null) && cb.openInNewWindow) {
attrs.addAttribute(HTML.Attribute.TARGET, "_BLANK");
}
} else if (tag == HTML.Tag.AREA) {
if (attrs.getAttribute(HTML.Attribute.HREF) != null) {
// ---- CHECKING addConvertedAttribute( HTML.Attribute.HREF,
attrs );
}
if ((attrs.getAttribute(HTML.Attribute.TARGET) == null) && cb.openInNewWindow) {
attrs.addAttribute(HTML.Attribute.TARGET, "_BLANK");
}
} else if (((tag == HTML.Tag.IMG) || (tag == HTML.Tag.INPUT) || (tag == HTML.Tag.SCRIPT))
&& (attrs.getAttribute(HTML.Attribute.SRC) != null)) {
// ---- CHECKING addConvertedAttribute( HTML.Attribute.SRC,
attrs );
} else if (tag == HTML.Tag.LINK) {
if (attrs.getAttribute(HTML.Attribute.HREF) != null) {
// ---- CHECKING addConvertedAttribute( HTML.Attribute.HREF,
attrs );
}
} else if ( tag == HTML.Tag.APPLET ) {
// ---- CHECKING if (attrs.getAttribute(HTML.Attribute.CODEBASE) == null) {
int endOfPath = baseUrl.toString().lastIndexOf("/");
attrs.addAttribute(HTML.Attribute.CODEBASE,
baseUrl.toString().substring(0,endOfPath +1));
} else {
addConvertedAttribute( HTML.Attribute.CODEBASE, attrs );
}
} else if (tag == HTML.Tag.OBJECT) {
// ---- CHECKING if (attrs.getAttribute(HTML.Attribute.CODEBASE) == null) {
int endOfPath = baseUrl.toString().lastIndexOf("/");
attrs.addAttribute(HTML.Attribute.CODEBASE,
baseUrl.toString().substring(0,endOfPath +1));
} else {
addConvertedAttribute( HTML.Attribute.CODEBASE, attrs );
}
} else if (tag == HTML.Tag.BODY) {
if (attrs.getAttribute(HTML.Attribute.BACKGROUND) != null) {
// background images are applied to the ENTIRE page, this remove them!
attrs.removeAttribute( HTML.Attribute.BACKGROUND);
}
} else if (tag == HTML.Tag.BASE) {
if (attrs.getAttribute(HTML.Attribute.HREF) != null) {
try {
baseUrl = new URL(attrs.getAttribute(HTML.Attribute.HREF).toString());
} catch (Throwable t) {
// logger.error( "HTMLRewriter: Setting BASE="
// + attrs.getAttribute(HTML.Attribute.HREF).toString()
// + t.getMessage());
}
attrs.removeAttribute(HTML.Attribute.HREF);
}
} else if (tag == HTML.Tag.FORM) {
// ---- CHECKING inForm = true; // buggy