added removeHTMLTags and approveHTMLTags. the latter is non-functional and will...
authorjohn <john>
Sun, 30 Sep 2001 16:50:24 +0000 (16:50 +0000)
committerjohn <john>
Sun, 30 Sep 2001 16:50:24 +0000 (16:50 +0000)
source/mir/misc/StringUtil.java

index 057b201..09a4b7c 100755 (executable)
@@ -878,6 +878,63 @@ public final class StringUtil {
       return null;
     }
   }
+  
+    /**
+     * this method deletes all html tags
+     *
+     */
+
+    public static String removeHTMLTags(String haystack){
+try {
+      RE regex = new RE("<[^>]*>",RE.REG_ICASE);
+      haystack = regex.substituteAll(haystack,"");
+
+      return haystack;
+    } catch(REException ex){
+      return null;
+    }
+
+
+    }
+
+    /**
+     * this method deletes all but the approved tags html tags
+     * it also deletes approved tags which contain malicious-looking attributes and doesn't work at all
+     */
+
+
+    public static String approveHTMLTags(String haystack){
+       try {
+           String approvedTags="a|img|h1|h2|h3|h4|h5|h6|br|b|i|strong|p";
+           String badAttributes="onAbort|onBlur|onChange|onClick|onDblClick|onDragDrop|onError|onFocus|onKeyDown|onKeyPress|onKeyUp|onLoad|onMouseDown|onMouseMove|onMouseOut|onMouseOver|onMouseUp|onMove|onReset|onResize|onSelect|onSubmit|onUnload";
+           String approvedProtocols="rtsp|http|ftp|https|freenet|mailto";
+      
+           // kill all the bad tags that have attributes 
+       
+           RE regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s))\\w+\\s[^>]*>",RE.REG_ICASE);
+           haystack = regex.substituteAll(haystack,"");
+           
+           // kill all the bad tags that are attributeless 
+           regex = new RE("<\\s*/?\\s*(?!(("+approvedTags+")\\s*>))\\w+\\s*>",RE.REG_ICASE);
+           haystack = regex.substituteAll(haystack,"");
+           
+           // kill all the tags which have a javascript attribute like onLoad 
+           regex = new RE("<[^>]*("+badAttributes+")[^>]*>",RE.REG_ICASE);
+           haystack = regex.substituteAll(haystack,"");
+           
+           // kill all the tags which include a url to an unacceptable protocol
+           regex = new RE("<\\s*a\\s+[^>]*href=(?!(\'|\")?("+approvedProtocols+"))[^>]*>",RE.REG_ICASE);
+           haystack = regex.substituteAll(haystack,"");
+           
+           return haystack;
+    } catch(REException ex){
+       //return ex.toString();
+        return null;
+    }
+
+
+    }
+
 
   /**
    *  createHTML ruft alle regex-methoden zum unwandeln eines nicht