From d2737219e0413c6a7fc2a72ac1254cda596cb42a Mon Sep 17 00:00:00 2001 From: M66B Date: Mon, 13 May 2019 11:03:15 +0200 Subject: [PATCH] Remove HTML elements with a namespace --- .../java/eu/faircode/email/HtmlHelper.java | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/app/src/main/java/eu/faircode/email/HtmlHelper.java b/app/src/main/java/eu/faircode/email/HtmlHelper.java index 320128507b..ba7b3734b3 100644 --- a/app/src/main/java/eu/faircode/email/HtmlHelper.java +++ b/app/src/main/java/eu/faircode/email/HtmlHelper.java @@ -76,6 +76,7 @@ public class HtmlHelper { static final int PREVIEW_SIZE = 250; private static final int TRACKING_PIXEL_SURFACE = 25; + private static final List heads = Collections.unmodifiableList(Arrays.asList( "h1", "h2", "h3", "h4", "h5", "h6", "p", "ol", "ul", "table", "br", "hr")); private static final List tails = Collections.unmodifiableList(Arrays.asList( @@ -115,6 +116,38 @@ public class HtmlHelper { boolean paranoid = prefs.getBoolean("paranoid", true); Document parsed = Jsoup.parse(html); + + // + + //   + + // Default XHTML namespace: http://www.w3.org/1999/xhtml + + String ns = null; + for (Element h : parsed.select("html")) + for (Attribute a : h.attributes()) { + if (a.getKey().startsWith("xmlns:") && + a.getValue().startsWith("http://www.w3.org/")) { + ns = a.getKey().split(":")[1]; + break; + } + } + for (Element e : parsed.select("*")) + if (e.tagName().contains(":")) { + String tag = e.tagName(); + if (ns != null && e.tagName().startsWith(ns)) { + e.tagName(tag.split(":")[1]); + Log.i("Updated tag=" + tag + " to=" + e.tagName()); + } else { + e.remove(); + Log.i("Removed tag=" + tag); + } + } + Whitelist whitelist = Whitelist.relaxed() .addTags("hr", "abbr") .removeTags("col", "colgroup", "thead", "tbody")