mirror of
https://github.com/M66B/FairEmail.git
synced 2026-01-02 19:10:11 +01:00
Remove HTML elements with a namespace
This commit is contained in:
@@ -76,6 +76,7 @@ public class HtmlHelper {
|
||||
static final int PREVIEW_SIZE = 250;
|
||||
|
||||
private static final int TRACKING_PIXEL_SURFACE = 25;
|
||||
|
||||
private static final List<String> heads = Collections.unmodifiableList(Arrays.asList(
|
||||
"h1", "h2", "h3", "h4", "h5", "h6", "p", "ol", "ul", "table", "br", "hr"));
|
||||
private static final List<String> tails = Collections.unmodifiableList(Arrays.asList(
|
||||
@@ -115,6 +116,38 @@ public class HtmlHelper {
|
||||
boolean paranoid = prefs.getBoolean("paranoid", true);
|
||||
|
||||
Document parsed = Jsoup.parse(html);
|
||||
|
||||
// Example root element declaring Microsoft Office namespace prefixes:
// <html xmlns:v="urn:schemas-microsoft-com:vml"
|
||||
// xmlns:o="urn:schemas-microsoft-com:office:office"
|
||||
// xmlns:w="urn:schemas-microsoft-com:office:word"
|
||||
// xmlns:m="http://schemas.microsoft.com/office/2004/12/omml"
|
||||
// xmlns="http://www.w3.org/TR/REC-html40">
|
||||
|
||||
// Example of an element using a namespace prefix: <o:p> </o:p></span>
|
||||
|
||||
// Default XHTML namespace: http://www.w3.org/1999/xhtml
|
||||
|
||||
String ns = null;
|
||||
for (Element h : parsed.select("html"))
|
||||
for (Attribute a : h.attributes()) {
|
||||
if (a.getKey().startsWith("xmlns:") &&
|
||||
a.getValue().startsWith("http://www.w3.org/")) {
|
||||
ns = a.getKey().split(":")[1];
|
||||
break;
|
||||
}
|
||||
}
|
||||
for (Element e : parsed.select("*"))
|
||||
if (e.tagName().contains(":")) {
|
||||
String tag = e.tagName();
|
||||
if (ns != null && e.tagName().startsWith(ns)) {
|
||||
e.tagName(tag.split(":")[1]);
|
||||
Log.i("Updated tag=" + tag + " to=" + e.tagName());
|
||||
} else {
|
||||
e.remove();
|
||||
Log.i("Removed tag=" + tag);
|
||||
}
|
||||
}
|
||||
|
||||
Whitelist whitelist = Whitelist.relaxed()
|
||||
.addTags("hr", "abbr")
|
||||
.removeTags("col", "colgroup", "thead", "tbody")
|
||||
|
||||
Reference in New Issue
Block a user