markdown: be more strict about sanitizing classes

Allowing arbitrary class attributes lets users style content in such a way (by re-using global CSS classes) that it can escape the intended target element, causing effects that could be used for phishing. Thanks to Ruben for the responsible disclosure.
2024-01-08 17:06:17 +01:00 · 2024-01-08 17:06:17 +01:00 · 9f8eda0c4a
parent 3f362e954c
commit 9f8eda0c4a
1 changed files with 23 additions and 1 deletions
--- a/srht/markdown.py
+++ b/srht/markdown.py
@@ -149,8 +149,28 @@ def _input_filter(tag, name, value):
        return True
    return name == "type" and value in ["checkbox"]

+def _div_filter(tag, name, value):
+    if name == "class":
+        # For code highlighting
+        return value in ["highlight"]
+    return name in ["style"]
+
+def _span_filter(tag, name, value):
+    if name == "class":
+        # For code highlighting
+        return value in [
+            "bp", "c", "c1", "ch", "cm", "cp", "cpf", "cs", "dl", "err", "fm",
+            "gd", "ge", "gh", "gi", "go", "gp", "gr", "gs", "gt", "gu", "hll",
+            "il", "k", "kc", "kd", "kn", "kp", "kr", "kt", "l", "ld", "m",
+            "mb", "mf", "mh", "mi", "mo", "n", "na", "nb", "nc", "nd", "ne",
+            "nf", "ni", "nl", "nn", "no", "nt", "nv", "nx", "o", "ow", "p",
+            "py", "s", "s1", "s2", "sa", "sb", "sc", "sd", "se", "sh", "si",
+            "sr", "ss", "sx", "vc", "vg", "vi", "vm", "w"
+        ]
+    return name in ["style"]
+
 def _wildcard_filter(tag, name, value):
-    return name in ["style", "class", "colspan", "rowspan"]
+    return name in ["colspan", "rowspan"]

 _sanitizer_attrs = {
    "a": ["id", "href", "title"],
@@ -162,6 +182,8 @@ _sanitizer_attrs = {
    "h6": ["id"],
    "img": _img_filter,
    "input": _input_filter,
+    "div": _div_filter,
+    "span": _span_filter,
    "*": _wildcard_filter,
 }
 _sanitizer_styles = [