update mistune

This commit is contained in:
thomas girod
2024-07-01 17:33:05 +02:00
parent 8bcf59aaf0
commit 3c2dcfbfa2
6 changed files with 278 additions and 312 deletions

View File

@ -12,161 +12,125 @@
# OR WITHIN THE LOCAL FILE "LICENSE"
#
#
from __future__ import annotations
import os
import re
from typing import TYPE_CHECKING
import mistune
from django.urls import reverse
from mistune import InlineGrammar, InlineLexer, Markdown, Renderer, escape, escape_link
from mistune import HTMLRenderer, Markdown
if TYPE_CHECKING:
from mistune import InlineParser, InlineState
# Match __text__ (underlined text), where none of the markup underscores is
# escaped by a backslash.
# Examples:
# - "__text__" : OK
# - "__te xt__" : OK
# - "__te_xt__" : no match (unescaped underscore in the middle)
# - "__te\_xt__" : OK (the middle underscore is escaped)
# - "__te\nxt__" : no match (there is a linebreak in the text)
# - "\__text__" : no match (one of the underscores has a backslash prepended)
# - "\\__text__" : OK (the backslash is ignored, because it is itself preceded by another backslash)
# NOTE(review): the negated class `[^\\_]` below also matches a newline, so the
# "no linebreak" rule is presumably enforced by how the inline parser feeds text
# to this pattern - confirm.
UNDERLINED_RE = (
r"(?<!\\)(?:\\{2})*" # do not match when preceded by an odd number of backslashes
r"_{2}" # two opening underscores
r"(?P<underlined>([^\\_]|\\.)+)" # the text: anything but backslash/underscore, or a backslash-escaped character
r"_{2}" # two closing underscores
)
# Match an internal page link written as [link text](page://page-slug).
# The link text may only contain word characters and whitespace; the slug must
# start and end with an alphanumeric character (so it is at least two characters
# long) and may contain dots and dashes in between.
SITH_LINK_RE = (
r"\[(?P<page_name>[\w\s]+)\]" # [link text]
r"\(page:\/\/" # (page://
r"(?P<page_slug>[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9])" # slug of the target page
r"\)" # )
)
# Match an image reference written as [image name](img://image-slug).
# Same shape as SITH_LINK_RE, with the `img://` scheme instead of `page://`.
# NOTE(review): this constant is not referenced anywhere in the visible part of
# this file - confirm it is still needed.
CUSTOM_DIMENSIONS_IMAGE_RE = (
r"\[(?P<img_name>[\w\s]+)\]" # [image name]
r"\(img:\/\/" # (img://
r"(?P<img_slug>[a-zA-Z0-9][a-zA-Z0-9.-]*[a-zA-Z0-9])" # slug of the image
r"\)" # )
)
class SithRenderer(Renderer):
    """Renderer producing the HTML of the site-specific markdown extensions.

    Adds superscript, subscript and underline tags, a helper building file
    URLs, and an ``image`` implementation that understands a size suffix in
    the image URL.
    """

    def file_link(self, pk, suffix):
        """Return the ``core:file_detail`` URL of file ``pk`` followed by ``suffix``."""
        detail_url = reverse("core:file_detail", kwargs={"file_id": pk})
        return detail_url + suffix

    def exposant(self, text):
        """Wrap ``text`` in a ``<sup>`` element."""
        return f"<sup>{text}</sup>"

    def indice(self, text):
        """Wrap ``text`` in a ``<sub>`` element."""
        return f"<sub>{text}</sub>"

    def underline(self, text):
        """Wrap ``text`` in a ``<u>`` element."""
        return f"<u>{text}</u>"

    def image(self, original_src, title, text):
        """Render an ``<img>`` tag, honouring an optional size in the URL query.

        When the part after the last ``?`` of the source contains
        ``<width>`` or ``<width>x<height>`` (digits, optionally followed by
        ``%``), that part is removed from the source and turned into an
        inline ``style`` attribute; bare numbers are interpreted as pixels.
        Otherwise the source is used untouched.

        :param original_src: source link of the image, possibly with a size suffix.
        :param title: title text of the image (omitted from the tag when falsy).
        :param text: alt text of the image.
        """
        src, style = original_src, None
        if "?" in original_src:
            base, params = original_src.rsplit("?", maxsplit=1)
            # The search is not anchored: the first run of digits found in
            # the query string is taken as the width.
            dims = re.search(r"(\d+%?)(x(\d+%?))?", params)
            if dims:
                # Only strip the query string when it actually carries dimensions.
                src = base
                width, height = dims.group(1), dims.group(3)
                if not width.endswith("%"):
                    width += "px"
                style = f"width: {width}; "
                if height is not None:
                    if not height.endswith("%"):
                        height += "px"
                    style += f"height: {height}; "

        src = escape_link(src)
        text = escape(text, quote=True)
        attributes = [f'src="{src}"', f'alt="{text}"']
        if title:
            title = escape(title, quote=True)
            attributes.append(f'title="{title}"')
        if style:
            attributes.append(f'style="{style}"')

        html = "<img " + " ".join(attributes)
        closing = " />" if self.options.get("use_xhtml") else ">"
        return html + closing
def parse_underline(_inline: InlineParser, m: re.Match, state: InlineState):
    """Emit an ``underline`` token for a match of the underline pattern.

    The token carries the content of the ``underlined`` named group as its
    raw text.

    :param _inline: the inline parser invoking this rule (unused).
    :param m: the regex match; must expose an ``underlined`` named group.
    :param state: the inline state the token is appended to.
    :return: the end position of the match, i.e. where parsing resumes.
    """
    token = {"type": "underline", "raw": m.group("underlined")}
    state.append_token(token)
    return m.end()
class SithInlineGrammar(InlineGrammar):
    """Inline grammar overriding the emphasis rules and adding custom ones.

    Every pattern is anchored at the start of the remaining text and captures
    the inner content as group 1.
    """

    # **word**
    double_emphasis = re.compile(r"^\*{2}([\s\S]+?)\*{2}(?!\*)")
    # *word*
    emphasis = re.compile(r"^\*((?:\*\*|[^\*])+?)\*(?!\*)")
    # __word__
    underline = re.compile(r"^_{2}([\s\S]+?)_{2}(?!_)")
    # <sup>text</sup>
    exposant = re.compile(r"^<sup>([\s\S]+?)</sup>")
    # <sub>text</sub>
    indice = re.compile(r"^<sub>([\s\S]+?)</sub>")
class SithInlineLexer(InlineLexer):
grammar_class = SithInlineGrammar
default_rules = [
"escape",
# 'inline_html',
"autolink",
"url",
"footnote",
"link",
"reflink",
"nolink",
"exposant",
"double_emphasis",
"emphasis",
def underline(md_instance: Markdown):
md_instance.inline.register(
"underline",
"indice",
"code",
"linebreak",
UNDERLINED_RE,
parse_underline,
before="emphasis",
)
md_instance.renderer.register("underline", lambda _, text: f"<u>{text}</u>")
def parse_sith_link(_inline: InlineParser, m: re.Match, state: InlineState):
    """Emit a regular ``link`` token for a ``[text](page://slug)`` match.

    The link text comes from the ``page_name`` group and the target URL is
    the ``core:page`` view resolved for the ``page_slug`` group.

    :param _inline: the inline parser invoking this rule (unused).
    :param m: the regex match; must expose ``page_name`` and ``page_slug`` groups.
    :param state: the inline state the token is appended to.
    :return: the end position of the match, i.e. where parsing resumes.
    """
    label = m.group("page_name")
    slug = m.group("page_slug")
    target_url = reverse("core:page", kwargs={"page_name": slug})
    token = {
        "type": "link",
        "children": [{"type": "text", "raw": label}],
        "attrs": {"url": target_url},
    }
    state.append_token(token)
    return m.end()
def sith_link(md_instance: Markdown):
    """Plugin registering the ``page://`` link syntax on a markdown instance.

    Only an inline parsing rule is added (tried before ``emphasis``). No
    renderer is registered, because the rule emits ordinary ``link`` tokens
    that are rendered like any regular markdown link.

    :param md_instance: the markdown instance to extend.
    """
    inline_parser = md_instance.inline
    inline_parser.register(
        "sith_link",
        SITH_LINK_RE,
        parse_sith_link,
        before="emphasis",
    )
class SithRenderer(HTMLRenderer):
    """HTML renderer supporting custom image dimensions in the image URL."""

    def image(self, text: str, url: str, title=None) -> str:
        """Render an image, turning a ``?<width>[x<height>]`` suffix into CSS.

        Each dimension is a number optionally followed by ``%`` or ``px``;
        a bare number is interpreted as pixels. When the part after the last
        ``?`` does not have exactly that format, the url is left untouched
        and rendering is fully delegated to the parent class.

        :param text: alt text of the image.
        :param url: image url, possibly ending with a dimension suffix.
        :param title: optional title of the image.
        :return: the ``<img>`` tag.
        """
        base_url, separator, params = url.rpartition("?")
        dimensions = None
        if separator:
            dimensions = re.match(
                r"^(?P<width>\d+(%|px)?)(x(?P<height>\d+(%|px)?))?$", params
            )
        if dimensions is None:
            return super().image(text, url, title)

        declarations = []
        for prop in ("width", "height"):
            value = dimensions.group(prop)
            if value is None:
                # only the height is optional in the pattern
                continue
            if not value.endswith(("%", "px")):
                value += "px"
            declarations.append(f"{prop}:{value};")
        style = "".join(declarations)

        # The style attribute is spliced in front of the closing "/>" of the
        # tag produced by the parent renderer.
        rendered = super().image(text, base_url, title)
        return rendered.replace("/>", f'style="{style}" />')
markdown = mistune.create_markdown(
renderer=SithRenderer(escape=True),
plugins=[
underline,
sith_link,
"strikethrough",
"text",
]
inline_html_rules = [
"escape",
"autolink",
"footnotes",
"table",
"spoiler",
"subscript",
"superscript",
"url",
"link",
"reflink",
"nolink",
"exposant",
"double_emphasis",
"emphasis",
"underline",
"indice",
"code",
"linebreak",
"strikethrough",
"text",
]
def output_underline(self, m):
    """Render the first captured group of ``m`` as underlined text."""
    return self.renderer.underline(m.group(1))
def output_exposant(self, m):
    """Render the first captured group of ``m`` as superscript text."""
    return self.renderer.exposant(m.group(1))
def output_indice(self, m):
    """Render the first captured group of ``m`` as subscript text."""
    return self.renderer.indice(m.group(1))
# Double emphasis rule changed
def output_double_emphasis(self, m):
    """Render a ``**text**`` match.

    The captured text is first run through the inline lexer, so nested
    markup is rendered, and then wrapped by the renderer's double emphasis.
    """
    inner = self.output(m.group(1))
    return self.renderer.double_emphasis(inner)
# Emphasis rule changed
def output_emphasis(self, m):
    """Render a ``*text*`` match.

    The captured text is first run through the inline lexer, so nested
    markup is rendered, and then wrapped by the renderer's emphasis.
    """
    inner = self.output(m.group(1))
    return self.renderer.emphasis(inner)
def _process_link(self, m, link, title=None):
    """Resolve the custom ``page://`` and ``file://`` schemes, then delegate.

    - ``page://<name>`` becomes the URL of the ``core:page`` view for ``<name>``.
    - ``file://<id>[/<suffix>]`` becomes the URL of the ``core:file_detail``
      view for ``<id>``, followed by ``<suffix>`` (e.g. ``file://4000/download``).

    Any other link, and any link whose target cannot be reversed, is passed
    to the parent implementation unchanged.

    :param m: the regex match of the whole link construct.
    :param link: the raw link target found in the markdown source.
    :param title: optional title of the link.
    :return: whatever the parent ``_process_link`` returns for the resolved link.
    """
    # Local import: keeps this block self-contained without touching the
    # module-level import list.
    from django.urls import NoReverseMatch

    if isinstance(link, str):
        page_match = re.match(r"^page://(\S*)", link)  # page://name_of_my_page
        file_match = re.match(r"^file://(\d*)/?(\S*)?", link)  # file://4000/download
        try:
            if page_match is not None:
                page_name = page_match.group(1) or ""
                link = reverse("core:page", kwargs={"page_name": page_name})
            elif file_match is not None:
                pk = file_match.group(1)
                suffix = file_match.group(2) or ""
                # The original passed the builtin ``id`` here instead of the
                # parsed ``pk``, so file links could never be resolved.
                link = reverse("core:file_detail", kwargs={"file_id": pk}) + suffix
        except NoReverseMatch:
            # Best effort: an unresolvable target is rendered as written.
            pass
    return super()._process_link(m, link, title)
renderer = SithRenderer(escape=True)
inline = SithInlineLexer(renderer)
markdown = Markdown(renderer, inline=inline)
],
)
if __name__ == "__main__":
root_path = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

View File

@ -13,8 +13,8 @@
#
#
import os
from datetime import date, timedelta
from pathlib import Path
import freezegun
import pytest
@ -22,7 +22,7 @@ from django.core.cache import cache
from django.test import TestCase
from django.urls import reverse
from django.utils.timezone import now
from pytest_django.asserts import assertRedirects
from pytest_django.asserts import assertInHTML, assertRedirects
from club.models import Membership
from core.markdown import markdown
@ -108,12 +108,51 @@ class TestUserLogin:
assertRedirects(response, reverse("core:index"))
@pytest.mark.parametrize(
    ("md", "html"),
    [
        (
            "[nom du lien](page://nomDeLaPage)",
            '<a href="/page/nomDeLaPage/">nom du lien</a>',
        ),
        ("__texte__", "<u>texte</u>"),
        ("~~***__texte__***~~", "<del><em><strong><u>texte</u></strong></em></del>"),
        (
            '![tst_alt](/img.png?50% "tst_title")',
            '<img src="/img.png" alt="tst_alt" title="tst_title" style="width:50%;" />',
        ),
        (
            "[texte](page://tst-page)",
            '<a href="/page/tst-page/">texte</a>',
        ),
        (
            "![](/img.png?50x450)",
            '<img src="/img.png" alt="" style="width:50px;height:450px;" />',
        ),
        ("![](/img.png)", '<img src="/img.png" alt="" />'),
        (
            "![](/img.png?50%x120%)",
            '<img src="/img.png" alt="" style="width:50%;height:120%;" />',
        ),
        ("![](/img.png?50px)", '<img src="/img.png" alt="" style="width:50px;" />'),
        (
            "![](/img.png?50pxx120%)",
            '<img src="/img.png" alt="" style="width:50px;height:120%;" />',
        ),
        # a malformed image dimension must leave the url untouched
        ("![](/img.png?50pxxxxxxxx)", '<img src="/img.png?50pxxxxxxxx" alt="" />'),
        ("![](/img.png?azerty)", '<img src="/img.png?azerty" alt="" />'),
    ],
)
def test_custom_markdown_syntax(md, html):
    """Check the rendering of the site-specific markdown extensions.

    Every case is a single paragraph, so the expected output is the given
    html wrapped in ``<p>`` tags and followed by a newline.
    """
    expected = f"<p>{html}</p>\n"
    rendered = markdown(md)
    assert rendered == expected
def test_full_markdown_syntax():
    """Render ``doc/SYNTAX.md`` and compare it with the reference ``doc/SYNTAX.html``.

    Both files are read once, from the ``doc`` directory under
    ``settings.BASE_DIR``; the former ``os.path``-based read of the same
    files was redundant, since its result was immediately overwritten.
    """
    doc_path = Path(settings.BASE_DIR) / "doc"
    # Explicit encoding so the comparison does not depend on the platform default.
    md = (doc_path / "SYNTAX.md").read_text(encoding="utf-8")
    html = (doc_path / "SYNTAX.html").read_text(encoding="utf-8")
    result = markdown(md)
    assert result == html
@ -218,12 +257,15 @@ http://git.an
)
response = self.client.get(reverse("core:page", kwargs={"page_name": "guy"}))
assert response.status_code == 200
assert (
'<p>Guy <em>bibou</em></p>\\n<p><a href="http://git.an">http://git.an</a></p>\\n'
+ "<h1>Swag</h1>\\n&lt;guy&gt;Bibou&lt;/guy&gt;"
+ "&lt;script&gt;alert(\\'Guy\\');&lt;/script&gt;"
in str(response.content)
)
print(response.content.decode())
expected = """
<p>Guy <em>bibou</em></p>
<p><a href="http://git.an">http://git.an</a></p>
<h1>Swag</h1>
<p>&lt;guy&gt;Bibou&lt;/guy&gt;</p>
<p>&lt;script&gt;alert('Guy');&lt;/script&gt;</p>
"""
assertInHTML(expected, response.content.decode())
class UserToolsTest: