remove doku/bbcode to markdown

This commit is contained in:
thomas girod
2024-08-10 14:23:01 +02:00
parent 28ff7f24c5
commit 7a0fa9f1a0
5 changed files with 2 additions and 195 deletions

View File

@ -13,7 +13,6 @@
#
#
import re
import subprocess
from datetime import date
@ -142,156 +141,6 @@ def exif_auto_rotate(image):
return image
def doku_to_markdown(text: str) -> str:
    """Convert doku text to the corresponding markdown.

    The conversion is a sequence of regex substitutions (inline styles,
    titles, code, footnotes, links, images, ...), followed by a line-by-line
    pass that handles tables and ``[quote]`` blocks.

    Args:
        text: the doku text to convert

    Returns:
        The converted markdown text
    """
    # Italic. The leading group prevents a conflict with "protocol://" and is
    # written back so the character preceding the opening "//" is not lost.
    text = re.sub(r"([^:]|^)\/\/(.*?)\/\/", r"\1*\2*", text)
    # Strike (may be multiline)
    text = re.sub(r"<del>(.*?)<\/del>", r"~~\1~~", text, flags=re.DOTALL)
    # Superscript (multiline not supported, because almost never used)
    text = re.sub(r"<sup>(.*?)<\/sup>", r"^\1^", text)
    # Subscript (idem)
    text = re.sub(r"<sub>(.*?)<\/sub>", r"_\1_", text)

    # Titles: the longest "=" runs are handled first, so a shorter pattern
    # never consumes part of a longer marker.
    text = re.sub(r"^======(.*?)======", r"#\1", text, flags=re.MULTILINE)
    text = re.sub(r"^=====(.*?)=====", r"##\1", text, flags=re.MULTILINE)
    text = re.sub(r"^====(.*?)====", r"###\1", text, flags=re.MULTILINE)
    text = re.sub(r"^===(.*?)===", r"####\1", text, flags=re.MULTILINE)
    text = re.sub(r"^==(.*?)==", r"#####\1", text, flags=re.MULTILINE)
    text = re.sub(r"^=(.*?)=", r"######\1", text, flags=re.MULTILINE)

    text = re.sub(r"<nowiki>", r"<nosyntax>", text)
    text = re.sub(r"</nowiki>", r"</nosyntax>", text)
    text = re.sub(r"<code>", r"```\n", text)
    text = re.sub(r"</code>", r"\n```", text)
    text = re.sub(r"article://", r"page://", text)
    text = re.sub(r"dfile://", r"file://", text)

    # Footnotes: each "((note))" becomes a numbered "[^n]" reference and the
    # note bodies are appended at the end of the text, in order of appearance.
    footnotes = []

    def _footnote_reference(match):
        footnotes.append(match.group(1))
        return "[^%s]" % len(footnotes)

    text = re.sub(r"\(\((.*?)\)\)", _footnote_reference, text)
    text += "".join(
        "\n[^%s]: %s\n" % (number, note)
        for number, note in enumerate(footnotes, start=1)
    )

    text = re.sub(r"\\{2,}[\s]", r"  \n", text)  # Carriage return
    text = re.sub(r"\[\[(.*?)\|(.*?)\]\]", r"[\2](\1)", text)  # Links
    text = re.sub(r"\[\[(.*?)\]\]", r"[\1](\1)", text)  # Links 2
    text = re.sub(r"{{(.*?)\|(.*?)}}", r'![\2](\1 "\2")', text)  # Images
    text = re.sub(r"{{(.*?)(\|(.*?))?}}", r'![\1](\1 "\1")', text)  # Images 2
    # Video (transform to classic links, since we can't integrate them)
    text = re.sub(r"{\[(.*?)(\|(.*?))?\]}", r"[\1](\1)", text)
    # Progress bar. At least one digit is required: otherwise the "######"
    # produced above for level-6 titles would be rewritten as "[[[]]]".
    text = re.sub(r"###(\d+)###", r"[[[\1]]]", text)
    # Block code without lists
    text = re.sub(
        r"(\n +[^* -][^\n]*(\n +[^* -][^\n]*)*)", r"```\1\n```", text, flags=re.DOTALL
    )
    text = re.sub(r"( +)-(.*)", r"1.\2", text)  # Ordered lists

    new_text = []
    quote_level = 0
    for line in text.splitlines():  # Tables and quotes
        if re.search(r"\A\s*\^(([^\^]*?)\^)*", line):  # Table part
            new_text.append("> " * quote_level + line.replace("^", "|"))
            # Don't keep the text alignment in tables, it's really too complex
            # for what it's worth
            new_text.append("> " * quote_level + "|---|")
            continue

        # Quote part. Every other line goes through here, so that lines located
        # inside an open quote receive the current "> " prefix.
        # Both tag lists are computed on the untouched line, before any tag is
        # stripped from it.
        openings = list(re.finditer(r"\[quote(=(.+?))?\]", line))
        closings = list(re.finditer(r"\[/quote\]", line))

        for opening in openings:  # Enter quotes (support multiple at a time)
            quote_level += 1
            header = "> " * quote_level
            author = opening.group(2)  # None for a bare "[quote]"
            if author is not None:
                header += "##### " + author
            new_text.append(header)
            line = line.replace(opening.group(0), "")

        # The line itself belongs to the level reached before any closing tag
        line_quote_level = quote_level
        for closing in closings:  # Quit quotes (support multiple at a time)
            line = line.replace(closing.group(0), "")
            quote_level -= 1

        new_text.append("> " * line_quote_level + line)
        if closings:
            # Add a new line to ensure the separation between the quote and
            # the following text
            new_text.append("\n")
    return "\n".join(new_text)
def bbcode_to_markdown(text: str) -> str:
    """Convert bbcode text to the corresponding markdown.

    Inline tags are converted with regex substitutions, then a line-by-line
    pass turns ``[quote]`` blocks into markdown block quotes.

    Args:
        text: the bbcode text to convert

    Returns:
        The converted markdown text
    """
    text = re.sub(r"\[b\](.*?)\[\/b\]", r"**\1**", text, flags=re.DOTALL)  # Bold
    text = re.sub(r"\[i\](.*?)\[\/i\]", r"*\1*", text, flags=re.DOTALL)  # Italic
    text = re.sub(r"\[u\](.*?)\[\/u\]", r"__\1__", text, flags=re.DOTALL)  # Underline
    # Strike (may be multiline)
    text = re.sub(r"\[s\](.*?)\[\/s\]", r"~~\1~~", text, flags=re.DOTALL)
    # Strike 2
    text = re.sub(r"\[strike\](.*?)\[\/strike\]", r"~~\1~~", text, flags=re.DOTALL)
    text = re.sub(r"article://", r"page://", text)
    text = re.sub(r"dfile://", r"file://", text)
    # The tag contents are matched lazily, so that several tags on the same
    # line are converted one by one instead of being merged into a single one.
    text = re.sub(r"\[url=(.*?)\](.*?)\[\/url\]", r"[\2](\1)", text)  # Links
    text = re.sub(r"\[url\](.*?)\[\/url\]", r"\1", text)  # Links 2
    text = re.sub(r"\[img\](.*?)\[\/img\]", r'![\1](\1 "\1")', text)  # Images

    new_text = []
    quote_level = 0
    for line in text.splitlines():  # Quotes
        # Every line goes through the quote handling, so that lines located
        # inside an open quote receive the current "> " prefix.
        # Both tag lists are computed on the untouched line, before any tag is
        # stripped from it.
        openings = list(re.finditer(r"\[quote(=(.+?))?\]", line))
        closings = list(re.finditer(r"\[/quote\]", line))

        for opening in openings:  # Enter quotes (support multiple at a time)
            quote_level += 1
            header = "> " * quote_level
            author = opening.group(2)  # None for a bare "[quote]"
            if author is not None:
                header += "##### " + author
            new_text.append(header)
            line = line.replace(opening.group(0), "")

        # The line itself belongs to the level reached before any closing tag
        line_quote_level = quote_level
        for closing in closings:  # Quit quotes (support multiple at a time)
            line = line.replace(closing.group(0), "")
            quote_level -= 1

        new_text.append("> " * line_quote_level + line)
        if closings:
            # Add a new line to ensure the separation between the quote and
            # the following text
            new_text.append("\n")
    return "\n".join(new_text)
def get_client_ip(request: HttpRequest) -> str | None:
headers = (
"X_FORWARDED_FOR", # Common header for proixes