From 3f4a41ba4207663df3578cfd085c3ae87c5c75d5 Mon Sep 17 00:00:00 2001 From: imperosol Date: Wed, 19 Nov 2025 13:51:38 +0100 Subject: [PATCH] refactor detection of the need to merge `PageRev` --- core/models.py | 32 +++++++++++++++++++++++++++++++- core/views/forms.py | 23 +++++------------------ 2 files changed, 36 insertions(+), 19 deletions(-) diff --git a/core/models.py b/core/models.py index b84139ae..be3b5cc4 100644 --- a/core/models.py +++ b/core/models.py @@ -23,12 +23,13 @@ # from __future__ import annotations +import difflib import string import unicodedata from datetime import timedelta from io import BytesIO from pathlib import Path -from typing import TYPE_CHECKING, Self +from typing import TYPE_CHECKING, Final, Self from uuid import uuid4 from django.conf import settings @@ -1344,6 +1345,9 @@ class PageRev(models.Model): The content is in PageRev.title and PageRev.content . """ + MERGE_TIME_THRESHOLD: Final[timedelta] = timedelta(minutes=20) + MERGE_DIFF_THRESHOLD: Final[float] = 0.2 + revision = models.IntegerField(_("revision")) title = models.CharField(_("page title"), max_length=255, blank=True) content = models.TextField(_("page content"), blank=True) @@ -1385,6 +1389,32 @@ class PageRev(models.Model): def is_owned_by(self, user: User) -> bool: return any(g.id == self.page.owner_group_id for g in user.cached_groups) + def similarity_ratio(self, text: str) -> float: + """Similarity ratio between this revision's content and the given text. + + The result is a float in [0; 1], 0 meaning the contents are entirely different, + and 1 they are strictly the same. + """ + # cf. https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.ratio + return difflib.SequenceMatcher(None, self.content, text).quick_ratio() + + def should_merge(self, other: Self) -> bool: + """Return True if `other` should be merged into `self`, else False. + + It's considered the other revision should be merged into this one if : + + - it was made less than 20 minutes after + - by the same author + - with a similarity ratio higher than 80% + """ + return ( + not self._state.adding # cannot merge if the original rev doesn't exist + and self.author == other.author + and (other.date - self.date) < self.MERGE_TIME_THRESHOLD + and (not other._state.adding or other.revision == self.revision + 1) + and self.similarity_ratio(other.content) >= (1 - other.MERGE_DIFF_THRESHOLD) + ) + def get_notification_types(): return settings.SITH_NOTIFICATIONS diff --git a/core/views/forms.py b/core/views/forms.py index c39d36b2..58b5c598 100644 --- a/core/views/forms.py +++ b/core/views/forms.py @@ -20,9 +20,9 @@ # Place - Suite 330, Boston, MA 02111-1307, USA. # # -import difflib import re -from datetime import date, datetime, timedelta +from copy import copy +from datetime import date, datetime from io import BytesIO from captcha.fields import CaptchaField @@ -390,14 +390,11 @@ class PageRevisionForm(forms.ModelForm): - less than 20 minutes ago - by the same author - - with a diff ratio higher than 20% + - with a similarity ratio higher than 80% then the latter will be edited and the new revision won't be created. """ - TIME_THRESHOLD = timedelta(minutes=20) - DIFF_THRESHOLD = 0.2 - class Meta: model = PageRev fields = ["title", "content"] @@ -409,21 +406,11 @@ class PageRevisionForm(forms.ModelForm): super().__init__(*args, instance=instance, **kwargs) self.author = author self.page = page - self.initial_content = instance.content if instance else "" - - def diff_ratio(self, new_str: str) -> float: - return difflib.SequenceMatcher( - None, self.initial_content, new_str - ).quick_ratio() + self.initial_obj: PageRev = copy(self.instance) def save(self, commit=True): # noqa FBT002 revision: PageRev = self.instance - if ( - revision._state.adding - or revision.author != self.author - or revision.date + self.TIME_THRESHOLD < now() - or self.diff_ratio(revision.content) < (1 - self.DIFF_THRESHOLD) - ): + if not self.initial_obj.should_merge(self.instance): revision.author = self.author revision.page = self.page revision.id = None # if id is None, Django will create a new record