refactor detection of the need to merge PageRev

This commit is contained in:
imperosol
2025-11-19 13:51:38 +01:00
parent 449abbb17e
commit 3f4a41ba42
2 changed files with 36 additions and 19 deletions

View File

@@ -23,12 +23,13 @@
#
from __future__ import annotations
import difflib
import string
import unicodedata
from datetime import timedelta
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Self
from typing import TYPE_CHECKING, Final, Self
from uuid import uuid4
from django.conf import settings
@@ -1344,6 +1345,9 @@ class PageRev(models.Model):
The content is in PageRev.title and PageRev.content .
"""
MERGE_TIME_THRESHOLD: Final[timedelta] = timedelta(minutes=20)
MERGE_DIFF_THRESHOLD: Final[float] = 0.2
revision = models.IntegerField(_("revision"))
title = models.CharField(_("page title"), max_length=255, blank=True)
content = models.TextField(_("page content"), blank=True)
@@ -1385,6 +1389,32 @@ class PageRev(models.Model):
def is_owned_by(self, user: User) -> bool:
    """Tell whether `user` belongs to the group owning this revision's page.

    Args:
        user: the user whose `cached_groups` are inspected.

    Returns:
        True if the id of at least one of the user's cached groups equals
        `self.page.owner_group_id`, else False.
    """
    for group in user.cached_groups:
        # `self.page` is only looked up while there are groups to compare
        if group.id == self.page.owner_group_id:
            return True
    return False
def similarity_ratio(self, text: str) -> float:
    """Estimate how similar this revision's content is to `text`.

    The value is a float in [0; 1] computed with
    `difflib.SequenceMatcher.quick_ratio`, which is a fast upper bound
    of the real similarity ratio: 0 means the two texts have no character
    in common, and identical texts always give 1.
    As it is only an upper bound, texts made of the same characters
    in a different order also give 1.

    Args:
        text: the text to compare with `self.content`.

    Returns:
        The estimated similarity ratio.
    """
    # cf. https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.quick_ratio
    matcher = difflib.SequenceMatcher(isjunk=None, a=self.content, b=text)
    return matcher.quick_ratio()
def should_merge(self, other: Self) -> bool:
    """Return True if `other` should be merged into `self`, else False.

    It's considered the other revision should be merged into this one if :

    - this revision already exists (it isn't being added)
    - both revisions have the same author
    - `other` is dated at or after this revision, and less than
      `MERGE_TIME_THRESHOLD` after it
    - `other` either already exists, or is the revision
      immediately following this one
    - the similarity ratio between both contents is at least
      `1 - MERGE_DIFF_THRESHOLD`

    Args:
        other: the revision that may be merged into this one.

    Returns:
        True if all the conditions above are met, else False.
    """
    # cannot merge if the original rev doesn't exist
    if self._state.adding:
        return False
    if self.author != other.author:
        return False
    elapsed = other.date - self.date
    # A negative delta means `other` predates this revision: without the
    # lower bound, it would wrongly be considered "less than the threshold after".
    if not (timedelta(0) <= elapsed < self.MERGE_TIME_THRESHOLD):
        return False
    # a revision that isn't saved yet must directly follow this one
    if other._state.adding and other.revision != self.revision + 1:
        return False
    # the text comparison is the most expensive check, so it comes last
    return self.similarity_ratio(other.content) >= (1 - other.MERGE_DIFF_THRESHOLD)
def get_notification_types():
    """Return the value of the `SITH_NOTIFICATIONS` setting."""
    notification_types = settings.SITH_NOTIFICATIONS
    return notification_types

View File

@@ -20,9 +20,9 @@
# Place - Suite 330, Boston, MA 02111-1307, USA.
#
#
import difflib
import re
from datetime import date, datetime, timedelta
from copy import copy
from datetime import date, datetime
from io import BytesIO
from captcha.fields import CaptchaField
@@ -390,14 +390,11 @@ class PageRevisionForm(forms.ModelForm):
- less than 20 minutes ago
- by the same author
- with a diff ratio higher than 20%
- with a similarity ratio higher than 80%
then the latter will be edited and the new revision won't be created.
"""
TIME_THRESHOLD = timedelta(minutes=20)
DIFF_THRESHOLD = 0.2
class Meta:
model = PageRev
fields = ["title", "content"]
@@ -409,21 +406,11 @@ class PageRevisionForm(forms.ModelForm):
super().__init__(*args, instance=instance, **kwargs)
self.author = author
self.page = page
self.initial_content = instance.content if instance else ""
def diff_ratio(self, new_str: str) -> float:
    """Estimate the similarity between the initial content and `new_str`.

    The value comes from `difflib.SequenceMatcher.quick_ratio`:
    a float in [0; 1] which is an upper bound of the real similarity ratio.

    Args:
        new_str: the text to compare with `self.initial_content`.

    Returns:
        The estimated similarity ratio.
    """
    matcher = difflib.SequenceMatcher(isjunk=None, a=self.initial_content, b=new_str)
    return matcher.quick_ratio()
self.initial_obj: PageRev = copy(self.instance)
def save(self, commit=True): # noqa FBT002
revision: PageRev = self.instance
if (
revision._state.adding
or revision.author != self.author
or revision.date + self.TIME_THRESHOLD < now()
or self.diff_ratio(revision.content) < (1 - self.DIFF_THRESHOLD)
):
if not self.initial_obj.should_merge(self.instance):
revision.author = self.author
revision.page = self.page
revision.id = None # if id is None, Django will create a new record