refactor detection of the need to merge PageRev

This commit is contained in:
imperosol
2025-11-19 13:51:38 +01:00
parent 449abbb17e
commit 3f4a41ba42
2 changed files with 36 additions and 19 deletions

View File

@@ -23,12 +23,13 @@
# #
from __future__ import annotations from __future__ import annotations
import difflib
import string import string
import unicodedata import unicodedata
from datetime import timedelta from datetime import timedelta
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import TYPE_CHECKING, Self from typing import TYPE_CHECKING, Final, Self
from uuid import uuid4 from uuid import uuid4
from django.conf import settings from django.conf import settings
@@ -1344,6 +1345,9 @@ class PageRev(models.Model):
The content is in PageRev.title and PageRev.content . The content is in PageRev.title and PageRev.content .
""" """
MERGE_TIME_THRESHOLD: Final[timedelta] = timedelta(minutes=20)
MERGE_DIFF_THRESHOLD: Final[float] = 0.2
revision = models.IntegerField(_("revision")) revision = models.IntegerField(_("revision"))
title = models.CharField(_("page title"), max_length=255, blank=True) title = models.CharField(_("page title"), max_length=255, blank=True)
content = models.TextField(_("page content"), blank=True) content = models.TextField(_("page content"), blank=True)
@@ -1385,6 +1389,32 @@ class PageRev(models.Model):
def is_owned_by(self, user: User) -> bool: def is_owned_by(self, user: User) -> bool:
return any(g.id == self.page.owner_group_id for g in user.cached_groups) return any(g.id == self.page.owner_group_id for g in user.cached_groups)
def similarity_ratio(self, text: str) -> float:
    """Estimate how similar this revision's content is to the given text.

    The result is a float in [0; 1]: 0 means the two texts share no
    character at all, and higher values mean more characters in common.

    This relies on `SequenceMatcher.quick_ratio()`, which is an upper bound
    on the real ratio: it is cheap to compute, but it may report 1 for texts
    that contain the same characters in a different order.
    """
    # cf. https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.quick_ratio
    matcher = difflib.SequenceMatcher(isjunk=None, a=self.content, b=text)
    return matcher.quick_ratio()
def should_merge(self, other: Self) -> bool:
"""Return True if `other` should be merged into `self`, else False.
It's considered the other revision should be merged into this one if :
- it was made less than 20 minutes after
- by the same author
- with a similarity ratio higher than 80%
"""
return (
not self._state.adding # cannot merge if the original rev doesn't exist
and self.author == other.author
and (other.date - self.date) < self.MERGE_TIME_THRESHOLD
and (not other._state.adding or other.revision == self.revision + 1)
and self.similarity_ratio(other.content) >= (1 - other.MERGE_DIFF_THRESHOLD)
)
def get_notification_types(): def get_notification_types():
return settings.SITH_NOTIFICATIONS return settings.SITH_NOTIFICATIONS

View File

@@ -20,9 +20,9 @@
# Place - Suite 330, Boston, MA 02111-1307, USA. # Place - Suite 330, Boston, MA 02111-1307, USA.
# #
# #
import difflib
import re import re
from datetime import date, datetime, timedelta from copy import copy
from datetime import date, datetime
from io import BytesIO from io import BytesIO
from captcha.fields import CaptchaField from captcha.fields import CaptchaField
@@ -390,14 +390,11 @@ class PageRevisionForm(forms.ModelForm):
- less than 20 minutes ago - less than 20 minutes ago
- by the same author - by the same author
- with a diff ratio higher than 20% - with a similarity ratio higher than 80%
then the latter will be edited and the new revision won't be created. then the latter will be edited and the new revision won't be created.
""" """
TIME_THRESHOLD = timedelta(minutes=20)
DIFF_THRESHOLD = 0.2
class Meta: class Meta:
model = PageRev model = PageRev
fields = ["title", "content"] fields = ["title", "content"]
@@ -409,21 +406,11 @@ class PageRevisionForm(forms.ModelForm):
super().__init__(*args, instance=instance, **kwargs) super().__init__(*args, instance=instance, **kwargs)
self.author = author self.author = author
self.page = page self.page = page
self.initial_content = instance.content if instance else "" self.initial_obj: PageRev = copy(self.instance)
def diff_ratio(self, new_str: str) -> float:
    """Estimate the similarity between the initial content and `new_str`.

    Returns the `quick_ratio()` of a `difflib.SequenceMatcher` comparing
    `self.initial_content` with `new_str`, i.e. a float in [0; 1] that is
    an upper bound on the real similarity ratio.
    """
    matcher = difflib.SequenceMatcher(None)
    matcher.set_seqs(self.initial_content, new_str)
    return matcher.quick_ratio()
def save(self, commit=True): # noqa FBT002 def save(self, commit=True): # noqa FBT002
revision: PageRev = self.instance revision: PageRev = self.instance
if ( if not self.initial_obj.should_merge(self.instance):
revision._state.adding
or revision.author != self.author
or revision.date + self.TIME_THRESHOLD < now()
or self.diff_ratio(revision.content) < (1 - self.DIFF_THRESHOLD)
):
revision.author = self.author revision.author = self.author
revision.page = self.page revision.page = self.page
revision.id = None # if id is None, Django will create a new record revision.id = None # if id is None, Django will create a new record