refactor detection of the need to merge PageRev

This commit is contained in:
imperosol
2025-11-19 13:51:38 +01:00
parent 449abbb17e
commit 3f4a41ba42
2 changed files with 36 additions and 19 deletions

View File

@@ -23,12 +23,13 @@
#
from __future__ import annotations
import difflib
import string
import unicodedata
from datetime import timedelta
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Self
from typing import TYPE_CHECKING, Final, Self
from uuid import uuid4
from django.conf import settings
@@ -1344,6 +1345,9 @@ class PageRev(models.Model):
The content is in PageRev.title and PageRev.content .
"""
MERGE_TIME_THRESHOLD: Final[timedelta] = timedelta(minutes=20)
MERGE_DIFF_THRESHOLD: Final[float] = 0.2
revision = models.IntegerField(_("revision"))
title = models.CharField(_("page title"), max_length=255, blank=True)
content = models.TextField(_("page content"), blank=True)
@@ -1385,6 +1389,32 @@ class PageRev(models.Model):
def is_owned_by(self, user: User) -> bool:
return any(g.id == self.page.owner_group_id for g in user.cached_groups)
def similarity_ratio(self, text: str) -> float:
"""Similarity ratio between this revision's content and the given text.
The result is a float in [0; 1], 0 meaning the contents are entirely different,
and 1 they are strictly the same.
"""
# cf. https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.ratio
return difflib.SequenceMatcher(None, self.content, text).quick_ratio()
def should_merge(self, other: Self) -> bool:
"""Return True if `other` should be merged into `self`, else False.
It's considered the other revision should be merged into this one if :
- it was made less than 20 minutes after
- by the same author
- with a similarity ratio higher than 80%
"""
return (
not self._state.adding # cannot merge if the original rev doesn't exist
and self.author == other.author
and (other.date - self.date) < self.MERGE_TIME_THRESHOLD
and (not other._state.adding or other.revision == self.revision + 1)
and self.similarity_ratio(other.content) >= (1 - other.MERGE_DIFF_THRESHOLD)
)
def get_notification_types():
return settings.SITH_NOTIFICATIONS