mirror of
https://github.com/ae-utbm/sith.git
synced 2025-11-22 12:46:58 +00:00
refactor detection of the need to merge PageRev
This commit is contained in:
@@ -23,12 +23,13 @@
|
|||||||
#
|
#
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import difflib
|
||||||
import string
|
import string
|
||||||
import unicodedata
|
import unicodedata
|
||||||
from datetime import timedelta
|
from datetime import timedelta
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import TYPE_CHECKING, Self
|
from typing import TYPE_CHECKING, Final, Self
|
||||||
from uuid import uuid4
|
from uuid import uuid4
|
||||||
|
|
||||||
from django.conf import settings
|
from django.conf import settings
|
||||||
@@ -1344,6 +1345,9 @@ class PageRev(models.Model):
|
|||||||
The content is in PageRev.title and PageRev.content .
|
The content is in PageRev.title and PageRev.content .
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
MERGE_TIME_THRESHOLD: Final[timedelta] = timedelta(minutes=20)
|
||||||
|
MERGE_DIFF_THRESHOLD: Final[float] = 0.2
|
||||||
|
|
||||||
revision = models.IntegerField(_("revision"))
|
revision = models.IntegerField(_("revision"))
|
||||||
title = models.CharField(_("page title"), max_length=255, blank=True)
|
title = models.CharField(_("page title"), max_length=255, blank=True)
|
||||||
content = models.TextField(_("page content"), blank=True)
|
content = models.TextField(_("page content"), blank=True)
|
||||||
@@ -1385,6 +1389,32 @@ class PageRev(models.Model):
|
|||||||
def is_owned_by(self, user: User) -> bool:
|
def is_owned_by(self, user: User) -> bool:
|
||||||
return any(g.id == self.page.owner_group_id for g in user.cached_groups)
|
return any(g.id == self.page.owner_group_id for g in user.cached_groups)
|
||||||
|
|
||||||
|
def similarity_ratio(self, text: str) -> float:
|
||||||
|
"""Similarity ratio between this revision's content and the given text.
|
||||||
|
|
||||||
|
The result is a float in [0; 1], 0 meaning the contents are entirely different,
|
||||||
|
and 1 meaning they are strictly the same.
|
||||||
|
"""
|
||||||
|
# cf. https://docs.python.org/3/library/difflib.html#difflib.SequenceMatcher.ratio
|
||||||
|
return difflib.SequenceMatcher(None, self.content, text).quick_ratio()
|
||||||
|
|
||||||
|
def should_merge(self, other: Self) -> bool:
|
||||||
|
"""Return True if `other` should be merged into `self`, else False.
|
||||||
|
|
||||||
|
The other revision is considered mergeable into this one if:
|
||||||
|
|
||||||
|
- it was made less than 20 minutes after
|
||||||
|
- by the same author
|
||||||
|
- with a similarity ratio of at least 80%
|
||||||
|
"""
|
||||||
|
return (
|
||||||
|
not self._state.adding # cannot merge if the original rev doesn't exist
|
||||||
|
and self.author == other.author
|
||||||
|
and (other.date - self.date) < self.MERGE_TIME_THRESHOLD
|
||||||
|
and (not other._state.adding or other.revision == self.revision + 1)
|
||||||
|
and self.similarity_ratio(other.content) >= (1 - other.MERGE_DIFF_THRESHOLD)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def get_notification_types():
|
def get_notification_types():
|
||||||
return settings.SITH_NOTIFICATIONS
|
return settings.SITH_NOTIFICATIONS
|
||||||
|
|||||||
@@ -20,9 +20,9 @@
|
|||||||
# Place - Suite 330, Boston, MA 02111-1307, USA.
|
# Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
import difflib
|
|
||||||
import re
|
import re
|
||||||
from datetime import date, datetime, timedelta
|
from copy import copy
|
||||||
|
from datetime import date, datetime
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
|
|
||||||
from captcha.fields import CaptchaField
|
from captcha.fields import CaptchaField
|
||||||
@@ -390,14 +390,11 @@ class PageRevisionForm(forms.ModelForm):
|
|||||||
|
|
||||||
- less than 20 minutes ago
|
- less than 20 minutes ago
|
||||||
- by the same author
|
- by the same author
|
||||||
- with a diff ratio higher than 20%
|
- with a similarity ratio of at least 80%
|
||||||
|
|
||||||
then the latter will be edited and the new revision won't be created.
|
then the latter will be edited and the new revision won't be created.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
TIME_THRESHOLD = timedelta(minutes=20)
|
|
||||||
DIFF_THRESHOLD = 0.2
|
|
||||||
|
|
||||||
class Meta:
|
class Meta:
|
||||||
model = PageRev
|
model = PageRev
|
||||||
fields = ["title", "content"]
|
fields = ["title", "content"]
|
||||||
@@ -409,21 +406,11 @@ class PageRevisionForm(forms.ModelForm):
|
|||||||
super().__init__(*args, instance=instance, **kwargs)
|
super().__init__(*args, instance=instance, **kwargs)
|
||||||
self.author = author
|
self.author = author
|
||||||
self.page = page
|
self.page = page
|
||||||
self.initial_content = instance.content if instance else ""
|
self.initial_obj: PageRev = copy(self.instance)
|
||||||
|
|
||||||
def diff_ratio(self, new_str: str) -> float:
|
|
||||||
return difflib.SequenceMatcher(
|
|
||||||
None, self.initial_content, new_str
|
|
||||||
).quick_ratio()
|
|
||||||
|
|
||||||
def save(self, commit=True): # noqa FBT002
|
def save(self, commit=True): # noqa FBT002
|
||||||
revision: PageRev = self.instance
|
revision: PageRev = self.instance
|
||||||
if (
|
if not self.initial_obj.should_merge(self.instance):
|
||||||
revision._state.adding
|
|
||||||
or revision.author != self.author
|
|
||||||
or revision.date + self.TIME_THRESHOLD < now()
|
|
||||||
or self.diff_ratio(revision.content) < (1 - self.DIFF_THRESHOLD)
|
|
||||||
):
|
|
||||||
revision.author = self.author
|
revision.author = self.author
|
||||||
revision.page = self.page
|
revision.page = self.page
|
||||||
revision.id = None # if id is None, Django will create a new record
|
revision.id = None # if id is None, Django will create a new record
|
||||||
|
|||||||
Reference in New Issue
Block a user