add diff ratio to the heuristic

This commit is contained in:
imperosol
2025-11-11 14:16:43 +01:00
parent ede15623df
commit c0ed5bd393
2 changed files with 27 additions and 9 deletions

View File

@@ -43,15 +43,17 @@ class TestEditPage:
user = baker.make(User, is_superuser=True)
page = baker.prepare(Page)
page.save(force_lock=True)
first_rev = baker.make(PageRev, author=user, page=page, date=now())
first_rev = baker.make(
PageRev, author=user, page=page, date=now(), content="Hello World"
)
client.force_login(user)
url = reverse("core:page_edit", kwargs={"page_name": page._full_name})
client.post(url, data={"content": "Hello World"})
client.post(url, data={"content": "Hello World!"})
assert page.revisions.count() == 1
assert page.revisions.last() == first_rev
first_rev.refresh_from_db()
assert first_rev.author == user
assert first_rev.content == "Hello World"
assert first_rev.content == "Hello World!"
def test_pagerev_not_reused(self, client):
"""Test that a new revision is created if too much time
@@ -71,6 +73,7 @@ class TestEditPage:
@pytest.mark.django_db
def test_page_revision(client: Client):
"""Test the GET to request to a specific revision page."""
page = baker.prepare(Page)
page.save(force_lock=True)
page.view_groups.add(settings.SITH_GROUP_SUBSCRIBERS_ID)

View File

@@ -20,6 +20,7 @@
# Place - Suite 330, Boston, MA 02111-1307, USA.
#
#
import difflib
import re
from datetime import date, datetime, timedelta
from io import BytesIO
@@ -385,29 +386,43 @@ class PageRevisionForm(forms.ModelForm):
Notes:
Saving this form won't always result in a new revision.
If the previous revision on the same page was made less
than 20 minutes ago by the same author,
If the previous revision on the same page was made :
- less than 20 minutes ago
- by the same author
- with a diff ratio higher than 20%
then the latter will be edited and the new revision won't be created.
"""
UPDATE_THRESHOLD = timedelta(minutes=20)
TIME_THRESHOLD = timedelta(minutes=20)
DIFF_THRESHOLD = 0.2
class Meta:
model = PageRev
fields = ["title", "content"]
widgets = {"content": MarkdownInput}
def __init__(self, *args, author: User, page: Page, **kwargs):
super().__init__(*args, **kwargs)
def __init__(
self, *args, author: User, page: Page, instance: PageRev | None = None, **kwargs
):
super().__init__(*args, instance=instance, **kwargs)
self.author = author
self.page = page
self.initial_content = instance.content if instance else ""
def diff_ratio(self, new_str: str) -> float:
return difflib.SequenceMatcher(
None, self.initial_content, new_str
).quick_ratio()
def save(self, commit=True): # noqa FBT002
revision: PageRev = self.instance
if (
revision._state.adding
or revision.author != self.author
or revision.date + self.UPDATE_THRESHOLD < now()
or revision.date + self.TIME_THRESHOLD < now()
or self.diff_ratio(revision.content) < (1 - self.DIFF_THRESHOLD)
):
revision.author = self.author
revision.page = self.page