write more extensive documentation

- add documentation to previously documented classes and functions and refactor some of the documented one, in accordance to the PEP257 and ReStructuredText standards ;
- add some type hints ;
- use a NamedTuple for the `Galaxy.compute_users_score` method instead of a raw tuple. Also change a little bit the logic in the function which call the latter ;
- add some additional parameter checks on a few functions ;
- change a little bit the logic of the log level setting for the galaxy related commands.
This commit is contained in:
maréchal
2023-04-20 16:25:55 +02:00
committed by Skia
parent 5b311ba6d4
commit 1f2bbff2d5
4 changed files with 253 additions and 68 deletions

View File

@ -28,7 +28,7 @@ import math
import logging
import time
from typing import List, Tuple, TypedDict
from typing import List, TypedDict, NamedTuple, Union, Optional
from django.db import models
from django.db.models import Q, Case, F, Value, When, Count
@ -43,10 +43,14 @@ from sas.models import Picture
class GalaxyStar(models.Model):
"""
This class defines a star (vertex -> user) in the galaxy graph, storing a reference to its owner citizen, and being
referenced by GalaxyLane.
Define a star (vertex -> user) in the galaxy graph,
storing a reference to its owner citizen.
It also stores the individual mass of this star, used to push it towards the center of the galaxy.
Stars are linked to each others through the :class:`GalaxyLane` model.
Each GalaxyStar has a mass which push it towards the center of the galaxy.
This mass is proportional to the number of pictures the owner of the star
is tagged on.
"""
owner = models.ForeignKey(
@ -72,7 +76,14 @@ class GalaxyStar(models.Model):
@property
def current_star(self):
def current_star(self) -> Optional[GalaxyStar]:
"""
The star of this user in the :class:`Galaxy`.
Only take into account the most recent active galaxy.
:return: The star of this user if there is an active Galaxy
and this user is a citizen of it, else ``None``
"""
return self.stars.filter(galaxy=Galaxy.get_current_galaxy()).last()
@ -82,7 +93,8 @@ setattr(User, "current_star", current_star)
class GalaxyLane(models.Model):
"""
This class defines a lane (edge -> link between galaxy citizen) in the galaxy map, storing a reference to both its
Define a lane (edge -> link between galaxy citizen)
in the galaxy map, storing a reference to both its
ends and the distance it covers.
Score details between citizen owning the stars is also stored here.
"""
@ -129,7 +141,35 @@ class GalaxyDict(TypedDict):
links: List
class RelationScore(NamedTuple):
family: int
pictures: int
clubs: int
class Galaxy(models.Model):
"""
The Galaxy, a graph linking the active users between each others.
The distance between two users is given by a relation score which takes
into account a few parameter like the number of pictures they are both tagged on,
the time during which they were in the same clubs and whether they are
in the same family.
The citizens of the Galaxy are represented by :class:`GalaxyStar`
and their relations by :class:`GalaxyLane`.
Several galaxies can coexist. In this case, only the most recent active one
shall usually be taken into account.
This is useful to keep the current galaxy while generating a new one
and swapping them only at the very end.
Please take into account that generating the galaxy is a very expensive
operation. For this reason, try not to call the :meth:`rule` method more
than once a day in production.
To quickly access to the state of a galaxy, use the :attr:`state` attribute.
"""
logger = logging.getLogger("main")
GALAXY_SCALE_FACTOR = 2_000
@ -162,17 +202,19 @@ class Galaxy(models.Model):
###################
@classmethod
def compute_user_score(cls, user) -> int:
def compute_user_score(cls, user: User) -> int:
"""
This compute an individual score for each citizen. It will later be used by the graph algorithm to push
Compute an individual score for each citizen.
It will later be used by the graph algorithm to push
higher scores towards the center of the galaxy.
Idea: This could be added to the computation:
- Forum posts
- Picture count
- Counter consumption
- Barman time
- ...
- Forum posts
- Picture count
- Counter consumption
- Barman time
- ...
"""
user_score = 1
user_score += cls.query_user_score(user)
@ -187,7 +229,11 @@ class Galaxy(models.Model):
return user_score
@classmethod
def query_user_score(cls, user) -> int:
def query_user_score(cls, user: User) -> int:
"""
Perform the db query to get the individual score
of the given user in the galaxy.
"""
score_query = (
User.objects.filter(id=user.id)
.annotate(
@ -214,26 +260,48 @@ class Galaxy(models.Model):
####################
@classmethod
def compute_users_score(cls, user1, user2) -> Tuple[int, int, int, int]:
def compute_users_score(cls, user1: User, user2: User) -> RelationScore:
"""
Compute the relationship scores of the two given users
in the following fields :
- family: if they have some godfather/godchild relation
- pictures: in how many pictures are both tagged
- clubs: during how many days they were members of the same clubs
"""
family = cls.compute_users_family_score(user1, user2)
pictures = cls.compute_users_pictures_score(user1, user2)
clubs = cls.compute_users_clubs_score(user1, user2)
score = family + pictures + clubs
return score, family, pictures, clubs
return RelationScore(family=family, pictures=pictures, clubs=clubs)
@classmethod
def compute_users_family_score(cls, user1, user2) -> int:
def compute_users_family_score(cls, user1: User, user2: User) -> int:
"""
Compute the family score of the relation between the given users.
This takes into account mutual godfathers.
:return: 366 if user1 is the godfather of user2 (or vice versa) else 0
"""
link_count = User.objects.filter(
Q(id=user1.id, godfathers=user2) | Q(id=user2.id, godfathers=user1)
).count()
if link_count:
if link_count > 0:
cls.logger.debug(
f"\t\t- '{user1}' and '{user2}' have {link_count} direct family link"
)
return link_count * cls.FAMILY_LINK_POINTS
@classmethod
def compute_users_pictures_score(cls, user1, user2) -> int:
def compute_users_pictures_score(cls, user1: User, user2: User) -> int:
"""
Compute the pictures score of the relation between the given users.
The pictures score is obtained by counting the number
of :class:`Picture` in which they have been both identified.
This score is then multiplied by 2.
:return: The number of pictures both users have in common, times 2
"""
picture_count = (
Picture.objects.filter(people__user__in=(user1,))
.filter(people__user__in=(user2,))
@ -246,7 +314,21 @@ class Galaxy(models.Model):
return picture_count * cls.PICTURE_POINTS
@classmethod
def compute_users_clubs_score(cls, user1, user2) -> int:
def compute_users_clubs_score(cls, user1: User, user2: User) -> int:
"""
Compute the clubs score of the relation between the given users.
The club score is obtained by counting the number of days
during which the memberships (see :class:`club.models.Membership`)
of both users overlapped.
For example, if user1 was a member of Unitec from 01/01/2020 to 31/12/2021
(two years) and user2 was a member of the same club from 01/01/2021 to
31/12/2022 (also two years, but with an offset of one year), then their
club score is 365.
:return: the number of days during which both users were in the same club
"""
common_clubs = Club.objects.filter(members__in=user1.memberships.all()).filter(
members__in=user2.memberships.all()
)
@ -256,6 +338,7 @@ class Galaxy(models.Model):
score = 0
for user1_membership in user1_memberships:
if user1_membership.end_date is None:
# user1_membership.save() is not called in this function, hence this is safe
user1_membership.end_date = timezone.now().date()
query = Q( # start2 <= start1 <= end2
start_date__lte=user1_membership.start_date,
@ -296,7 +379,14 @@ class Galaxy(models.Model):
###################
@classmethod
def scale_distance(cls, value) -> int:
def scale_distance(cls, value: Union[int, float]) -> int:
"""
Given a numeric value, return a scaled value which can
be used in the Galaxy's graphical interface to set the distance
between two stars
:return: the scaled value usable in the Galaxy's 3d graph
"""
# TODO: this will need adjustements with the real, typical data on Taiste
if value == 0:
return 4000 # Following calculus would give us +∞, we cap it to 4000
@ -319,13 +409,22 @@ class Galaxy(models.Model):
def rule(self, picture_count_threshold=10) -> None:
"""
This is the main function of the Galaxy.
It iterates over all the rulable users to promote them to citizen, which is a user that has a corresponding star in the Galaxy.
It also builds up the lanes, which are the links between the different citizen.
Main function of the Galaxy.
Iterate over all the rulable users to promote them to citizens.
A citizen is a user who has a corresponding star in the Galaxy.
Also build up the lanes, which are the links between the different citizen.
Rulable users are defined with the `picture_count_threshold`: any user that doesn't match that limit won't be
considered to be promoted to citizen. This very effectively limits the quantity of computing to do, and only includes
users that have had a minimum of activity.
Users who can be ruled are defined with the `picture_count_threshold`:
all users who are identified in a strictly lower number of pictures
won't be promoted to citizens.
This does very effectively limit the quantity of computing to do
and only includes users who have had a minimum of activity.
This method still remains very expensive, so think thoroughly before
you call it, especially in production.
:param picture_count_threshold: the minimum number of picture to have to be
included in the galaxy
"""
total_time = time.time()
self.logger.info("Listing rulable citizen.")
@ -387,19 +486,17 @@ class Galaxy(models.Model):
star2 = stars[user2.id]
users_score, family, pictures, clubs = Galaxy.compute_users_score(
user1, user2
)
distance = self.scale_distance(users_score)
score = Galaxy.compute_users_score(user1, user2)
distance = self.scale_distance(sum(score))
if distance < 30: # TODO: this needs tuning with real-world data
lanes.append(
GalaxyLane(
star1=star1,
star2=star2,
distance=distance,
family=family,
pictures=pictures,
clubs=clubs,
family=score.family,
pictures=score.pictures,
clubs=score.clubs,
)
)