diff --git a/galaxy/management/commands/generate_galaxy_test_data.py b/galaxy/management/commands/generate_galaxy_test_data.py index f66bea1e..f6442487 100644 --- a/galaxy/management/commands/generate_galaxy_test_data.py +++ b/galaxy/management/commands/generate_galaxy_test_data.py @@ -21,6 +21,8 @@ # Place - Suite 330, Boston, MA 02111-1307, USA. # # +import warnings +from typing import Final, Optional from django.conf import settings from django.core.files.base import ContentFile @@ -37,18 +39,29 @@ from subscription.models import Subscription from sas.models import Album, Picture, PeoplePictureRelation -RED_PIXEL_PNG = b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52" -RED_PIXEL_PNG += b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90\x77\x53" -RED_PIXEL_PNG += b"\xde\x00\x00\x00\x0c\x49\x44\x41\x54\x08\xd7\x63\xf8\xcf\xc0\x00" -RED_PIXEL_PNG += b"\x00\x03\x01\x01\x00\x18\xdd\x8d\xb0\x00\x00\x00\x00\x49\x45\x4e" -RED_PIXEL_PNG += b"\x44\xae\x42\x60\x82" +RED_PIXEL_PNG: Final[bytes] = ( + b"\x89\x50\x4e\x47\x0d\x0a\x1a\x0a\x00\x00\x00\x0d\x49\x48\x44\x52" + b"\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90\x77\x53" + b"\xde\x00\x00\x00\x0c\x49\x44\x41\x54\x08\xd7\x63\xf8\xcf\xc0\x00" + b"\x00\x03\x01\x01\x00\x18\xdd\x8d\xb0\x00\x00\x00\x00\x49\x45\x4e" + b"\x44\xae\x42\x60\x82" +) -USER_PACK_SIZE = 1000 +USER_PACK_SIZE: Final[int] = 1000 class Command(BaseCommand): help = "Procedurally generate representative data for developing the Galaxy" + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.now = timezone.now().replace(hour=12) + + self.users: Optional[list[User]] = None + self.clubs: Optional[list[Club]] = None + self.picts: Optional[list[Picture]] = None + self.pictures_tags: Optional[list[PeoplePictureRelation]] = None + def add_arguments(self, parser): parser.add_argument( "--user-pack-count", @@ -62,12 +75,15 @@ class Command(BaseCommand): def handle(self, *args, **options): self.logger = 
logging.getLogger("main") - if options["verbosity"] > 1: + if options["verbosity"] < 0 or 3 < options["verbosity"]: + warnings.warn("verbosity level should be between 0 and 3 included") + + if options["verbosity"] >= 2: self.logger.setLevel(logging.DEBUG) - elif options["verbosity"] > 0: + elif options["verbosity"] == 1: self.logger.setLevel(logging.INFO) else: - self.logger.setLevel(logging.NOTSET) + self.logger.setLevel(logging.ERROR) self.logger.info("The Galaxy is being populated by the Sith.") @@ -83,7 +99,6 @@ class Command(BaseCommand): self.NB_USERS = options["user_pack_count"] * USER_PACK_SIZE self.NB_CLUBS = options["club_count"] - self.now = timezone.now().replace(hour=12) root = User.objects.filter(username="root").first() sas = SithFile.objects.get(id=settings.SITH_SAS_ROOT_DIR_ID) self.galaxy_album = Album.objects.create( @@ -105,7 +120,12 @@ class Command(BaseCommand): self.make_important_citizen(u) def make_clubs(self): - """This will create all the clubs and store them in self.clubs for fast access later""" + """ + Create all the clubs (:class:`club.models.Club`) + and store them in `self.clubs` for fast access later. + Don't create the meta groups (:class:`core.models.MetaGroup`) + nor the pages of the clubs (:class:`core.models.Page`) + """ self.clubs = [] for i in range(self.NB_CLUBS): self.clubs.append(Club(unix_name=f"galaxy-club-{i}", name=f"club-{i}")) @@ -114,7 +134,11 @@ class Command(BaseCommand): self.clubs = Club.objects.filter(unix_name__startswith="galaxy-").all() def make_users(self): - """This will create all the users and store them in self.users for fast access later""" + """ + Create all the users and store them in `self.users` for fast access later. + + Also create a subscription for all the generated users. 
+ """ self.users = [] for i in range(self.NB_USERS): u = User( @@ -128,6 +152,7 @@ class Command(BaseCommand): User.objects.bulk_create(self.users) self.users = User.objects.filter(username__startswith="galaxy-").all() + # now that users are created, create their subscriptions subs = [] for i in range(self.NB_USERS): u = self.users[i] @@ -145,10 +170,19 @@ class Command(BaseCommand): def make_families(self): """ + Generate the godfather/godchild relations for the users contained in :attr:`self.users`. + + The :meth:`make_users` method must have been called beforehand. + This will iterate on all citizen after the 200th. - Then it will take 14 other citizen among the 200 preceding (godfathers are usually older), and apply another - heuristic to determine if they should have a family link + Then it will take 14 other citizens among the previous 200 + (godfathers are usually older), and apply another + heuristic to determine whether they should have a family link """ + if self.users is None: + raise RuntimeError( + "The `make_users()` method must be called before `make_families()`" + ) for i in range(200, self.NB_USERS): godfathers = [] for j in range(i - 200, i, 14): # this will loop 14 times (14² = 196) @@ -161,11 +195,25 @@ class Command(BaseCommand): def make_club_memberships(self): """ - This function makes multiple passes on all users to affect them some pseudo-random roles in some clubs. + Assign users to clubs and give them a role in a pseudo-random way. + + The :meth:`make_users` and :meth:`make_clubs` methods + must have been called beforehand. + + Work by making multiple passes on all users to assign + them some pseudo-random roles in some clubs. The multiple passes are useful to get some variations over who goes where. - Each pass for each user has a chance to affect her to two different clubs, increasing a bit more the created - chaos, while remaining purely deterministic. 
+ Each pass for each user has a chance to affect her to two different clubs, + increasing a bit more the created chaos, while remaining purely deterministic. """ + if self.users is None: + raise RuntimeError( + "The `make_users()` method must be called before `make_club_memberships()`" + ) + if self.clubs is None: + raise RuntimeError( + "The `make_clubs()` method must be called before `make_club_memberships()`" + ) memberships = [] for i in range(1, 11): # users can be in up to 20 clubs self.logger.info(f"Club membership, pass {i}") @@ -217,7 +265,15 @@ class Command(BaseCommand): Membership.objects.bulk_create(memberships) def make_pictures(self): - """This function creates pictures for users to be tagged on later""" + """ + Create pictures for users to be tagged on later. + + The :meth:`make_users` method must have been called beforehand. + """ + if self.users is None: + raise RuntimeError( + "The `make_users()` method must be called before `make_pictures()`" + ) self.picts = [] # Create twice as many pictures as users for i in range(self.NB_USERS * 2): @@ -246,8 +302,10 @@ class Command(BaseCommand): def make_pictures_memberships(self): """ - This assigns users to pictures, and makes enough of them for our created users to be eligible for promotion as citizen. - See galaxy.models.Galaxy.rule for details on promotion to citizen. + Assign users to pictures and make enough of them for our + created users to be eligible for promotion as citizen. + + See :meth:`galaxy.models.Galaxy.rule` for details on promotion to citizen. 
""" self.pictures_tags = [] @@ -304,10 +362,20 @@ class Command(BaseCommand): _tag_neighbors(uid, 4, self.NB_USERS, 110) PeoplePictureRelation.objects.bulk_create(self.pictures_tags) - def make_important_citizen(self, uid): + def make_important_citizen(self, uid: int): """ - This will make the user passed in `uid` a more important citizen, that will thus trigger many more connections - to other (lanes) and be dragged towards the center of the Galaxy. + Make the user whose uid is given in parameter a more important citizen, + thus triggering many more connections to others (lanes) + and dragging him towards the center of the Galaxy. + + This promotion is obtained by adding more family links + and by tagging the user in more pictures. + + The users chosen to be added to this user's family shall + also be tagged in more pictures, thus making them also + more important. + + :param uid: the id of the user to make more important """ u1 = self.users[uid] u2 = self.users[uid - 100] diff --git a/galaxy/management/commands/rule_galaxy.py b/galaxy/management/commands/rule_galaxy.py index 5863c903..55cb9ae9 100644 --- a/galaxy/management/commands/rule_galaxy.py +++ b/galaxy/management/commands/rule_galaxy.py @@ -21,6 +21,7 @@ # Place - Suite 330, Boston, MA 02111-1307, USA. 
# # +import warnings from django.core.management.base import BaseCommand from django.db import connection @@ -41,12 +42,15 @@ class Command(BaseCommand): def handle(self, *args, **options): logger = logging.getLogger("main") - if options["verbosity"] > 1: + if options["verbosity"] < 0 or 3 < options["verbosity"]: + warnings.warn("verbosity level should be between 0 and 3 included") + + if options["verbosity"] >= 2: logger.setLevel(logging.DEBUG) - elif options["verbosity"] > 0: + elif options["verbosity"] == 1: logger.setLevel(logging.INFO) else: - logger.setLevel(logging.NOTSET) + logger.setLevel(logging.ERROR) logger.info("The Galaxy is being ruled by the Sith.") galaxy = Galaxy.objects.create() diff --git a/galaxy/models.py b/galaxy/models.py index fafd0905..744cca79 100644 --- a/galaxy/models.py +++ b/galaxy/models.py @@ -28,7 +28,7 @@ import math import logging import time -from typing import List, Tuple, TypedDict +from typing import List, TypedDict, NamedTuple, Union, Optional from django.db import models from django.db.models import Q, Case, F, Value, When, Count @@ -43,10 +43,14 @@ from sas.models import Picture class GalaxyStar(models.Model): """ - This class defines a star (vertex -> user) in the galaxy graph, storing a reference to its owner citizen, and being - referenced by GalaxyLane. + Define a star (vertex -> user) in the galaxy graph, + storing a reference to its owner citizen. - It also stores the individual mass of this star, used to push it towards the center of the galaxy. + Stars are linked to each other through the :class:`GalaxyLane` model. + + Each GalaxyStar has a mass which pushes it towards the center of the galaxy. + This mass is proportional to the number of pictures the owner of the star + is tagged on. """ owner = models.ForeignKey( @@ -72,7 +76,14 @@ class GalaxyStar(models.Model): @property -def current_star(self): +def current_star(self) -> Optional[GalaxyStar]: + """ + The star of this user in the :class:`Galaxy`. 
+ Only take into account the most recent active galaxy. + + :return: The star of this user if there is an active Galaxy + and this user is a citizen of it, else ``None`` + """ return self.stars.filter(galaxy=Galaxy.get_current_galaxy()).last() @@ -82,7 +93,8 @@ setattr(User, "current_star", current_star) class GalaxyLane(models.Model): """ - This class defines a lane (edge -> link between galaxy citizen) in the galaxy map, storing a reference to both its + Define a lane (edge -> link between galaxy citizen) + in the galaxy map, storing a reference to both its ends and the distance it covers. Score details between citizen owning the stars is also stored here. """ @@ -129,7 +141,35 @@ class GalaxyDict(TypedDict): links: List +class RelationScore(NamedTuple): + family: int + pictures: int + clubs: int + + class Galaxy(models.Model): + """ + The Galaxy, a graph linking the active users to each other. + The distance between two users is given by a relation score which takes + into account a few parameters such as the number of pictures they are both tagged on, + the time during which they were in the same clubs and whether they are + in the same family. + + The citizens of the Galaxy are represented by :class:`GalaxyStar` + and their relations by :class:`GalaxyLane`. + + Several galaxies can coexist. In this case, only the most recent active one + shall usually be taken into account. + This is useful to keep the current galaxy while generating a new one + and swapping them only at the very end. + + Please take into account that generating the galaxy is a very expensive + operation. For this reason, try not to call the :meth:`rule` method more + than once a day in production. + + To quickly access the state of a galaxy, use the :attr:`state` attribute. 
+ """ + logger = logging.getLogger("main") GALAXY_SCALE_FACTOR = 2_000 @@ -162,17 +202,19 @@ class Galaxy(models.Model): ################### @classmethod - def compute_user_score(cls, user) -> int: + def compute_user_score(cls, user: User) -> int: """ - This compute an individual score for each citizen. It will later be used by the graph algorithm to push + Compute an individual score for each citizen. + It will later be used by the graph algorithm to push higher scores towards the center of the galaxy. Idea: This could be added to the computation: - - Forum posts - - Picture count - - Counter consumption - - Barman time - - ... + + - Forum posts + - Picture count + - Counter consumption + - Barman time + - ... """ user_score = 1 user_score += cls.query_user_score(user) @@ -187,7 +229,11 @@ class Galaxy(models.Model): return user_score @classmethod - def query_user_score(cls, user) -> int: + def query_user_score(cls, user: User) -> int: + """ + Perform the db query to get the individual score + of the given user in the galaxy. 
+ """ score_query = ( User.objects.filter(id=user.id) .annotate( @@ -214,26 +260,48 @@ class Galaxy(models.Model): #################### @classmethod - def compute_users_score(cls, user1, user2) -> Tuple[int, int, int, int]: + def compute_users_score(cls, user1: User, user2: User) -> RelationScore: + """ + Compute the relationship scores of the two given users + in the following fields : + + - family: if they have some godfather/godchild relation + - pictures: in how many pictures are both tagged + - clubs: during how many days they were members of the same clubs + """ family = cls.compute_users_family_score(user1, user2) pictures = cls.compute_users_pictures_score(user1, user2) clubs = cls.compute_users_clubs_score(user1, user2) - score = family + pictures + clubs - return score, family, pictures, clubs + return RelationScore(family=family, pictures=pictures, clubs=clubs) @classmethod - def compute_users_family_score(cls, user1, user2) -> int: + def compute_users_family_score(cls, user1: User, user2: User) -> int: + """ + Compute the family score of the relation between the given users. + This takes into account mutual godfathers. + + :return: 366 if user1 is the godfather of user2 (or vice versa) else 0 + """ link_count = User.objects.filter( Q(id=user1.id, godfathers=user2) | Q(id=user2.id, godfathers=user1) ).count() - if link_count: + if link_count > 0: cls.logger.debug( f"\t\t- '{user1}' and '{user2}' have {link_count} direct family link" ) return link_count * cls.FAMILY_LINK_POINTS @classmethod - def compute_users_pictures_score(cls, user1, user2) -> int: + def compute_users_pictures_score(cls, user1: User, user2: User) -> int: + """ + Compute the pictures score of the relation between the given users. + + The pictures score is obtained by counting the number + of :class:`Picture` in which they have been both identified. + This score is then multiplied by 2. 
+ + :return: The number of pictures both users have in common, times 2 + """ picture_count = ( Picture.objects.filter(people__user__in=(user1,)) .filter(people__user__in=(user2,)) @@ -246,7 +314,21 @@ class Galaxy(models.Model): return picture_count * cls.PICTURE_POINTS @classmethod - def compute_users_clubs_score(cls, user1, user2) -> int: + def compute_users_clubs_score(cls, user1: User, user2: User) -> int: + """ + Compute the clubs score of the relation between the given users. + + The club score is obtained by counting the number of days + during which the memberships (see :class:`club.models.Membership`) + of both users overlapped. + + For example, if user1 was a member of Unitec from 01/01/2020 to 31/12/2021 + (two years) and user2 was a member of the same club from 01/01/2021 to + 31/12/2022 (also two years, but with an offset of one year), then their + club score is 365. + + :return: the number of days during which both users were in the same club + """ common_clubs = Club.objects.filter(members__in=user1.memberships.all()).filter( members__in=user2.memberships.all() ) @@ -256,6 +338,7 @@ class Galaxy(models.Model): score = 0 for user1_membership in user1_memberships: if user1_membership.end_date is None: + # user1_membership.save() is not called in this function, hence this is safe user1_membership.end_date = timezone.now().date() query = Q( # start2 <= start1 <= end2 start_date__lte=user1_membership.start_date, @@ -296,7 +379,14 @@ class Galaxy(models.Model): ################### @classmethod - def scale_distance(cls, value) -> int: + def scale_distance(cls, value: Union[int, float]) -> int: + """ + Given a numeric value, return a scaled value which can + be used in the Galaxy's graphical interface to set the distance + between two stars + + :return: the scaled value usable in the Galaxy's 3d graph + """ # TODO: this will need adjustements with the real, typical data on Taiste if value == 0: return 4000 # Following calculus would give us +∞, we cap it to 
4000 @@ -319,13 +409,22 @@ class Galaxy(models.Model): def rule(self, picture_count_threshold=10) -> None: """ - This is the main function of the Galaxy. - It iterates over all the rulable users to promote them to citizen, which is a user that has a corresponding star in the Galaxy. - It also builds up the lanes, which are the links between the different citizen. + Main function of the Galaxy. + Iterate over all the rulable users to promote them to citizens. + A citizen is a user who has a corresponding star in the Galaxy. + Also build up the lanes, which are the links between the different citizen. - Rulable users are defined with the `picture_count_threshold`: any user that doesn't match that limit won't be - considered to be promoted to citizen. This very effectively limits the quantity of computing to do, and only includes - users that have had a minimum of activity. + Users who can be ruled are defined with the `picture_count_threshold`: + all users who are identified in a strictly lower number of pictures + won't be promoted to citizens. + This does very effectively limit the quantity of computing to do + and only includes users who have had a minimum of activity. + + This method still remains very expensive, so think thoroughly before + you call it, especially in production. 
+ + :param picture_count_threshold: the minimum number of picture to have to be + included in the galaxy """ total_time = time.time() self.logger.info("Listing rulable citizen.") @@ -387,19 +486,17 @@ class Galaxy(models.Model): star2 = stars[user2.id] - users_score, family, pictures, clubs = Galaxy.compute_users_score( - user1, user2 - ) - distance = self.scale_distance(users_score) + score = Galaxy.compute_users_score(user1, user2) + distance = self.scale_distance(sum(score)) if distance < 30: # TODO: this needs tuning with real-world data lanes.append( GalaxyLane( star1=star1, star2=star2, distance=distance, - family=family, - pictures=pictures, - clubs=clubs, + family=score.family, + pictures=score.pictures, + clubs=score.clubs, ) ) diff --git a/galaxy/tests.py b/galaxy/tests.py index 70196561..400dee63 100644 --- a/galaxy/tests.py +++ b/galaxy/tests.py @@ -46,6 +46,9 @@ class GalaxyTest(TestCase): self.com = User.objects.get(username="comunity") def test_user_self_score(self): + """ + Test that individual user scores are correct + """ with self.assertNumQueries(8): self.assertEqual(Galaxy.compute_user_score(self.root), 9) self.assertEqual(Galaxy.compute_user_score(self.skia), 10) @@ -57,6 +60,10 @@ class GalaxyTest(TestCase): self.assertEqual(Galaxy.compute_user_score(self.com), 1) def test_users_score(self): + """ + Test on the default dataset generated by the `populate` command + that the relation scores are correct + """ expected_scores = { "krophil": { "comunity": {"clubs": 0, "family": 0, "pictures": 0, "score": 0}, @@ -117,15 +124,13 @@ class GalaxyTest(TestCase): while len(users) > 0: user1 = users.pop(0) for user2 in users: - score, family, pictures, clubs = Galaxy.compute_users_score( - user1, user2 - ) + score = Galaxy.compute_users_score(user1, user2) u1 = computed_scores.get(user1.username, {}) u1[user2.username] = { - "score": score, - "family": family, - "pictures": pictures, - "clubs": clubs, + "score": sum(score), + "family": score.family, + 
"pictures": score.pictures, + "clubs": score.clubs, } computed_scores[user1.username] = u1 @@ -133,6 +138,9 @@ class GalaxyTest(TestCase): self.assertDictEqual(expected_scores, computed_scores) def test_page_is_citizen(self): + """ + Test that users can access the galaxy page of users who are citizens + """ with self.assertNumQueries(59): galaxy = Galaxy.objects.create() galaxy.rule(0) # We want all users here @@ -145,6 +153,10 @@ class GalaxyTest(TestCase): ) def test_page_not_citizen(self): + """ + Test that trying to access the galaxy page of a user who is not + a citizen returns a 404 + """ galaxy = Galaxy.objects.create() galaxy.rule(0) # We want all users here self.client.login(username="root", password="plop") @@ -152,6 +164,10 @@ class GalaxyTest(TestCase): self.assertEquals(response.status_code, 404) def test_full_galaxy_state(self): + """ + Test on the more complex dataset generated by the `generate_galaxy_test_data` + command that the relation scores are correct + """ call_command("generate_galaxy_test_data", "-v", "0") galaxy = Galaxy.objects.create() galaxy.rule(26) # We want a fast test