galaxy: big refactor

Main changes:
  - Multiple Galaxy objects can now exist at the same time in DB. This allows for ruling a new galaxy while still
    displaying the old one.
  - The criteria to quickly know whether a user is a possible citizen is now a simple query on picture count. This
    avoids a very complicated query to database, that could often result in huge working memory load. With this change,
    it should be possible to run the galaxy even on a vanilla Postgres that didn't receive fine tuning for the Sith's
    galaxy.
This commit is contained in:
Skia 2023-04-14 17:33:11 +02:00
parent 1aa3bb8cc4
commit 31093fff43
7 changed files with 339 additions and 196 deletions

View File

@ -49,11 +49,10 @@ class Command(BaseCommand):
logger.setLevel(logging.NOTSET)
logger.info("The Galaxy is being ruled by the Sith.")
Galaxy.rule()
logger.info(
"Caching current Galaxy state for a quicker display of the Empire's power."
)
Galaxy.make_state()
galaxy = Galaxy.objects.create()
galaxy.rule()
logger.info("Sending old galaxies' remains to garbage.")
Galaxy.objects.filter(state__isnull=True).delete()
logger.info("Ruled the galaxy in {} queries.".format(len(connection.queries)))
if options["verbosity"] > 2:

View File

@ -0,0 +1,45 @@
# Generated by Django 3.2.16 on 2023-04-12 09:30
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
class Migration(migrations.Migration):
dependencies = [
migrations.swappable_dependency(settings.AUTH_USER_MODEL),
("galaxy", "0001_initial"),
]
operations = [
migrations.AlterModelOptions(
name="galaxy",
options={"ordering": ["pk"]},
),
migrations.AddField(
model_name="galaxystar",
name="galaxy",
field=models.ForeignKey(
null=True,
on_delete=django.db.models.deletion.CASCADE,
related_name="stars",
to="galaxy.galaxy",
verbose_name="the galaxy this star belongs to",
),
),
migrations.AlterField(
model_name="galaxy",
name="state",
field=models.JSONField(null=True, verbose_name="The galaxy current state"),
),
migrations.AlterField(
model_name="galaxystar",
name="owner",
field=models.ForeignKey(
on_delete=django.db.models.deletion.CASCADE,
related_name="stars",
to=settings.AUTH_USER_MODEL,
verbose_name="star owner",
),
),
]

View File

@ -22,17 +22,19 @@
#
#
from __future__ import annotations
import math
import logging
import time
from typing import Tuple
from typing import List, Tuple, TypedDict
from django.db import models
from django.db.models import Q, Case, F, Value, When, Count
from django.db.models.functions import Concat
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
from typing import List, TypedDict
from core.models import User
from club.models import Club
@ -47,21 +49,37 @@ class GalaxyStar(models.Model):
It also stores the individual mass of this star, used to push it towards the center of the galaxy.
"""
owner = models.OneToOneField(
owner = models.ForeignKey(
User,
verbose_name=_("star owner"),
related_name="galaxy_user",
related_name="stars",
on_delete=models.CASCADE,
)
mass = models.PositiveIntegerField(
_("star mass"),
default=0,
)
galaxy = models.ForeignKey(
"Galaxy",
verbose_name=_("the galaxy this star belongs to"),
related_name="stars",
on_delete=models.CASCADE,
null=True,
)
def __str__(self):
return str(self.owner)
@property
def current_star(self):
return self.stars.filter(galaxy=Galaxy.get_current_galaxy()).last()
# Adding a shortcut to User class for getting its star belonging to the latest ruled Galaxy
setattr(User, "current_star", current_star)
class GalaxyLane(models.Model):
"""
This class defines a lane (edge -> link between galaxy citizen) in the galaxy map, storing a reference to both its
@ -119,60 +137,25 @@ class Galaxy(models.Model):
PICTURE_POINTS = 2 # Equivalent to two days as random members of a club.
CLUBS_POINTS = 1 # One day together as random members in a club is one point.
state = models.JSONField("current state")
state = models.JSONField(_("The galaxy current state"), null=True)
@staticmethod
def make_state() -> None:
"""
Compute JSON structure to send to 3d-force-graph: https://github.com/vasturiano/3d-force-graph/
"""
without_nickname = Concat(
F("owner__first_name"), Value(" "), F("owner__last_name")
)
with_nickname = Concat(
F("owner__first_name"),
Value(" "),
F("owner__last_name"),
Value(" ("),
F("owner__nick_name"),
Value(")"),
)
stars = GalaxyStar.objects.annotate(
owner_name=Case(
When(owner__nick_name=None, then=without_nickname),
default=with_nickname,
)
)
lanes = GalaxyLane.objects.annotate(
star1_owner=F("star1__owner__id"),
star2_owner=F("star2__owner__id"),
)
json = GalaxyDict(
nodes=[
StarDict(id=star.owner_id, name=star.owner_name, mass=star.mass)
for star in stars
],
links=[],
)
# Make bidirectional links
# TODO: see if this impacts performance with a big graph
for path in lanes:
json["links"].append(
{
"source": path.star1_owner,
"target": path.star2_owner,
"value": path.distance,
}
)
json["links"].append(
{
"source": path.star2_owner,
"target": path.star1_owner,
"value": path.distance,
}
)
Galaxy.objects.all().delete()
Galaxy(state=json).save()
class Meta:
ordering = ["pk"]
def __str__(self):
stars_count = self.stars.count()
s = f"GLX-ID{self.pk}-SC{stars_count}-"
if self.state is None:
s += "CHS" # CHAOS
else:
s += "RLD" # RULED
return s
@classmethod
def get_current_galaxy(
cls,
) -> Galaxy: # __future__.annotations is required for this
return Galaxy.objects.filter(state__isnull=False).last()
###################
# User self score #
@ -312,98 +295,11 @@ class Galaxy(models.Model):
# Rule the galaxy #
###################
@classmethod
def rule(cls) -> None:
cls.logger.info("Eradicating previous Galaxy.")
GalaxyStar.objects.all().delete()
# The following is a no-op thanks to cascading, but in case that changes in the future, better keep it anyway.
GalaxyLane.objects.all().delete()
cls.logger.info("Galaxy has been purged.")
cls.logger.info("Listing rulable citizen.")
rulable_users = (
User.objects.filter(subscriptions__isnull=False)
.filter(
Q(godchildren__isnull=False)
| Q(godfathers__isnull=False)
| Q(pictures__isnull=False)
| Q(memberships__isnull=False)
)
.distinct()
)
# force fetch of the whole query to make sure there won't
# be any more db hits
# this is memory expensive but prevents a lot of db hits, therefore
# is far more time efficient
rulable_users = list(rulable_users)
rulable_users_count = len(rulable_users)
user1_count = 0
cls.logger.info(
f"{rulable_users_count} citizen have been listed. Starting to rule."
)
# Display current speed every $speed_count_frequency users
speed_count_frequency = 100
avg_speed = 0
avg_speed_count = 0
while len(rulable_users) > 0:
user1 = rulable_users.pop()
user1_count += 1
rulable_users_count2 = len(rulable_users)
star1, _ = GalaxyStar.objects.get_or_create(owner=user1)
if star1.mass == 0:
star1.mass = cls.compute_user_score(user1)
star1.save()
tstart = time.time()
for user2_count, user2 in enumerate(rulable_users, start=1):
cls.logger.debug("")
cls.logger.debug(
f"\t> Examining '{user1}' ({user1_count}/{rulable_users_count}) with '{user2}' ({user2_count}/{rulable_users_count2})"
)
star2, _ = GalaxyStar.objects.get_or_create(owner=user2)
users_score, family, pictures, clubs = cls.compute_users_score(
user1, user2
)
if users_score > 0:
GalaxyLane(
star1=star1,
star2=star2,
distance=cls.scale_distance(users_score),
family=family,
pictures=pictures,
clubs=clubs,
).save()
if user2_count % speed_count_frequency == 0:
tend = time.time()
delta = tend - tstart
speed = float(speed_count_frequency) / delta
avg_speed += speed
avg_speed_count += 1
cls.logger.info("")
cls.logger.info(
f"\t> Ruling citizen {user1_count}/{rulable_users_count} against citizen {user2_count}/{rulable_users_count2}"
)
cls.logger.info(
f"Speed: {speed:.2f} users per second (delta: {delta:.2f})"
)
eta = (rulable_users_count * rulable_users_count2) / speed / 3600
cls.logger.info(
"Estimated remaining time: {0:.2f} hours ({1:.2f} days)".format(
eta, eta / 24
)
)
tstart = time.time()
avg_speed /= avg_speed_count
cls.logger.info(f"Average speed: {avg_speed:.2f} users per second")
@classmethod
def scale_distance(cls, value) -> int:
# TODO: this will need adjustements with the real, typical data on Taiste
if value == 0:
return 4000 # Following calculus would give us +∞, we cap it to 4000
cls.logger.debug(f"\t\t> Score: {value}")
# Invert score to draw close users together
@ -420,3 +316,199 @@ class Galaxy(models.Model):
)
cls.logger.debug(f"\t\t> Scaled distance: {value}")
return int(value)
def rule(self, picture_count_threshold=10) -> None:
"""
This is the main function of the Galaxy.
It iterates over all the rulable users to promote them to citizen, which is a user that has a corresponding star in the Galaxy.
It also builds up the lanes, which are the links between the different citizen.
Rulable users are defined with the `picture_count_threshold`: any user that doesn't match that limit won't be
considered to be promoted to citizen. This very effectively limits the quantity of computing to do, and only includes
users that have had a minimum of activity.
"""
total_time = time.time()
self.logger.info("Listing rulable citizen.")
rulable_users = (
User.objects.filter(subscriptions__isnull=False)
.annotate(pictures_count=Count("pictures"))
.filter(pictures_count__gt=picture_count_threshold)
.distinct()
)
# force fetch of the whole query to make sure there won't
# be any more db hits
# this is memory expensive but prevents a lot of db hits, therefore
# is far more time efficient
rulable_users = list(rulable_users)
rulable_users_count = len(rulable_users)
user1_count = 0
self.logger.info(
f"{rulable_users_count} citizen have been listed. Starting to rule."
)
stars = GalaxyStar.objects.filter(galaxy=self)
# Display current speed every $speed_count_frequency users
speed_count_frequency = max(rulable_users_count // 10, 1) # ten time at most
global_avg_speed_accumulator = 0
global_avg_speed_count = 0
t_global_start = time.time()
while len(rulable_users) > 0:
user1 = rulable_users.pop()
user1_count += 1
rulable_users_count2 = len(rulable_users)
star1, created = stars.get_or_create(owner=user1)
if created:
star1.galaxy = self
star1.save()
if star1.mass == 0:
star1.mass = self.compute_user_score(user1)
star1.save()
user_avg_speed = 0
user_avg_speed_count = 0
tstart = time.time()
for user2_count, user2 in enumerate(rulable_users, start=1):
self.logger.debug("")
self.logger.debug(
f"\t> Examining '{user1}' ({user1_count}/{rulable_users_count}) with '{user2}' ({user2_count}/{rulable_users_count2})"
)
star2, created = stars.get_or_create(owner=user2)
if created:
star2.galaxy = self
star2.save()
users_score, family, pictures, clubs = Galaxy.compute_users_score(
user1, user2
)
distance = self.scale_distance(users_score)
if distance < 30: # TODO: this needs tuning with real-world data
GalaxyLane(
star1=star1,
star2=star2,
distance=distance,
family=family,
pictures=pictures,
clubs=clubs,
).save()
if user2_count % speed_count_frequency == 0:
tend = time.time()
delta = tend - tstart
speed = float(speed_count_frequency) / delta
user_avg_speed += speed
user_avg_speed_count += 1
self.logger.debug(
f"\tSpeed: {speed:.2f} users per second (time for last {speed_count_frequency} citizens: {delta:.2f} second)"
)
tstart = time.time()
self.logger.info("")
t_global_end = time.time()
global_delta = t_global_end - t_global_start
speed = 1.0 / global_delta
global_avg_speed_accumulator += speed
global_avg_speed_count += 1
global_avg_speed = global_avg_speed_accumulator / global_avg_speed_count
self.logger.info(f" Ruling of {self} ".center(60, "#"))
self.logger.info(
f"Progression: {user1_count}/{rulable_users_count} citizen -- {rulable_users_count - user1_count} remaining"
)
self.logger.info(f"Speed: {60.0*global_avg_speed:.2f} citizen per minute")
# We can divide the computed ETA by 2 because each loop, there is one citizen less to check, and maths tell
# us that this averages to a division by two
eta = rulable_users_count2 / global_avg_speed / 2
eta_hours = int(eta // 3600)
eta_minutes = int(eta // 60 % 60)
self.logger.info(
f"ETA: {eta_hours} hours {eta_minutes} minutes ({eta / 3600 / 24:.2f} days)"
)
self.logger.info("#" * 60)
t_global_start = time.time()
# Here, we get the IDs of the old galaxies that we'll need to delete. In normal operation, only one galaxy
# should be returned, and we can't delete it yet, as it's the one still displayed by the Sith.
old_galaxies_pks = list(
Galaxy.objects.filter(state__isnull=False).values_list("pk", flat=True)
)
self.logger.info(
f"These old galaxies will be deleted once the new one is ready: {old_galaxies_pks}"
)
# Making the state sets this new galaxy as being ready. From now on, the Sith will show us to the world.
self.make_state()
# Avoid accident if there is nothing to delete
if len(old_galaxies_pks) > 0:
# Former galaxies can now be deleted.
Galaxy.objects.filter(pk__in=old_galaxies_pks).delete()
total_time = time.time() - total_time
total_time_hours = int(total_time // 3600)
total_time_minutes = int(total_time // 60 % 60)
total_time_seconds = int(total_time % 60)
self.logger.info(
f"{self} ruled in {total_time:.2f} seconds ({total_time_hours} hours, {total_time_minutes} minutes, {total_time_seconds} seconds)"
)
def make_state(self) -> None:
"""
Compute JSON structure to send to 3d-force-graph: https://github.com/vasturiano/3d-force-graph/
"""
self.logger.info(
"Caching current Galaxy state for a quicker display of the Empire's power."
)
without_nickname = Concat(
F("owner__first_name"), Value(" "), F("owner__last_name")
)
with_nickname = Concat(
F("owner__first_name"),
Value(" "),
F("owner__last_name"),
Value(" ("),
F("owner__nick_name"),
Value(")"),
)
stars = GalaxyStar.objects.filter(galaxy=self).annotate(
owner_name=Case(
When(owner__nick_name=None, then=without_nickname),
default=with_nickname,
)
)
lanes = GalaxyLane.objects.filter(star1__galaxy=self).annotate(
star1_owner=F("star1__owner__id"),
star2_owner=F("star2__owner__id"),
)
json = GalaxyDict(
nodes=[
StarDict(
id=star.owner_id,
name=star.owner_name,
mass=star.mass,
)
for star in stars
],
links=[],
)
for path in lanes:
json["links"].append(
{
"source": path.star1_owner,
"target": path.star2_owner,
"value": path.distance,
}
)
self.state = json
self.save()
self.logger.info(f"{self} is now ready!")

File diff suppressed because one or more lines are too long

View File

@ -1,37 +1,42 @@
{% extends "core/base.jinja" %}
{% block title %}
{% trans user_name=user.get_display_name() %}{{ user_name }}'s Galaxy{% endtrans %}
{% trans user_name=object.get_display_name() %}{{ user_name }}'s Galaxy{% endtrans %}
{% endblock %}
{% block content %}
{% if object.galaxy_user %}
<p><a onclick="focus_node(get_node_from_id({{ object.id }}))">Reset on {{ object.get_display_name() }}</a></p>
<p>Self score: {{ object.galaxy_user.mass }}</p>
<table style="width: initial;">
<tr>
<th></th>
<th>Citizen</th>
<th>Score</th>
<th>Distance</th>
<th>Family</th>
<th>Pictures</th>
<th>Clubs</th>
</tr>
{% for lane in lanes %}
<tr>
<td><a onclick="focus_node(get_node_from_id({{ lane.other_star_id }}))">Locate</a></td>
<td><a href="{{ url("galaxy:user", user_id=lane.other_star_id) }}">{{ lane.other_star_name }}</a></td>
<td>{{ lane.other_star_mass }}</td>
<td>{{ lane.distance }}</td>
<td>{{ lane.family }}</td>
<td>{{ lane.pictures }}</td>
<td>{{ lane.clubs }}</td>
</tr>
{% endfor %}
</table>
{% if object.current_star %}
<div style="display: flex; flex-wrap: wrap;">
<div id="3d-graph" style="margin: 1em;"></div>
<div id="3d-graph" style="margin: 1em;"></div>
<div>
<p><a onclick="focus_node(get_node_from_id({{ object.id }}))">Reset on {{ object.get_display_name() }}</a></p>
<p>Self score: {{ object.current_star.mass }}</p>
<table style="width: initial;">
<tr>
<th></th>
<th>Citizen</th>
<th>Score</th>
<th>Distance</th>
<th>Family</th>
<th>Pictures</th>
<th>Clubs</th>
</tr>
{% for lane in lanes %}
<tr>
<td><a onclick="focus_node(get_node_from_id({{ lane.other_star_id }}))">Locate</a></td>
<td><a href="{{ url("galaxy:user", user_id=lane.other_star_id) }}">{{ lane.other_star_name }}</a></td>
<td>{{ lane.other_star_mass }}</td>
<td>{{ lane.distance }}</td>
<td>{{ lane.family }}</td>
<td>{{ lane.pictures }}</td>
<td>{{ lane.clubs }}</td>
</tr>
{% endfor %}
</table>
</div>
</div>
<p>#{{ object.current_star.galaxy }}#</p>
{% else %}
<p>This citizen has not yet joined the galaxy</p>
{% endif %}

View File

@ -26,8 +26,9 @@ import json
from pathlib import Path
from django.test import TestCase
from django.core.management import call_command
from django.test import TestCase
from django.urls import reverse
from core.models import User
from galaxy.models import Galaxy
@ -132,9 +133,11 @@ class GalaxyTest(TestCase):
self.assertDictEqual(expected_scores, computed_scores)
def test_page_is_citizen(self):
Galaxy.rule()
with self.assertNumQueries(84):
galaxy = Galaxy.objects.create()
galaxy.rule(0) # We want all users here
self.client.login(username="root", password="plop")
response = self.client.get("/galaxy/1/")
response = self.client.get(reverse("galaxy:user", args=[1]))
self.assertContains(
response,
'<a onclick="focus_node(get_node_from_id(8))">Locate</a>',
@ -142,15 +145,17 @@ class GalaxyTest(TestCase):
)
def test_page_not_citizen(self):
Galaxy.rule()
galaxy = Galaxy.objects.create()
galaxy.rule(0) # We want all users here
self.client.login(username="root", password="plop")
response = self.client.get("/galaxy/2/")
response = self.client.get(reverse("galaxy:user", args=[2]))
self.assertEquals(response.status_code, 404)
def test_full_galaxy_state(self):
Galaxy.rule()
Galaxy.make_state()
state = Galaxy.objects.first().state
call_command("generate_galaxy_test_data", "-v", "0")
galaxy = Galaxy.objects.create()
galaxy.rule(26) # We want a fast test
state = Galaxy.objects.last().state
galaxy_dir = Path(__file__).parent

View File

@ -45,32 +45,29 @@ class GalaxyUserView(CanViewMixin, UserTabsMixin, DetailView):
def get_object(self, *args, **kwargs):
user: User = super(GalaxyUserView, self).get_object(*args, **kwargs)
if not hasattr(user, "galaxy_user"):
if user.current_star is None:
raise Http404(_("This citizen has not yet joined the galaxy"))
return user
def get_queryset(self):
return super(GalaxyUserView, self).get_queryset().select_related("galaxy_user")
def get_context_data(self, **kwargs):
kwargs = super(GalaxyUserView, self).get_context_data(**kwargs)
kwargs["lanes"] = (
GalaxyLane.objects.filter(
Q(star1=self.object.galaxy_user) | Q(star2=self.object.galaxy_user)
Q(star1=self.object.current_star) | Q(star2=self.object.current_star)
)
.order_by("distance")
.annotate(
other_star_id=Case(
When(star1=self.object.galaxy_user, then=F("star2__owner__id")),
When(star1=self.object.current_star, then=F("star2__owner__id")),
default=F("star1__owner__id"),
),
other_star_mass=Case(
When(star1=self.object.galaxy_user, then=F("star2__mass")),
When(star1=self.object.current_star, then=F("star2__mass")),
default=F("star1__mass"),
),
other_star_name=Case(
When(
star1=self.object.galaxy_user,
star1=self.object.current_star,
then=Case(
When(
star2__owner__nick_name=None,
@ -101,4 +98,4 @@ class GalaxyUserView(CanViewMixin, UserTabsMixin, DetailView):
class GalaxyDataView(FormerSubscriberMixin, View):
def get(self, request, *args, **kwargs):
return JsonResponse(Galaxy.objects.first().state)
return JsonResponse(Galaxy.get_current_galaxy().state)