galaxy: use bulk_create whenever possible

This is a big performance gain, especially for the tests.

Examples:

----

`./manage.py test galaxy.tests.GalaxyTest.test_full_galaxy_state`

Measurements averaged over 3 run on *my machine*™:
Before: 2min15s
After: 1m41s

----

`./manage.py generate_galaxy_test_data --user-pack-count 1`

Before: 48s
After: 25s

----

`./manage.py rule_galaxy` (for 600 citizen, corresponding to 1 user-pack)

Before: 14m4s
After: 12m34s
This commit is contained in:
Skia
2023-04-19 18:10:42 +02:00
parent bb365e4ee5
commit 6328b0c9e9
4 changed files with 170 additions and 111 deletions

View File

@ -106,32 +106,42 @@ class Command(BaseCommand):
def make_clubs(self):
"""This will create all the clubs and store them in self.clubs for fast access later"""
self.clubs = {}
self.clubs = []
for i in range(self.NB_CLUBS):
self.clubs[i] = Club.objects.create(
unix_name=f"galaxy-club-{i}", name=f"club-{i}"
)
self.clubs.append(Club(unix_name=f"galaxy-club-{i}", name=f"club-{i}"))
# We don't need to create corresponding groups here, as the Galaxy doesn't care about them
Club.objects.bulk_create(self.clubs)
self.clubs = Club.objects.filter(unix_name__startswith="galaxy-").all()
def make_users(self):
"""This will create all the users and store them in self.users for fast access later"""
self.users = {}
self.users = []
for i in range(self.NB_USERS):
u = User.objects.create_user(
u = User(
username=f"galaxy-user-{i}",
email=f"{i}@galaxy.test",
first_name="Citizen",
last_name=f"{i}",
)
self.users[i] = u
self.logger.info(f"Creating {u}")
self.users.append(u)
User.objects.bulk_create(self.users)
self.users = User.objects.filter(username__startswith="galaxy-").all()
subs = []
for i in range(self.NB_USERS):
u = self.users[i]
self.logger.info(f"Registering {u}")
Subscription.objects.create(
member=u,
subscription_start=Subscription.compute_start(
self.now - timedelta(days=self.NB_USERS - i)
),
subscription_end=Subscription.compute_end(duration=2),
subs.append(
Subscription(
member=u,
subscription_start=Subscription.compute_start(
self.now - timedelta(days=self.NB_USERS - i)
),
subscription_end=Subscription.compute_end(duration=2),
)
)
Subscription.objects.bulk_create(subs)
def make_families(self):
"""
@ -140,12 +150,14 @@ class Command(BaseCommand):
heuristic to determine if they should have a family link
"""
for i in range(200, self.NB_USERS):
godfathers = []
for j in range(i - 200, i, 14): # this will loop 14 times (14² = 196)
if (i / 10) % 10 == (i + j) % 10:
u1 = self.users[i]
u2 = self.users[j]
self.logger.info(f"Making {u2} the godfather of {u1}")
u1.godfathers.add(u2)
godfathers.append(u2)
u1.godfathers.set(godfathers)
def make_club_memberships(self):
"""
@ -154,6 +166,7 @@ class Command(BaseCommand):
Each pass for each user has a chance to affect her to two different clubs, increasing a bit more the created
chaos, while remaining purely deterministic.
"""
memberships = []
for i in range(1, 11): # users can be in up to 20 clubs
self.logger.info(f"Club membership, pass {i}")
for uid in range(
@ -166,16 +179,18 @@ class Command(BaseCommand):
days=(((self.NB_USERS - uid) * i) // 110)
) # older users were in clubs before newer users
end = start + timedelta(days=180) # about one semester
self.logger.info(
self.logger.debug(
f"Making {user} a member of club {club} from {start} to {end}"
)
Membership(
user=user,
club=club,
role=(uid + i) % 10 + 1, # spread the different roles
start_date=start,
end_date=end,
).save()
memberships.append(
Membership(
user=user,
club=club,
role=(uid + i) % 10 + 1, # spread the different roles
start_date=start,
end_date=end,
)
)
for uid in range(
10 + i * 2, self.NB_USERS, 10 + i * 2
@ -187,27 +202,31 @@ class Command(BaseCommand):
days=(((self.NB_USERS - uid) * i) // 100)
) # older users were in clubs before newer users
end = start + timedelta(days=180) # about one semester
self.logger.info(
self.logger.debug(
f"Making {user} a member of club {club} from {start} to {end}"
)
Membership(
user=user,
club=club,
role=((uid // 10) + i) % 10 + 1, # spread the different roles
start_date=start,
end_date=end,
).save()
memberships.append(
Membership(
user=user,
club=club,
role=((uid // 10) + i) % 10 + 1, # spread the different roles
start_date=start,
end_date=end,
)
)
Membership.objects.bulk_create(memberships)
def make_pictures(self):
"""This function creates pictures for users to be tagged on later"""
self.picts = {}
for i in range(self.NB_USERS):
u = self.users[i]
# Create twice as many pictures as users
for j in [i, i**2]:
self.picts[j] = Picture.objects.create(
owner=self.users[i],
name=f"galaxy-picture {u} {j}",
self.picts = []
# Create twice as many pictures as users
for i in range(self.NB_USERS * 2):
u = self.users[i % self.NB_USERS]
self.logger.info(f"Making Picture {i // self.NB_USERS} for {u}")
self.picts.append(
Picture(
owner=u,
name=f"galaxy-picture {u} {i // self.NB_USERS}",
is_moderated=True,
is_folder=False,
parent=self.galaxy_album,
@ -218,64 +237,72 @@ class Command(BaseCommand):
mime_type="image/png",
size=len(RED_PIXEL_PNG),
)
self.picts[j].file.name = self.picts[j].name
self.picts[j].compressed.name = self.picts[j].name
self.picts[j].thumbnail.name = self.picts[j].name
self.picts[j].save()
)
self.picts[i].file.name = self.picts[i].name
self.picts[i].compressed.name = self.picts[i].name
self.picts[i].thumbnail.name = self.picts[i].name
Picture.objects.bulk_create(self.picts)
self.picts = Picture.objects.filter(name__startswith="galaxy-").all()
def make_pictures_memberships(self):
"""
This assigns users to pictures, and makes enough of them for our created users to be eligible for promotion as citizen.
See galaxy.models.Galaxy.rule for details on promotion to citizen.
"""
self.pictures_tags = []
# We don't want to handle limits, users in the middle will be far enough
def _tag_neighbors(uid, neighbor_dist, pict_power, pict_dist):
def _tag_neighbors(uid, neighbor_dist, pict_offset, pict_dist):
u2 = self.users[uid - neighbor_dist]
u3 = self.users[uid + neighbor_dist]
PeoplePictureRelation(user=u2, picture=self.picts[uid**pict_power]).save()
PeoplePictureRelation(user=u3, picture=self.picts[uid**pict_power]).save()
PeoplePictureRelation(user=u2, picture=self.picts[uid - pict_dist]).save()
PeoplePictureRelation(user=u3, picture=self.picts[uid - pict_dist]).save()
PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_dist]).save()
PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_dist]).save()
self.pictures_tags += [
PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_offset]),
PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_offset]),
PeoplePictureRelation(user=u2, picture=self.picts[uid - pict_dist]),
PeoplePictureRelation(user=u3, picture=self.picts[uid - pict_dist]),
PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_dist]),
PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_dist]),
]
for uid in range(200, self.NB_USERS - 200):
u1 = self.users[uid]
self.logger.info(f"Pictures of {u1}")
PeoplePictureRelation(user=u1, picture=self.picts[uid]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 14]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 14]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 20]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 20]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 21]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 21]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 22]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 22]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 30]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 30]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 31]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 31]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid - 32]).save()
PeoplePictureRelation(user=u1, picture=self.picts[uid + 32]).save()
self.pictures_tags += [
PeoplePictureRelation(user=u1, picture=self.picts[uid]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 14]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 14]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 20]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 20]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 21]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 21]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 22]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 22]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 30]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 30]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 31]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 31]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 32]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 32]),
]
if uid % 3 == 0:
_tag_neighbors(uid, 1, 1, 40)
_tag_neighbors(uid, 1, 0, 40)
if uid % 5 == 0:
_tag_neighbors(uid, 2, 1, 50)
_tag_neighbors(uid, 2, 0, 50)
if uid % 10 == 0:
_tag_neighbors(uid, 3, 1, 60)
_tag_neighbors(uid, 3, 0, 60)
if uid % 20 == 0:
_tag_neighbors(uid, 5, 1, 70)
_tag_neighbors(uid, 5, 0, 70)
if uid % 25 == 0:
_tag_neighbors(uid, 10, 1, 80)
_tag_neighbors(uid, 10, 0, 80)
if uid % 2 == 1:
_tag_neighbors(uid, 1, 2, 90)
_tag_neighbors(uid, 1, self.NB_USERS, 90)
if uid % 15 == 0:
_tag_neighbors(uid, 5, 2, 100)
_tag_neighbors(uid, 5, self.NB_USERS, 100)
if uid % 30 == 0:
_tag_neighbors(uid, 4, 2, 110)
_tag_neighbors(uid, 4, self.NB_USERS, 110)
PeoplePictureRelation.objects.bulk_create(self.pictures_tags)
def make_important_citizen(self, uid):
"""
@ -285,18 +312,32 @@ class Command(BaseCommand):
u1 = self.users[uid]
u2 = self.users[uid - 100]
u3 = self.users[uid + 100]
u1.godfathers.add(u3)
u1.godchildren.add(u2)
u1.godfathers.add(u2)
u1.godchildren.add(u3)
self.logger.info(f"{u1} will be important and close to {u2} and {u3}")
pictures_tags = []
for p in range( # Mix them with other citizen for more chaos
uid - 400, uid - 200
):
# users may already be on the pictures
if not self.picts[p].people.filter(user=u1).exists():
PeoplePictureRelation(user=u1, picture=self.picts[p]).save()
pictures_tags.append(
PeoplePictureRelation(user=u1, picture=self.picts[p])
)
if not self.picts[p].people.filter(user=u2).exists():
PeoplePictureRelation(user=u2, picture=self.picts[p]).save()
if not self.picts[p**2].people.filter(user=u1).exists():
PeoplePictureRelation(user=u1, picture=self.picts[p**2]).save()
if not self.picts[p**2].people.filter(user=u2).exists():
PeoplePictureRelation(user=u2, picture=self.picts[p**2]).save()
pictures_tags.append(
PeoplePictureRelation(user=u2, picture=self.picts[p])
)
if not self.picts[p + self.NB_USERS].people.filter(user=u1).exists():
pictures_tags.append(
PeoplePictureRelation(
user=u1, picture=self.picts[p + self.NB_USERS]
)
)
if not self.picts[p + self.NB_USERS].people.filter(user=u2).exists():
pictures_tags.append(
PeoplePictureRelation(
user=u2, picture=self.picts[p + self.NB_USERS]
)
)
PeoplePictureRelation.objects.bulk_create(pictures_tags)

View File

@ -348,8 +348,20 @@ class Galaxy(models.Model):
f"{rulable_users_count} citizen have been listed. Starting to rule."
)
stars = GalaxyStar.objects.filter(galaxy=self)
stars = []
self.logger.info("Creating stars for all citizen")
for user in rulable_users:
star = GalaxyStar(
owner=user, galaxy=self, mass=self.compute_user_score(user)
)
stars.append(star)
GalaxyStar.objects.bulk_create(stars)
stars = {}
for star in GalaxyStar.objects.filter(galaxy=self):
stars[star.owner.id] = star
self.logger.info("Creating lanes between stars")
# Display current speed every $speed_count_frequency users
speed_count_frequency = max(rulable_users_count // 10, 1) # ten time at most
global_avg_speed_accumulator = 0
@ -360,44 +372,36 @@ class Galaxy(models.Model):
user1_count += 1
rulable_users_count2 = len(rulable_users)
star1, created = stars.get_or_create(owner=user1)
if created:
star1.galaxy = self
star1.save()
if star1.mass == 0:
star1.mass = self.compute_user_score(user1)
star1.save()
star1 = stars[user1.id]
user_avg_speed = 0
user_avg_speed_count = 0
tstart = time.time()
lanes = []
for user2_count, user2 in enumerate(rulable_users, start=1):
self.logger.debug("")
self.logger.debug(
f"\t> Examining '{user1}' ({user1_count}/{rulable_users_count}) with '{user2}' ({user2_count}/{rulable_users_count2})"
)
star2, created = stars.get_or_create(owner=user2)
if created:
star2.galaxy = self
star2.save()
star2 = stars[user2.id]
users_score, family, pictures, clubs = Galaxy.compute_users_score(
user1, user2
)
distance = self.scale_distance(users_score)
if distance < 30: # TODO: this needs tuning with real-world data
GalaxyLane(
star1=star1,
star2=star2,
distance=distance,
family=family,
pictures=pictures,
clubs=clubs,
).save()
lanes.append(
GalaxyLane(
star1=star1,
star2=star2,
distance=distance,
family=family,
pictures=pictures,
clubs=clubs,
)
)
if user2_count % speed_count_frequency == 0:
tend = time.time()
@ -410,6 +414,8 @@ class Galaxy(models.Model):
)
tstart = time.time()
GalaxyLane.objects.bulk_create(lanes)
self.logger.info("")
t_global_end = time.time()
@ -480,15 +486,27 @@ class Galaxy(models.Model):
F("owner__nick_name"),
Value(")"),
)
stars = GalaxyStar.objects.filter(galaxy=self).annotate(
owner_name=Case(
When(owner__nick_name=None, then=without_nickname),
default=with_nickname,
stars = (
GalaxyStar.objects.filter(galaxy=self)
.order_by(
"owner"
) # This helps determinism for the tests and doesn't cost much
.annotate(
owner_name=Case(
When(owner__nick_name=None, then=without_nickname),
default=with_nickname,
)
)
)
lanes = GalaxyLane.objects.filter(star1__galaxy=self).annotate(
star1_owner=F("star1__owner__id"),
star2_owner=F("star2__owner__id"),
lanes = (
GalaxyLane.objects.filter(star1__galaxy=self)
.order_by(
"star1"
) # This helps determinism for the tests and doesn't cost much
.annotate(
star1_owner=F("star1__owner__id"),
star2_owner=F("star2__owner__id"),
)
)
json = GalaxyDict(
nodes=[

File diff suppressed because one or more lines are too long

View File

@ -133,7 +133,7 @@ class GalaxyTest(TestCase):
self.assertDictEqual(expected_scores, computed_scores)
def test_page_is_citizen(self):
with self.assertNumQueries(84):
with self.assertNumQueries(59):
galaxy = Galaxy.objects.create()
galaxy.rule(0) # We want all users here
self.client.login(username="root", password="plop")