galaxy: use bulk_create whenever possible

This is a big performance gain, especially for the tests.

Examples:

----

`./manage.py test galaxy.tests.GalaxyTest.test_full_galaxy_state`

Measurements averaged over 3 runs on *my machine*™:
Before: 2m15s
After: 1m41s

----

`./manage.py generate_galaxy_test_data --user-pack-count 1`

Before: 48s
After: 25s

----

`./manage.py rule_galaxy` (for 600 citizens, corresponding to 1 user-pack)

Before: 14m4s
After: 12m34s
This commit is contained in:
Skia
2023-04-19 18:10:42 +02:00
parent bb365e4ee5
commit 6328b0c9e9
4 changed files with 170 additions and 111 deletions

View File

@ -106,32 +106,42 @@ class Command(BaseCommand):
def make_clubs(self): def make_clubs(self):
"""This will create all the clubs and store them in self.clubs for fast access later""" """This will create all the clubs and store them in self.clubs for fast access later"""
self.clubs = {} self.clubs = []
for i in range(self.NB_CLUBS): for i in range(self.NB_CLUBS):
self.clubs[i] = Club.objects.create( self.clubs.append(Club(unix_name=f"galaxy-club-{i}", name=f"club-{i}"))
unix_name=f"galaxy-club-{i}", name=f"club-{i}" # We don't need to create corresponding groups here, as the Galaxy doesn't care about them
) Club.objects.bulk_create(self.clubs)
self.clubs = Club.objects.filter(unix_name__startswith="galaxy-").all()
def make_users(self): def make_users(self):
"""This will create all the users and store them in self.users for fast access later""" """This will create all the users and store them in self.users for fast access later"""
self.users = {} self.users = []
for i in range(self.NB_USERS): for i in range(self.NB_USERS):
u = User.objects.create_user( u = User(
username=f"galaxy-user-{i}", username=f"galaxy-user-{i}",
email=f"{i}@galaxy.test", email=f"{i}@galaxy.test",
first_name="Citizen", first_name="Citizen",
last_name=f"{i}", last_name=f"{i}",
) )
self.users[i] = u self.logger.info(f"Creating {u}")
self.users.append(u)
User.objects.bulk_create(self.users)
self.users = User.objects.filter(username__startswith="galaxy-").all()
subs = []
for i in range(self.NB_USERS):
u = self.users[i]
self.logger.info(f"Registering {u}") self.logger.info(f"Registering {u}")
Subscription.objects.create( subs.append(
member=u, Subscription(
subscription_start=Subscription.compute_start( member=u,
self.now - timedelta(days=self.NB_USERS - i) subscription_start=Subscription.compute_start(
), self.now - timedelta(days=self.NB_USERS - i)
subscription_end=Subscription.compute_end(duration=2), ),
subscription_end=Subscription.compute_end(duration=2),
)
) )
Subscription.objects.bulk_create(subs)
def make_families(self): def make_families(self):
""" """
@ -140,12 +150,14 @@ class Command(BaseCommand):
heuristic to determine if they should have a family link heuristic to determine if they should have a family link
""" """
for i in range(200, self.NB_USERS): for i in range(200, self.NB_USERS):
godfathers = []
for j in range(i - 200, i, 14): # this will loop 14 times (14² = 196) for j in range(i - 200, i, 14): # this will loop 14 times (14² = 196)
if (i / 10) % 10 == (i + j) % 10: if (i / 10) % 10 == (i + j) % 10:
u1 = self.users[i] u1 = self.users[i]
u2 = self.users[j] u2 = self.users[j]
self.logger.info(f"Making {u2} the godfather of {u1}") self.logger.info(f"Making {u2} the godfather of {u1}")
u1.godfathers.add(u2) godfathers.append(u2)
u1.godfathers.set(godfathers)
def make_club_memberships(self): def make_club_memberships(self):
""" """
@ -154,6 +166,7 @@ class Command(BaseCommand):
Each pass for each user has a chance to affect her to two different clubs, increasing a bit more the created Each pass for each user has a chance to affect her to two different clubs, increasing a bit more the created
chaos, while remaining purely deterministic. chaos, while remaining purely deterministic.
""" """
memberships = []
for i in range(1, 11): # users can be in up to 20 clubs for i in range(1, 11): # users can be in up to 20 clubs
self.logger.info(f"Club membership, pass {i}") self.logger.info(f"Club membership, pass {i}")
for uid in range( for uid in range(
@ -166,16 +179,18 @@ class Command(BaseCommand):
days=(((self.NB_USERS - uid) * i) // 110) days=(((self.NB_USERS - uid) * i) // 110)
) # older users were in clubs before newer users ) # older users were in clubs before newer users
end = start + timedelta(days=180) # about one semester end = start + timedelta(days=180) # about one semester
self.logger.info( self.logger.debug(
f"Making {user} a member of club {club} from {start} to {end}" f"Making {user} a member of club {club} from {start} to {end}"
) )
Membership( memberships.append(
user=user, Membership(
club=club, user=user,
role=(uid + i) % 10 + 1, # spread the different roles club=club,
start_date=start, role=(uid + i) % 10 + 1, # spread the different roles
end_date=end, start_date=start,
).save() end_date=end,
)
)
for uid in range( for uid in range(
10 + i * 2, self.NB_USERS, 10 + i * 2 10 + i * 2, self.NB_USERS, 10 + i * 2
@ -187,27 +202,31 @@ class Command(BaseCommand):
days=(((self.NB_USERS - uid) * i) // 100) days=(((self.NB_USERS - uid) * i) // 100)
) # older users were in clubs before newer users ) # older users were in clubs before newer users
end = start + timedelta(days=180) # about one semester end = start + timedelta(days=180) # about one semester
self.logger.info( self.logger.debug(
f"Making {user} a member of club {club} from {start} to {end}" f"Making {user} a member of club {club} from {start} to {end}"
) )
Membership( memberships.append(
user=user, Membership(
club=club, user=user,
role=((uid // 10) + i) % 10 + 1, # spread the different roles club=club,
start_date=start, role=((uid // 10) + i) % 10 + 1, # spread the different roles
end_date=end, start_date=start,
).save() end_date=end,
)
)
Membership.objects.bulk_create(memberships)
def make_pictures(self): def make_pictures(self):
"""This function creates pictures for users to be tagged on later""" """This function creates pictures for users to be tagged on later"""
self.picts = {} self.picts = []
for i in range(self.NB_USERS): # Create twice as many pictures as users
u = self.users[i] for i in range(self.NB_USERS * 2):
# Create twice as many pictures as users u = self.users[i % self.NB_USERS]
for j in [i, i**2]: self.logger.info(f"Making Picture {i // self.NB_USERS} for {u}")
self.picts[j] = Picture.objects.create( self.picts.append(
owner=self.users[i], Picture(
name=f"galaxy-picture {u} {j}", owner=u,
name=f"galaxy-picture {u} {i // self.NB_USERS}",
is_moderated=True, is_moderated=True,
is_folder=False, is_folder=False,
parent=self.galaxy_album, parent=self.galaxy_album,
@ -218,64 +237,72 @@ class Command(BaseCommand):
mime_type="image/png", mime_type="image/png",
size=len(RED_PIXEL_PNG), size=len(RED_PIXEL_PNG),
) )
self.picts[j].file.name = self.picts[j].name )
self.picts[j].compressed.name = self.picts[j].name self.picts[i].file.name = self.picts[i].name
self.picts[j].thumbnail.name = self.picts[j].name self.picts[i].compressed.name = self.picts[i].name
self.picts[j].save() self.picts[i].thumbnail.name = self.picts[i].name
Picture.objects.bulk_create(self.picts)
self.picts = Picture.objects.filter(name__startswith="galaxy-").all()
def make_pictures_memberships(self): def make_pictures_memberships(self):
""" """
This assigns users to pictures, and makes enough of them for our created users to be eligible for promotion as citizen. This assigns users to pictures, and makes enough of them for our created users to be eligible for promotion as citizen.
See galaxy.models.Galaxy.rule for details on promotion to citizen. See galaxy.models.Galaxy.rule for details on promotion to citizen.
""" """
self.pictures_tags = []
# We don't want to handle limits, users in the middle will be far enough # We don't want to handle limits, users in the middle will be far enough
def _tag_neighbors(uid, neighbor_dist, pict_power, pict_dist): def _tag_neighbors(uid, neighbor_dist, pict_offset, pict_dist):
u2 = self.users[uid - neighbor_dist] u2 = self.users[uid - neighbor_dist]
u3 = self.users[uid + neighbor_dist] u3 = self.users[uid + neighbor_dist]
PeoplePictureRelation(user=u2, picture=self.picts[uid**pict_power]).save() self.pictures_tags += [
PeoplePictureRelation(user=u3, picture=self.picts[uid**pict_power]).save() PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_offset]),
PeoplePictureRelation(user=u2, picture=self.picts[uid - pict_dist]).save() PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_offset]),
PeoplePictureRelation(user=u3, picture=self.picts[uid - pict_dist]).save() PeoplePictureRelation(user=u2, picture=self.picts[uid - pict_dist]),
PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_dist]).save() PeoplePictureRelation(user=u3, picture=self.picts[uid - pict_dist]),
PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_dist]).save() PeoplePictureRelation(user=u2, picture=self.picts[uid + pict_dist]),
PeoplePictureRelation(user=u3, picture=self.picts[uid + pict_dist]),
]
for uid in range(200, self.NB_USERS - 200): for uid in range(200, self.NB_USERS - 200):
u1 = self.users[uid] u1 = self.users[uid]
self.logger.info(f"Pictures of {u1}") self.logger.info(f"Pictures of {u1}")
PeoplePictureRelation(user=u1, picture=self.picts[uid]).save() self.pictures_tags += [
PeoplePictureRelation(user=u1, picture=self.picts[uid - 14]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 14]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 14]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 20]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 14]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 20]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 20]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 21]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 20]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 21]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 21]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 22]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 21]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 22]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 22]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 30]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 22]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 30]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 30]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 31]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 30]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 31]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 31]),
PeoplePictureRelation(user=u1, picture=self.picts[uid - 32]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid + 31]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 32]).save() PeoplePictureRelation(user=u1, picture=self.picts[uid - 32]),
PeoplePictureRelation(user=u1, picture=self.picts[uid + 32]),
]
if uid % 3 == 0: if uid % 3 == 0:
_tag_neighbors(uid, 1, 1, 40) _tag_neighbors(uid, 1, 0, 40)
if uid % 5 == 0: if uid % 5 == 0:
_tag_neighbors(uid, 2, 1, 50) _tag_neighbors(uid, 2, 0, 50)
if uid % 10 == 0: if uid % 10 == 0:
_tag_neighbors(uid, 3, 1, 60) _tag_neighbors(uid, 3, 0, 60)
if uid % 20 == 0: if uid % 20 == 0:
_tag_neighbors(uid, 5, 1, 70) _tag_neighbors(uid, 5, 0, 70)
if uid % 25 == 0: if uid % 25 == 0:
_tag_neighbors(uid, 10, 1, 80) _tag_neighbors(uid, 10, 0, 80)
if uid % 2 == 1: if uid % 2 == 1:
_tag_neighbors(uid, 1, 2, 90) _tag_neighbors(uid, 1, self.NB_USERS, 90)
if uid % 15 == 0: if uid % 15 == 0:
_tag_neighbors(uid, 5, 2, 100) _tag_neighbors(uid, 5, self.NB_USERS, 100)
if uid % 30 == 0: if uid % 30 == 0:
_tag_neighbors(uid, 4, 2, 110) _tag_neighbors(uid, 4, self.NB_USERS, 110)
PeoplePictureRelation.objects.bulk_create(self.pictures_tags)
def make_important_citizen(self, uid): def make_important_citizen(self, uid):
""" """
@ -285,18 +312,32 @@ class Command(BaseCommand):
u1 = self.users[uid] u1 = self.users[uid]
u2 = self.users[uid - 100] u2 = self.users[uid - 100]
u3 = self.users[uid + 100] u3 = self.users[uid + 100]
u1.godfathers.add(u3) u1.godfathers.add(u2)
u1.godchildren.add(u2) u1.godchildren.add(u3)
self.logger.info(f"{u1} will be important and close to {u2} and {u3}") self.logger.info(f"{u1} will be important and close to {u2} and {u3}")
pictures_tags = []
for p in range( # Mix them with other citizen for more chaos for p in range( # Mix them with other citizen for more chaos
uid - 400, uid - 200 uid - 400, uid - 200
): ):
# users may already be on the pictures # users may already be on the pictures
if not self.picts[p].people.filter(user=u1).exists(): if not self.picts[p].people.filter(user=u1).exists():
PeoplePictureRelation(user=u1, picture=self.picts[p]).save() pictures_tags.append(
PeoplePictureRelation(user=u1, picture=self.picts[p])
)
if not self.picts[p].people.filter(user=u2).exists(): if not self.picts[p].people.filter(user=u2).exists():
PeoplePictureRelation(user=u2, picture=self.picts[p]).save() pictures_tags.append(
if not self.picts[p**2].people.filter(user=u1).exists(): PeoplePictureRelation(user=u2, picture=self.picts[p])
PeoplePictureRelation(user=u1, picture=self.picts[p**2]).save() )
if not self.picts[p**2].people.filter(user=u2).exists(): if not self.picts[p + self.NB_USERS].people.filter(user=u1).exists():
PeoplePictureRelation(user=u2, picture=self.picts[p**2]).save() pictures_tags.append(
PeoplePictureRelation(
user=u1, picture=self.picts[p + self.NB_USERS]
)
)
if not self.picts[p + self.NB_USERS].people.filter(user=u2).exists():
pictures_tags.append(
PeoplePictureRelation(
user=u2, picture=self.picts[p + self.NB_USERS]
)
)
PeoplePictureRelation.objects.bulk_create(pictures_tags)

View File

@ -348,8 +348,20 @@ class Galaxy(models.Model):
f"{rulable_users_count} citizen have been listed. Starting to rule." f"{rulable_users_count} citizen have been listed. Starting to rule."
) )
stars = GalaxyStar.objects.filter(galaxy=self) stars = []
self.logger.info("Creating stars for all citizen")
for user in rulable_users:
star = GalaxyStar(
owner=user, galaxy=self, mass=self.compute_user_score(user)
)
stars.append(star)
GalaxyStar.objects.bulk_create(stars)
stars = {}
for star in GalaxyStar.objects.filter(galaxy=self):
stars[star.owner.id] = star
self.logger.info("Creating lanes between stars")
# Display current speed every $speed_count_frequency users # Display current speed every $speed_count_frequency users
speed_count_frequency = max(rulable_users_count // 10, 1) # ten time at most speed_count_frequency = max(rulable_users_count // 10, 1) # ten time at most
global_avg_speed_accumulator = 0 global_avg_speed_accumulator = 0
@ -360,44 +372,36 @@ class Galaxy(models.Model):
user1_count += 1 user1_count += 1
rulable_users_count2 = len(rulable_users) rulable_users_count2 = len(rulable_users)
star1, created = stars.get_or_create(owner=user1) star1 = stars[user1.id]
if created:
star1.galaxy = self
star1.save()
if star1.mass == 0:
star1.mass = self.compute_user_score(user1)
star1.save()
user_avg_speed = 0 user_avg_speed = 0
user_avg_speed_count = 0 user_avg_speed_count = 0
tstart = time.time() tstart = time.time()
lanes = []
for user2_count, user2 in enumerate(rulable_users, start=1): for user2_count, user2 in enumerate(rulable_users, start=1):
self.logger.debug("") self.logger.debug("")
self.logger.debug( self.logger.debug(
f"\t> Examining '{user1}' ({user1_count}/{rulable_users_count}) with '{user2}' ({user2_count}/{rulable_users_count2})" f"\t> Examining '{user1}' ({user1_count}/{rulable_users_count}) with '{user2}' ({user2_count}/{rulable_users_count2})"
) )
star2, created = stars.get_or_create(owner=user2)
if created: star2 = stars[user2.id]
star2.galaxy = self
star2.save()
users_score, family, pictures, clubs = Galaxy.compute_users_score( users_score, family, pictures, clubs = Galaxy.compute_users_score(
user1, user2 user1, user2
) )
distance = self.scale_distance(users_score) distance = self.scale_distance(users_score)
if distance < 30: # TODO: this needs tuning with real-world data if distance < 30: # TODO: this needs tuning with real-world data
GalaxyLane( lanes.append(
star1=star1, GalaxyLane(
star2=star2, star1=star1,
distance=distance, star2=star2,
family=family, distance=distance,
pictures=pictures, family=family,
clubs=clubs, pictures=pictures,
).save() clubs=clubs,
)
)
if user2_count % speed_count_frequency == 0: if user2_count % speed_count_frequency == 0:
tend = time.time() tend = time.time()
@ -410,6 +414,8 @@ class Galaxy(models.Model):
) )
tstart = time.time() tstart = time.time()
GalaxyLane.objects.bulk_create(lanes)
self.logger.info("") self.logger.info("")
t_global_end = time.time() t_global_end = time.time()
@ -480,15 +486,27 @@ class Galaxy(models.Model):
F("owner__nick_name"), F("owner__nick_name"),
Value(")"), Value(")"),
) )
stars = GalaxyStar.objects.filter(galaxy=self).annotate( stars = (
owner_name=Case( GalaxyStar.objects.filter(galaxy=self)
When(owner__nick_name=None, then=without_nickname), .order_by(
default=with_nickname, "owner"
) # This helps determinism for the tests and doesn't cost much
.annotate(
owner_name=Case(
When(owner__nick_name=None, then=without_nickname),
default=with_nickname,
)
) )
) )
lanes = GalaxyLane.objects.filter(star1__galaxy=self).annotate( lanes = (
star1_owner=F("star1__owner__id"), GalaxyLane.objects.filter(star1__galaxy=self)
star2_owner=F("star2__owner__id"), .order_by(
"star1"
) # This helps determinism for the tests and doesn't cost much
.annotate(
star1_owner=F("star1__owner__id"),
star2_owner=F("star2__owner__id"),
)
) )
json = GalaxyDict( json = GalaxyDict(
nodes=[ nodes=[

File diff suppressed because one or more lines are too long

View File

@ -133,7 +133,7 @@ class GalaxyTest(TestCase):
self.assertDictEqual(expected_scores, computed_scores) self.assertDictEqual(expected_scores, computed_scores)
def test_page_is_citizen(self): def test_page_is_citizen(self):
with self.assertNumQueries(84): with self.assertNumQueries(59):
galaxy = Galaxy.objects.create() galaxy = Galaxy.objects.create()
galaxy.rule(0) # We want all users here galaxy.rule(0) # We want all users here
self.client.login(username="root", password="plop") self.client.login(username="root", password="plop")