perf(labrinth/random_projects_get): speed up through spatial queries according to profiling results

This commit is contained in:
Alejandro González 2025-06-09 17:15:55 +02:00
parent 06f1df1995
commit 82b4f11a13
No known key found for this signature in database
5 changed files with 48 additions and 29 deletions

View File

@ -0,0 +1,23 @@
{
"db_name": "PostgreSQL",
"query": "WITH random_id_point AS (\n SELECT POINT(RANDOM() * ((SELECT MAX(id) FROM mods) - (SELECT MIN(id) FROM mods) + 1) + (SELECT MIN(id) FROM mods), 0) AS point\n )\n SELECT id FROM mods\n WHERE status = ANY($1)\n ORDER BY POINT(id, 0) <-> (SELECT point FROM random_id_point)\n LIMIT $2",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"TextArray",
"Int8"
]
},
"nullable": [
false
]
},
"hash": "0d01a3991e7551a8b7936bf8f4cc1760d2e89af99dd71849eda35d6c6820aa43"
}

View File

@ -1,23 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "SELECT id FROM mods WHERE status = ANY($1)\n ORDER BY id\n LIMIT $2\n OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"TextArray",
"Int8"
]
},
"nullable": [
false
]
},
"hash": "1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492"
}

View File

@ -0,0 +1,12 @@
-- The spatial query for retrieving random searchable projects is greatly sped
-- up by this index on a fixture of 1M mods, bringing down the total cost of
-- the query plan and runtime to be comparable to primary key lookups. See the
-- `labrinth::routes::v3::projects::random_projects_get` function and the
-- previous 20250608183828_random-project-index.sql migration for more details.
--
-- That previous migration created a non-spatial index for the status column which
-- does not get used in the new spatial query, but may still be useful for other
-- queries that filter mods by status.
CREATE INDEX mods_searchable_ids_gist ON mods USING gist (POINT(id, 0))
WHERE status = ANY(ARRAY['approved', 'archived']);

View File

@ -519,6 +519,9 @@ impl ProjectStatus {
}
// Project can be displayed in search
// IMPORTANT: if this is changed, make sure to update the `mods_searchable_ids_gist`
// index in the DB to keep random project queries fast (see the
// `20250609134334_spatial-random-project-index.sql` migration)
pub fn is_searchable(&self) -> bool {
matches!(self, ProjectStatus::Approved | ProjectStatus::Archived)
}

View File

@ -94,12 +94,16 @@ pub async fn random_projects_get(
})?;
let project_ids = sqlx::query!(
// IDs are randomly generated (see the `generate_ids` macro), so ID order is
// equivalent to a random order
"SELECT id FROM mods WHERE status = ANY($1)
ORDER BY id
LIMIT $2
OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)",
// IDs are randomly generated (see the `generate_ids` macro), so fetching a
// number of mods nearest to a random point in the ID space is equivalent to
// random sampling
"WITH random_id_point AS (
SELECT POINT(RANDOM() * ((SELECT MAX(id) FROM mods) - (SELECT MIN(id) FROM mods) + 1) + (SELECT MIN(id) FROM mods), 0) AS point
)
SELECT id FROM mods
WHERE status = ANY($1)
ORDER BY POINT(id, 0) <-> (SELECT point FROM random_id_point)
LIMIT $2",
&*crate::models::projects::ProjectStatus::iterator()
.filter(|x| x.is_searchable())
.map(|x| x.to_string())