Fix random_projects route not returning the requested number of projects (#3758)

* Fix random_projects route not returning the requested number of projects

* fix(labrinth): further improve random project route SQL query

* chore: fix typo in comment

* tweak(labrinth): more apparent and fast randomness for `random_projects_get`

* tweak(labrinth): even better random projects query

* chore: address formatting review

---------

Co-authored-by: Alejandro González <me@alegon.dev>
This commit is contained in:
Emma Alexia 2025-06-08 19:49:39 -04:00 committed by GitHub
parent 3489771d2e
commit 06f1df1995
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 42 additions and 27 deletions

View File

@ -1,23 +0,0 @@
{
"db_name": "PostgreSQL",
"query": "\n SELECT id FROM mods TABLESAMPLE SYSTEM_ROWS($1) WHERE status = ANY($2)\n ",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"Int8",
"TextArray"
]
},
"nullable": [
false
]
},
"hash": "1cefe4924d3c1f491739858ce844a22903d2dbe26f255219299f1833a10ce3d7"
}

View File

@ -0,0 +1,23 @@
{
"db_name": "PostgreSQL",
"query": "SELECT id FROM mods WHERE status = ANY($1)\n ORDER BY id\n LIMIT $2\n OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)",
"describe": {
"columns": [
{
"ordinal": 0,
"name": "id",
"type_info": "Int8"
}
],
"parameters": {
"Left": [
"TextArray",
"Int8"
]
},
"nullable": [
false
]
},
"hash": "1d017ac5f5b1e76ec241533fd4d061c79f4e6d2f1701e727a7474fd5029a5492"
}

View File

@ -0,0 +1,12 @@
-- Adds an index on `mods.status`.
--
-- This index substantially brings down the cost of the query plan for the
-- hot query at `labrinth::routes::v3::projects::random_projects_get`, from
-- 354.04..363.39 to 171.33..180.68 (~2x improvement). That query filters
-- `mods` with `status = ANY(...)`, both in its main SELECT and in the
-- COUNT(*) subquery used to compute its random OFFSET.
--
-- NOTE(review): the figures look like planner cost estimates in the
-- `startup..total` form printed by EXPLAIN, not measured run times -- confirm.
--
-- The numbers above were calculated in a clean PostgreSQL 17.5.0 container
-- with 10k mods created with the SQL below (statuses alternate between
-- 'approved' and 'pending').
--
-- WITH seq AS (SELECT n FROM GENERATE_SERIES(1, 10000) AS n)
-- INSERT INTO mods (id, team_id, name, summary, icon_url, license_url, slug, status)
-- SELECT n, 1, n, '', '', '', n, (ARRAY['approved', 'pending'])[n % 2 + 1] FROM seq;
CREATE INDEX mods_status ON mods(status);

View File

@ -94,14 +94,17 @@ pub async fn random_projects_get(
})?;
let project_ids = sqlx::query!(
"
SELECT id FROM mods TABLESAMPLE SYSTEM_ROWS($1) WHERE status = ANY($2)
",
count.count as i32,
// IDs are randomly generated (see the `generate_ids` macro), so ID order is
// equivalent to a random order
"SELECT id FROM mods WHERE status = ANY($1)
ORDER BY id
LIMIT $2
OFFSET GREATEST(ROUND(RANDOM() * (SELECT COUNT(*) FROM mods WHERE status = ANY($1)))::int8 - $2, 0)",
&*crate::models::projects::ProjectStatus::iterator()
.filter(|x| x.is_searchable())
.map(|x| x.to_string())
.collect::<Vec<String>>(),
count.count as i32,
)
.fetch(&**pool)
.map_ok(|m| db_ids::DBProjectId(m.id))