collab: Seed GitHub users from static data (#18301)

This PR updates the collab seed script to seed the GitHub users from a
set of static data.

This removes the need to hit the GitHub API to retrieve these users.

Release Notes:

- N/A
This commit is contained in:
Marshall Bowers 2024-09-24 16:35:09 -04:00 committed by GitHub
parent 692590bff4
commit d2ffad0f34
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 629 additions and 66 deletions

View file

@ -4,10 +4,13 @@ use anyhow::Context;
use chrono::{DateTime, Utc};
use db::Database;
use serde::{de::DeserializeOwned, Deserialize};
use std::{fmt::Write, fs, path::Path};
use std::{fs, path::Path};
use crate::Config;
/// A GitHub user.
///
/// This representation corresponds to the entries in the `seed/github_users.json` file.
#[derive(Debug, Deserialize)]
struct GithubUser {
id: i32,
@ -16,24 +19,12 @@ struct GithubUser {
created_at: DateTime<Utc>,
}
/// A GitHub user returned from the [List users](https://docs.github.com/en/rest/users/users?apiVersion=2022-11-28#list-users) endpoint.
///
/// Notably, this data type does not have the `created_at` field.
#[derive(Debug, Deserialize)]
struct ListGithubUser {
id: i32,
login: String,
email: Option<String>,
}
#[derive(Deserialize)]
struct SeedConfig {
/// Which users to create as admins.
admins: Vec<String>,
/// Which channels to create (all admins are invited to all channels).
channels: Vec<String>,
/// Number of random users to create from the Github API.
number_of_users: Option<usize>,
}
pub async fn seed(config: &Config, db: &Database, force: bool) -> anyhow::Result<()> {
@ -126,57 +117,29 @@ pub async fn seed(config: &Config, db: &Database, force: bool) -> anyhow::Result
}
}
// TODO: Fix this later
if let Some(number_of_users) = seed_config.number_of_users {
// Fetch 100 other random users from GitHub and insert them into the database
// (for testing autocompleters, etc.)
let mut user_count = db
.get_all_users(0, 200)
let github_users_filepath = seed_path.parent().unwrap().join("seed/github_users.json");
let github_users: Vec<GithubUser> =
serde_json::from_str(&fs::read_to_string(github_users_filepath)?)?;
for github_user in github_users {
log::info!("Seeding {:?} from GitHub", github_user.login);
let user = db
.get_or_create_user_by_github_account(
&github_user.login,
github_user.id,
github_user.email.as_deref(),
github_user.created_at,
None,
)
.await
.expect("failed to load users from db")
.len();
let mut last_user_id = None;
while user_count < number_of_users {
let mut uri = "https://api.github.com/users?per_page=100".to_string();
if let Some(last_user_id) = last_user_id {
write!(&mut uri, "&since={}", last_user_id).unwrap();
}
let users = fetch_github::<Vec<ListGithubUser>>(&client, &uri).await;
.expect("failed to insert user");
for github_user in users {
log::info!("Seeding {:?} from GitHub", github_user.login);
// Fetch the user to get their `created_at` timestamp, since it
// isn't on the list response.
let github_user: GithubUser = fetch_github(
&client,
&format!("https://api.github.com/user/{}", github_user.id),
)
.await;
last_user_id = Some(github_user.id);
user_count += 1;
let user = db
.get_or_create_user_by_github_account(
&github_user.login,
github_user.id,
github_user.email.as_deref(),
github_user.created_at,
None,
)
.await
.expect("failed to insert user");
for flag in &flags {
db.add_user_flag(user.id, *flag).await.context(format!(
"Unable to enable flag '{}' for user '{}'",
flag, user.id
))?;
}
// Sleep to avoid getting rate-limited by GitHub.
tokio::time::sleep(std::time::Duration::from_millis(250)).await;
}
for flag in &flags {
db.add_user_flag(user.id, *flag).await.context(format!(
"Unable to enable flag '{}' for user '{}'",
flag, user.id
))?;
}
}