Add fs::MTime newtype to encourage != instead of > (#20830)

See ["mtime comparison considered
harmful"](https://apenwarr.ca/log/20181113) for details of why
comparators other than equality/inequality should not be used with
mtime.

Release Notes:

- N/A
This commit is contained in:
Michael Sloan 2024-11-21 19:21:18 -07:00 committed by GitHub
parent 477c6e6833
commit 14ea4621ab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 155 additions and 112 deletions

View file

@@ -7,6 +7,7 @@ use anyhow::{anyhow, Context as _, Result};
use collections::Bound;
use feature_flags::FeatureFlagAppExt;
use fs::Fs;
use fs::MTime;
use futures::stream::StreamExt;
use futures_batch::ChunksTimeoutStreamExt;
use gpui::{AppContext, Model, Task};
@@ -17,14 +18,7 @@ use project::{Entry, UpdatedEntriesSet, Worktree};
use serde::{Deserialize, Serialize};
use smol::channel;
use smol::future::FutureExt;
use std::{
cmp::Ordering,
future::Future,
iter,
path::Path,
sync::Arc,
time::{Duration, SystemTime},
};
use std::{cmp::Ordering, future::Future, iter, path::Path, sync::Arc, time::Duration};
use util::ResultExt;
use worktree::Snapshot;
@@ -451,7 +445,7 @@ struct ChunkFiles {
pub struct ChunkedFile {
pub path: Arc<Path>,
pub mtime: Option<SystemTime>,
pub mtime: Option<MTime>,
pub handle: IndexingEntryHandle,
pub text: String,
pub chunks: Vec<Chunk>,
@@ -465,7 +459,7 @@ pub struct EmbedFiles {
#[derive(Debug, Serialize, Deserialize)]
pub struct EmbeddedFile {
pub path: Arc<Path>,
pub mtime: Option<SystemTime>,
pub mtime: Option<MTime>,
pub chunks: Vec<EmbeddedChunk>,
}

View file

@@ -1,5 +1,6 @@
use collections::HashMap;
use std::{path::Path, sync::Arc, time::SystemTime};
use fs::MTime;
use std::{path::Path, sync::Arc};
const MAX_FILES_BEFORE_RESUMMARIZE: usize = 4;
const MAX_BYTES_BEFORE_RESUMMARIZE: u64 = 1_000_000; // 1 MB
@@ -7,14 +8,14 @@ const MAX_BYTES_BEFORE_RESUMMARIZE: u64 = 1_000_000; // 1 MB
#[derive(Default, Debug)]
pub struct SummaryBacklog {
/// Key: path to a file that needs summarization, but that we haven't summarized yet. Value: that file's size on disk, in bytes, and its mtime.
files: HashMap<Arc<Path>, (u64, Option<SystemTime>)>,
files: HashMap<Arc<Path>, (u64, Option<MTime>)>,
/// Cache of the sum of all values in `files`, so we don't have to traverse the whole map to check if we're over the byte limit.
total_bytes: u64,
}
impl SummaryBacklog {
/// Store the given path in the backlog, along with how many bytes are in it.
pub fn insert(&mut self, path: Arc<Path>, bytes_on_disk: u64, mtime: Option<SystemTime>) {
pub fn insert(&mut self, path: Arc<Path>, bytes_on_disk: u64, mtime: Option<MTime>) {
let (prev_bytes, _) = self
.files
.insert(path, (bytes_on_disk, mtime))
@@ -34,7 +35,7 @@ impl SummaryBacklog {
/// Remove all the entries in the backlog and return the file paths as an iterator.
#[allow(clippy::needless_lifetimes)] // Clippy thinks this 'a can be elided, but eliding it gives a compile error
pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = (Arc<Path>, Option<SystemTime>)> + 'a {
pub fn drain<'a>(&'a mut self) -> impl Iterator<Item = (Arc<Path>, Option<MTime>)> + 'a {
self.total_bytes = 0;
self.files

View file

@@ -1,6 +1,6 @@
use anyhow::{anyhow, Context as _, Result};
use arrayvec::ArrayString;
use fs::Fs;
use fs::{Fs, MTime};
use futures::{stream::StreamExt, TryFutureExt};
use futures_batch::ChunksTimeoutStreamExt;
use gpui::{AppContext, Model, Task};
@@ -21,7 +21,7 @@ use std::{
future::Future,
path::Path,
sync::Arc,
time::{Duration, Instant, SystemTime},
time::{Duration, Instant},
};
use util::ResultExt;
use worktree::Snapshot;
@@ -39,7 +39,7 @@ struct UnsummarizedFile {
// Path to the file on disk
path: Arc<Path>,
// The mtime of the file on disk
mtime: Option<SystemTime>,
mtime: Option<MTime>,
// BLAKE3 hash of the source file's contents
digest: Blake3Digest,
// The source file's contents
@@ -51,7 +51,7 @@ struct SummarizedFile {
// Path to the file on disk
path: String,
// The mtime of the file on disk
mtime: Option<SystemTime>,
mtime: Option<MTime>,
// BLAKE3 hash of the source file's contents
digest: Blake3Digest,
// The LLM's summary of the file's contents
@@ -63,7 +63,7 @@ pub type Blake3Digest = ArrayString<{ blake3::OUT_LEN * 2 }>;
#[derive(Debug, Serialize, Deserialize)]
pub struct FileDigest {
pub mtime: Option<SystemTime>,
pub mtime: Option<MTime>,
pub digest: Blake3Digest,
}
@@ -88,7 +88,7 @@ pub struct SummaryIndex {
}
struct Backlogged {
paths_to_digest: channel::Receiver<Vec<(Arc<Path>, Option<SystemTime>)>>,
paths_to_digest: channel::Receiver<Vec<(Arc<Path>, Option<MTime>)>>,
task: Task<Result<()>>,
}
@@ -319,7 +319,7 @@ impl SummaryIndex {
digest_db: heed::Database<Str, SerdeBincode<FileDigest>>,
txn: &RoTxn<'_>,
entry: &Entry,
) -> Vec<(Arc<Path>, Option<SystemTime>)> {
) -> Vec<(Arc<Path>, Option<MTime>)> {
let entry_db_key = db_key_for_path(&entry.path);
match digest_db.get(&txn, &entry_db_key) {
@@ -414,7 +414,7 @@ impl SummaryIndex {
fn digest_files(
&self,
paths: channel::Receiver<Vec<(Arc<Path>, Option<SystemTime>)>>,
paths: channel::Receiver<Vec<(Arc<Path>, Option<MTime>)>>,
worktree_abs_path: Arc<Path>,
cx: &AppContext,
) -> MightNeedSummaryFiles {
@@ -646,7 +646,7 @@ impl SummaryIndex {
let start = Instant::now();
let backlogged = {
let (tx, rx) = channel::bounded(512);
let needs_summary: Vec<(Arc<Path>, Option<SystemTime>)> = {
let needs_summary: Vec<(Arc<Path>, Option<MTime>)> = {
let mut backlog = self.backlog.lock();
backlog.drain().collect()