From 06dfe42f799119a4c35730845e3d93e50134ec04 Mon Sep 17 00:00:00 2001 From: extrawurst Date: Wed, 6 Sep 2023 12:21:33 +0200 Subject: [PATCH] file history rename detection find more renames that git-log --follow would simplify --- asyncgit/src/sync/commit_files.rs | 114 +++++++++++++++++++++++++++-- asyncgit/src/sync/commit_filter.rs | 57 ++++++++++++--- asyncgit/src/sync/logwalker.rs | 46 ++++++++++-- asyncgit/src/sync/mod.rs | 7 ++ asyncgit/src/sync/repository.rs | 5 ++ src/components/file_revlog.rs | 5 +- 6 files changed, 212 insertions(+), 22 deletions(-) diff --git a/asyncgit/src/sync/commit_files.rs b/asyncgit/src/sync/commit_files.rs index 772b910e..aff11001 100644 --- a/asyncgit/src/sync/commit_files.rs +++ b/asyncgit/src/sync/commit_files.rs @@ -3,10 +3,10 @@ use super::{diff::DiffOptions, CommitId, RepoPath}; use crate::{ error::Result, - sync::{get_stashes, repository::repo}, - StatusItem, StatusItemType, + sync::{get_stashes, repository::repo, utils::bytes2string}, + Error, StatusItem, StatusItemType, }; -use git2::{Diff, Repository}; +use git2::{Diff, DiffFindOptions, Repository}; use scopetime::scope_time; use std::{cmp::Ordering, collections::HashSet}; @@ -153,14 +153,94 @@ pub(crate) fn get_commit_diff<'a>( Ok(diff) } +/// +pub(crate) fn commit_contains_file( + repo: &Repository, + id: CommitId, + pathspec: &str, +) -> Result> { + let commit = repo.find_commit(id.into())?; + let commit_tree = commit.tree()?; + + let parent = if commit.parent_count() > 0 { + repo.find_commit(commit.parent_id(0)?) + .ok() + .and_then(|c| c.tree().ok()) + } else { + None + }; + + let mut opts = git2::DiffOptions::new(); + opts.pathspec(pathspec.to_string()) + .skip_binary_check(true) + .context_lines(0); + + let diff = repo.diff_tree_to_tree( + parent.as_ref(), + Some(&commit_tree), + Some(&mut opts), + )?; + + if diff.stats()?.files_changed() == 0 { + return Ok(None); + } + + Ok(diff.deltas().map(|delta| delta.status()).next()) +} + +/// +pub(crate) fn commit_detect_file_rename( + repo: &Repository, + id: CommitId, + pathspec: &str, +) -> Result> { + scope_time!("commit_detect_file_rename"); + + let mut diff = get_commit_diff(repo, id, None, None, None)?; + + diff.find_similar(Some( + DiffFindOptions::new() + .renames(true) + .renames_from_rewrites(true) + .rename_from_rewrite_threshold(100), + ))?; + + let current_path = std::path::Path::new(pathspec); + + for delta in diff.deltas() { + let new_file_matches = delta + .new_file() + .path() + .map(|path| path == current_path) + .unwrap_or_default(); + + if new_file_matches + && matches!(delta.status(), git2::Delta::Renamed) + { + return Ok(Some(bytes2string( + delta.old_file().path_bytes().ok_or_else(|| { + Error::Generic(String::from("old_file error")) + })?, + )?)); + } + } + + Ok(None) +} + #[cfg(test)] mod tests { use super::get_commit_files; use crate::{ error::Result, sync::{ - commit, stage_add_file, stash_save, - tests::{get_statuses, repo_init}, + commit, + commit_files::commit_detect_file_rename, + stage_add_all, stage_add_file, stash_save, + tests::{ + get_statuses, rename_file, repo_init, + repo_init_empty, write_commit_file, + }, RepoPath, }, StatusItemType, @@ -240,4 +320,28 @@ mod tests { Ok(()) } + + #[test] + fn test_rename_detection() { + let (td, repo) = repo_init_empty().unwrap(); + let repo_path: RepoPath = td.path().into(); + + write_commit_file(&repo, "foo.txt", "foobar", "c1"); + rename_file(&repo, "foo.txt", "bar.txt"); + stage_add_all( + &repo_path, + "*", + Some(crate::sync::ShowUntrackedFilesConfig::All), + ) + .unwrap(); + let rename_commit = commit(&repo_path, "c2").unwrap(); + + let rename = commit_detect_file_rename( + &repo, + rename_commit, + "bar.txt", + ) + .unwrap(); + assert_eq!(rename, Some(String::from("foo.txt"))); + } } diff --git a/asyncgit/src/sync/commit_filter.rs b/asyncgit/src/sync/commit_filter.rs index 7fdb7818..de70ff1e 100644 --- a/asyncgit/src/sync/commit_filter.rs +++ b/asyncgit/src/sync/commit_filter.rs @@ -1,9 +1,14 @@ -use super::{commit_files::get_commit_diff, CommitId}; -use crate::error::Result; +use super::{ + commit_files::{commit_contains_file, get_commit_diff}, + CommitId, +}; +use crate::{ + error::Result, sync::commit_files::commit_detect_file_rename, +}; use bitflags::bitflags; use fuzzy_matcher::FuzzyMatcher; use git2::{Diff, Repository}; -use std::sync::Arc; +use std::sync::{Arc, RwLock}; /// pub type SharedCommitFilterFn = Arc< @@ -11,22 +16,52 @@ pub type SharedCommitFilterFn = Arc< >; /// -pub fn diff_contains_file(file_path: String) -> SharedCommitFilterFn { +pub fn diff_contains_file( + file_path: Arc>, +) -> SharedCommitFilterFn { Arc::new(Box::new( move |repo: &Repository, commit_id: &CommitId| -> Result { - let diff = get_commit_diff( + let current_file_path = file_path.read()?.to_string(); + + if let Some(delta) = commit_contains_file( repo, *commit_id, - Some(file_path.clone()), - None, - None, - )?; + current_file_path.as_str(), + )? { + //note: only do rename test in case file looks like being added in this commit - let contains_file = diff.deltas().len() > 0; + // log::info!( + // "edit: [{}] ({:?}) - {}", + // commit_id.get_short_string(), + // delta, + // ¤t_file_path + // ); - Ok(contains_file) + if matches!(delta, git2::Delta::Added) { + let rename = commit_detect_file_rename( + repo, + *commit_id, + current_file_path.as_str(), + )?; + + if let Some(old_name) = rename { + // log::info!( + // "rename: [{}] {:?} <- {:?}", + // commit_id.get_short_string(), + // current_file_path, + // old_name, + // ); + + (*file_path.write()?) = old_name; + } + } + + return Ok(true); + } + + Ok(false) }, )) } diff --git a/asyncgit/src/sync/logwalker.rs b/asyncgit/src/sync/logwalker.rs index b13a9e5a..5ef608bd 100644 --- a/asyncgit/src/sync/logwalker.rs +++ b/asyncgit/src/sync/logwalker.rs @@ -113,16 +113,17 @@ mod tests { use super::*; use crate::error::Result; use crate::sync::commit_filter::{SearchFields, SearchOptions}; - use crate::sync::tests::write_commit_file; + use crate::sync::tests::{rename_file, write_commit_file}; use crate::sync::{ commit, get_commits_info, stage_add_file, tests::repo_init_empty, }; use crate::sync::{ - diff_contains_file, filter_commit_by_search, LogFilterSearch, - LogFilterSearchOptions, RepoPath, + diff_contains_file, filter_commit_by_search, stage_add_all, + LogFilterSearch, LogFilterSearchOptions, RepoPath, }; use pretty_assertions::assert_eq; + use std::sync::{Arc, RwLock}; use std::{fs::File, io::Write, path::Path}; #[test] @@ -207,7 +208,8 @@ mod tests { let _third_commit_id = commit(&repo_path, "commit3").unwrap(); - let diff_contains_baz = diff_contains_file("baz".into()); + let file_path = Arc::new(RwLock::new(String::from("baz"))); + let diff_contains_baz = diff_contains_file(file_path); let mut items = Vec::new(); let mut walker = LogWalker::new(&repo, 100)? @@ -222,7 +224,8 @@ mod tests { assert_eq!(items.len(), 0); - let diff_contains_bar = diff_contains_file("bar".into()); + let file_path = Arc::new(RwLock::new(String::from("bar"))); + let diff_contains_bar = diff_contains_file(file_path); let mut items = Vec::new(); let mut walker = LogWalker::new(&repo, 100)? @@ -280,4 +283,37 @@ mod tests { assert_eq!(items.len(), 2); } + + #[test] + fn test_logwalker_with_filter_rename() { + let (td, repo) = repo_init_empty().unwrap(); + let repo_path: RepoPath = td.path().into(); + + write_commit_file(&repo, "foo.txt", "foobar", "c1"); + rename_file(&repo, "foo.txt", "bar.txt"); + stage_add_all( + &repo_path, + "*", + Some(crate::sync::ShowUntrackedFilesConfig::All), + ) + .unwrap(); + let rename_commit = commit(&repo_path, "c2").unwrap(); + + write_commit_file(&repo, "bar.txt", "new content", "c3"); + + let file_path = + Arc::new(RwLock::new(String::from("bar.txt"))); + let log_filter = diff_contains_file(file_path.clone()); + + let mut items = Vec::new(); + let mut walker = LogWalker::new(&repo, 100) + .unwrap() + .filter(Some(log_filter)); + walker.read(&mut items).unwrap(); + + assert_eq!(items.len(), 3); + assert_eq!(items[1], rename_commit); + + assert_eq!(file_path.read().unwrap().as_str(), "foo.txt"); + } } diff --git a/asyncgit/src/sync/mod.rs b/asyncgit/src/sync/mod.rs index 909bea6f..ff9e6e6a 100644 --- a/asyncgit/src/sync/mod.rs +++ b/asyncgit/src/sync/mod.rs @@ -143,6 +143,13 @@ mod tests { }); } + pub fn rename_file(repo: &Repository, old: &str, new: &str) { + let dir = repo.workdir().unwrap(); + let old = dir.join(old); + let new = dir.join(new); + std::fs::rename(old, new).unwrap(); + } + /// write, stage and commit a file pub fn write_commit_file( repo: &Repository, diff --git a/asyncgit/src/sync/repository.rs b/asyncgit/src/sync/repository.rs index 24620d4c..7fee7477 100644 --- a/asyncgit/src/sync/repository.rs +++ b/asyncgit/src/sync/repository.rs @@ -47,6 +47,11 @@ impl From<&str> for RepoPath { Self::Path(PathBuf::from(p)) } } +impl From<&Path> for RepoPath { + fn from(p: &Path) -> Self { + Self::Path(PathBuf::from(p)) + } +} pub fn repo(repo_path: &RepoPath) -> Result { let repo = Repository::open_ext( diff --git a/src/components/file_revlog.rs b/src/components/file_revlog.rs index 6ce4eeac..c18fac72 100644 --- a/src/components/file_revlog.rs +++ b/src/components/file_revlog.rs @@ -1,3 +1,5 @@ +use std::sync::{Arc, RwLock}; + use super::utils::logitems::ItemBatch; use super::{visibility_blocking, BlameFileOpen, InspectCommitOpen}; use crate::keys::key_match; @@ -116,7 +118,8 @@ impl FileRevlogComponent { pub fn open(&mut self, open_request: FileRevOpen) -> Result<()> { self.open_request = Some(open_request.clone()); - let filter = diff_contains_file(open_request.file_path); + let file_name = Arc::new(RwLock::new(open_request.file_path)); + let filter = diff_contains_file(file_name); self.git_log = Some(AsyncLog::new( self.repo_path.borrow().clone(), &self.sender,