Parallelize log search (#1874)

This commit is contained in:
extrawurst 2023-09-04 20:55:17 +02:00 committed by GitHub
parent 5808515853
commit 0e1d83fb02
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 76 additions and 20 deletions

View file

@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
## Unreleased ## Unreleased
### Changed
* parallelise log search - performance gain ~100% ([#1869](https://github.com/extrawurst/gitui/issues/1869))
## [0.24.2] - 2023-09-03 ## [0.24.2] - 2023-09-03
### Fixes ### Fixes

11
Cargo.lock generated
View file

@ -109,6 +109,7 @@ dependencies = [
"log", "log",
"openssl-sys", "openssl-sys",
"pretty_assertions", "pretty_assertions",
"rayon",
"rayon-core", "rayon-core",
"scopetime", "scopetime",
"serde", "serde",
@ -1310,6 +1311,16 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "rayon"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d2df5196e37bcc87abebc0053e20787d73847bb33134a69841207dd0a47f03b"
dependencies = [
"either",
"rayon-core",
]
[[package]] [[package]]
name = "rayon-core" name = "rayon-core"
version = "1.11.0" version = "1.11.0"

View file

@ -22,6 +22,7 @@ log = "0.4"
# git2 = { git="https://github.com/extrawurst/git2-rs.git", rev="fc13dcc", features = ["vendored-openssl"]} # git2 = { git="https://github.com/extrawurst/git2-rs.git", rev="fc13dcc", features = ["vendored-openssl"]}
# pinning to vendored openssl, using the git2 feature this gets lost with new resolver # pinning to vendored openssl, using the git2 feature this gets lost with new resolver
openssl-sys = { version = '0.9', features = ["vendored"], optional = true } openssl-sys = { version = '0.9', features = ["vendored"], optional = true }
rayon = "1.7"
rayon-core = "1.11" rayon-core = "1.11"
scopetime = { path = "../scopetime", version = "0.1" } scopetime = { path = "../scopetime", version = "0.1" }
serde = { version = "1.0", features = ["derive"] } serde = { version = "1.0", features = ["derive"] }

View file

@ -84,6 +84,10 @@ pub enum Error {
/// ///
#[error("not on a branch")] #[error("not on a branch")]
NoBranch, NoBranch,
///
#[error("rayon error: {0}")]
ThreadPool(#[from] rayon_core::ThreadPoolBuildError),
} }
/// ///

View file

@ -1,3 +1,8 @@
use rayon::{
prelude::ParallelIterator,
slice::{ParallelSlice, ParallelSliceMut},
};
use crate::{ use crate::{
asyncjob::{AsyncJob, RunParams}, asyncjob::{AsyncJob, RunParams},
error::Result, error::Result,
@ -5,7 +10,7 @@ use crate::{
AsyncGitNotification, ProgressPercent, AsyncGitNotification, ProgressPercent,
}; };
use std::{ use std::{
sync::{Arc, Mutex}, sync::{atomic::AtomicUsize, Arc, Mutex},
time::{Duration, Instant}, time::{Duration, Instant},
}; };
@ -69,41 +74,73 @@ impl AsyncCommitFilterJob {
commits: Vec<CommitId>, commits: Vec<CommitId>,
params: &RunParams<AsyncGitNotification, ProgressPercent>, params: &RunParams<AsyncGitNotification, ProgressPercent>,
) -> JobState { ) -> JobState {
let response = sync::repo(repo_path) let result = self
.map(|repo| self.filter_commits(&repo, commits, params)) .filter_commits(repo_path, commits, params)
.map(|(start, result)| CommitFilterResult { .map(|(start, result)| CommitFilterResult {
result, result,
duration: start.elapsed(), duration: start.elapsed(),
}); });
JobState::Response(response) JobState::Response(result)
} }
fn filter_commits( fn filter_commits(
&self, &self,
repo: &git2::Repository, repo_path: &RepoPath,
commits: Vec<CommitId>, commits: Vec<CommitId>,
params: &RunParams<AsyncGitNotification, ProgressPercent>, params: &RunParams<AsyncGitNotification, ProgressPercent>,
) -> (Instant, Vec<CommitId>) { ) -> Result<(Instant, Vec<CommitId>)> {
let total_amount = commits.len(); let total_amount = commits.len();
let start = Instant::now(); let start = Instant::now();
let result = commits //note: for some reason >4 threads degrades search performance
.into_iter() let pool =
.enumerate() rayon::ThreadPoolBuilder::new().num_threads(4).build()?;
.filter_map(|(idx, c)| {
Self::update_progress(
params,
ProgressPercent::new(idx, total_amount),
);
(*self.filter)(repo, &c) let idx = AtomicUsize::new(0);
.ok()
.and_then(|res| res.then_some(c))
})
.collect::<Vec<_>>();
(start, result) let mut result = pool.install(|| {
commits
.into_iter()
.enumerate()
.collect::<Vec<(usize, CommitId)>>()
.par_chunks(1000)
.filter_map(|c| {
//TODO: log repo open errors instead of silently dropping the whole chunk
sync::repo(repo_path).ok().map(|repo| {
c.iter()
.filter_map(|(e, c)| {
let idx = idx.fetch_add(
1,
std::sync::atomic::Ordering::Relaxed,
);
Self::update_progress(
params,
ProgressPercent::new(
idx,
total_amount,
),
);
(*self.filter)(&repo, c)
.ok()
.and_then(|res| {
res.then_some((*e, *c))
})
})
.collect::<Vec<_>>()
})
})
.flatten()
.collect::<Vec<_>>()
});
result.par_sort_by(|a, b| a.0.cmp(&b.0));
let result = result.into_iter().map(|c| c.1).collect();
Ok((start, result))
} }
fn update_progress( fn update_progress(