diff --git a/Cargo.lock b/Cargo.lock
index d8aee88db33..e756e623c00 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1325,6 +1325,7 @@ dependencies = [
  "gix-actor 0.33.0",
  "gix-archive",
  "gix-attributes 0.23.0",
+ "gix-blame",
  "gix-command",
  "gix-commitgraph 0.25.0",
  "gix-config",
@@ -1497,7 +1498,17 @@ dependencies = [
 name = "gix-blame"
 version = "0.0.0"
 dependencies = [
+ "gix-diff",
+ "gix-filter",
+ "gix-fs 0.12.0",
+ "gix-hash 0.15.0",
+ "gix-index 0.36.0",
+ "gix-object 0.45.0",
+ "gix-odb",
+ "gix-ref 0.48.0",
  "gix-testtools",
+ "gix-traverse 0.42.0",
+ "gix-worktree 0.37.0",
 ]
 
 [[package]]
diff --git a/README.md b/README.md
index 8d2b8e2e6e0..0d6cbd7f35a 100644
--- a/README.md
+++ b/README.md
@@ -138,6 +138,7 @@ is usable to some extent.
   * `gitoxide-core`
 * **very early** _(possibly without any documentation and many rough edges)_
   * [gix-merge](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-merge)
+  * [gix-blame](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-blame)
 * **idea** _(just a name placeholder)_
   * [gix-note](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-note)
   * [gix-fetchhead](https://github.com/GitoxideLabs/gitoxide/blob/main/crate-status.md#gix-fetchhead)
diff --git a/crate-status.md b/crate-status.md
index 502583127b8..7b813287503 100644
--- a/crate-status.md
+++ b/crate-status.md
@@ -356,6 +356,15 @@ Check out the [performance discussion][gix-diff-performance] as well.
 * [x] API documentation
     * [ ] Examples
 
+### gix-blame
+
+* [ ] commit-annotations for a single file
+    - [ ] progress
+    - [ ] interruptability
+    - [ ] streaming
+* [x] API documentation
+    * [ ] Examples
+
 ### gix-traverse
 
 Check out the [performance discussion][gix-traverse-performance] as well.
diff --git a/gitoxide-core/Cargo.toml b/gitoxide-core/Cargo.toml
index 83fe9d05957..4ce67a565a1 100644
--- a/gitoxide-core/Cargo.toml
+++ b/gitoxide-core/Cargo.toml
@@ -49,7 +49,7 @@ serde = ["gix/serde", "dep:serde_json", "dep:serde", "bytesize/serde"]
 
 [dependencies]
 # deselect everything else (like "performance") as this should be controllable by the parent application.
-gix = { version = "^0.67.0", path = "../gix", default-features = false, features = ["blob-merge", "blob-diff", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } +gix = { version = "^0.67.0", path = "../gix", default-features = false, features = ["blob-merge", "blob-diff", "blame", "revision", "mailmap", "excludes", "attributes", "worktree-mutation", "credentials", "interrupt", "status", "dirwalk"] } gix-pack-for-configuration-only = { package = "gix-pack", version = "^0.54.0", path = "../gix-pack", default-features = false, features = ["pack-cache-lru-dynamic", "pack-cache-lru-static", "generate", "streaming-input"] } gix-transport-configuration-only = { package = "gix-transport", version = "^0.43.0", path = "../gix-transport", default-features = false } gix-archive-for-configuration-only = { package = "gix-archive", version = "^0.16.0", path = "../gix-archive", optional = true, features = ["tar", "tar_gz"] } diff --git a/gitoxide-core/src/repository/blame.rs b/gitoxide-core/src/repository/blame.rs new file mode 100644 index 00000000000..0129e83b20e --- /dev/null +++ b/gitoxide-core/src/repository/blame.rs @@ -0,0 +1,62 @@ +use std::{ffi::OsStr, path::PathBuf, str::Lines}; + +use anyhow::anyhow; +use gix::bstr::BStr; + +pub fn blame_file(mut repo: gix::Repository, file: &OsStr, out: impl std::io::Write) -> anyhow::Result<()> { + repo.object_cache_size_if_unset(repo.compute_object_cache_size_for_tree_diffs(&**repo.index_or_empty()?)); + + let suspect = repo.head()?.peel_to_commit_in_place()?; + let traverse: Vec<_> = + gix::traverse::commit::topo::Builder::from_iters(&repo.objects, [suspect.id], None::>) + .build()? + .collect(); + let mut resource_cache = repo.diff_resource_cache_for_tree_diff()?; + + let work_dir: PathBuf = repo + .work_dir() + .ok_or_else(|| anyhow!("blame needs a workdir, but there is none"))? + .into(); + let file_path: &BStr = gix::path::os_str_into_bstr(file)?; + + let blame_entries = gix::blame::blame_file( + &repo.objects, + traverse, + &mut resource_cache, + suspect.id, + work_dir.clone(), + file_path, + )?; + + let absolute_path = work_dir.join(file); + let file_content = std::fs::read_to_string(absolute_path)?; + let lines = file_content.lines(); + + write_blame_entries(out, lines, blame_entries)?; + + Ok(()) +} + +fn write_blame_entries( + mut out: impl std::io::Write, + mut lines: Lines<'_>, + blame_entries: Vec, +) -> Result<(), std::io::Error> { + for blame_entry in blame_entries { + for line_number in blame_entry.range_in_blamed_file { + let line = lines.next().unwrap(); + + writeln!( + out, + "{} {} {}", + blame_entry.commit_id.to_hex_with_len(8), + // `line_number` is 0-based, but we want to show 1-based line numbers (as `git` + // does). 
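                        // For example, an output line then reads like `3b18e512 1 <line contents>`
                        // (abbreviated commit id, 1-based line number, the line itself); the hash here is made up.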
+ line_number + 1, + line + )?; + } + } + + Ok(()) +} diff --git a/gitoxide-core/src/repository/mod.rs b/gitoxide-core/src/repository/mod.rs index 489d5c32e66..e8900ccf294 100644 --- a/gitoxide-core/src/repository/mod.rs +++ b/gitoxide-core/src/repository/mod.rs @@ -21,6 +21,7 @@ pub enum PathsOrPatterns { pub mod archive; pub mod cat; pub use cat::function::cat; +pub mod blame; pub mod commit; pub mod config; mod credential; diff --git a/gix-blame/Cargo.toml b/gix-blame/Cargo.toml index de8b8fa22b9..50340eb3488 100644 --- a/gix-blame/Cargo.toml +++ b/gix-blame/Cargo.toml @@ -5,7 +5,7 @@ name = "gix-blame" version = "0.0.0" repository = "https://github.com/GitoxideLabs/gitoxide" license = "MIT OR Apache-2.0" -description = "A crate of the gitoxide project dedicated implementing a 'blame' algorithm" +description = "A crate of the gitoxide project dedicated to implementing a 'blame' algorithm" authors = ["Christoph Rüßler ", "Sebastian Thiel "] edition = "2021" rust-version = "1.65" @@ -14,6 +14,16 @@ rust-version = "1.65" doctest = false [dependencies] +gix-diff = { version = "^0.47.0", path = "../gix-diff", default-features = false, features = ["blob"] } +gix-object = { version = "^0.45.0", path = "../gix-object" } +gix-hash = { version = "^0.15.0", path = "../gix-hash" } +gix-worktree = { version = "^0.37.0", path = "../gix-worktree", default-features = false, features = ["attributes"] } +gix-traverse = { version = "^0.42.0", path = "../gix-traverse" } [dev-dependencies] +gix-ref = { version = "^0.48.0", path = "../gix-ref" } +gix-filter = { version = "^0.14.0", path = "../gix-filter" } +gix-fs = { version = "^0.12.0", path = "../gix-fs" } +gix-index = { version = "^0.36.0", path = "../gix-index" } +gix-odb = { version = "^0.64.0", path = "../gix-odb" } gix-testtools = { path = "../tests/tools" } diff --git a/gix-blame/src/lib.rs b/gix-blame/src/lib.rs index d13db17bbbb..a1bce5dbd28 100644 --- a/gix-blame/src/lib.rs +++ b/gix-blame/src/lib.rs @@ -2,9 +2,960 @@ #![deny(rust_2018_idioms)] #![forbid(unsafe_code)] -#[cfg(test)] -mod tests { - #[test] - #[ignore] - fn it_works() {} +use std::{ + collections::BTreeMap, + ops::{Add, AddAssign, Range, SubAssign}, + path::PathBuf, +}; + +use gix_hash::ObjectId; +use gix_object::bstr::BStr; +use gix_object::FindExt; + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Offset { + Added(u32), + Deleted(u32), +} + +impl Add for Offset { + type Output = Offset; + + fn add(self, rhs: u32) -> Self::Output { + let Self::Added(added) = self else { todo!() }; + + Self::Added(added + rhs) + } +} + +impl Add for Offset { + type Output = Offset; + + fn add(self, rhs: Offset) -> Self::Output { + match (self, rhs) { + (Self::Added(added), Offset::Added(added_rhs)) => Self::Added(added + added_rhs), + (Self::Added(added), Offset::Deleted(deleted_rhs)) => { + if deleted_rhs > added { + Self::Deleted(deleted_rhs - added) + } else { + Self::Added(added - deleted_rhs) + } + } + (Self::Deleted(deleted), Offset::Added(added_rhs)) => { + if added_rhs > deleted { + Self::Added(added_rhs - deleted) + } else { + Self::Deleted(deleted - added_rhs) + } + } + (Self::Deleted(deleted), Offset::Deleted(deleted_rhs)) => Self::Deleted(deleted + deleted_rhs), + } + } +} + +impl AddAssign for Offset { + fn add_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => *self = Self::Added(*added + rhs), + Self::Deleted(deleted) => { + if rhs > *deleted { + *self = Self::Added(rhs - *deleted); + } else { + *self = Self::Deleted(*deleted - rhs); + } + } + } + } +} + +impl 
SubAssign for Offset { + fn sub_assign(&mut self, rhs: u32) { + match self { + Self::Added(added) => { + if rhs > *added { + *self = Self::Deleted(rhs - *added); + } else { + *self = Self::Added(*added - rhs); + } + } + Self::Deleted(deleted) => *self = Self::Deleted(*deleted + rhs), + } + } +} + +#[derive(Debug, PartialEq)] +pub struct BlameEntry { + pub range_in_blamed_file: Range, + pub range_in_original_file: Range, + pub commit_id: ObjectId, +} + +impl BlameEntry { + pub fn new(range_in_blamed_file: Range, range_in_original_file: Range, commit_id: ObjectId) -> Self { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + + Self { + range_in_blamed_file: range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } + + fn with_offset(range_in_original_file: Range, commit_id: ObjectId, offset: Offset) -> Self { + assert!( + range_in_original_file.end > range_in_original_file.start, + "{range_in_original_file:?}" + ); + + match offset { + Offset::Added(added) => Self { + range_in_blamed_file: (range_in_original_file.start + added)..(range_in_original_file.end + added), + range_in_original_file, + commit_id, + }, + Offset::Deleted(deleted) => { + assert!( + range_in_original_file.start >= deleted, + "{range_in_original_file:?} {offset:?}" + ); + + Self { + range_in_blamed_file: (range_in_original_file.start - deleted) + ..(range_in_original_file.end - deleted), + range_in_original_file, + commit_id, + } + } + } + } + + fn from_unblamed_hunk(unblamed_hunk: &UnblamedHunk, commit_id: ObjectId) -> Self { + let range_in_original_file = unblamed_hunk.suspects.get(&commit_id).expect("TODO"); + + Self { + range_in_blamed_file: unblamed_hunk.range_in_blamed_file.clone(), + range_in_original_file: range_in_original_file.clone(), + commit_id, + } + } +} + +trait LineRange { + fn shift_by(&self, offset: Offset) -> Self; +} + +impl LineRange for Range { + fn shift_by(&self, offset: Offset) -> Self { + match offset { + Offset::Added(added) => { + assert!(self.start >= added, "{self:?} {offset:?}"); + + Self { + start: self.start - added, + end: self.end - added, + } + } + Offset::Deleted(deleted) => Self { + start: self.start + deleted, + end: self.end + deleted, + }, + } + } +} + +#[derive(Clone, Debug, PartialEq)] +pub struct UnblamedHunk { + pub range_in_blamed_file: Range, + pub suspects: BTreeMap>, +} + +#[derive(Debug)] +enum Either { + Left(T), + Right(U), +} + +impl UnblamedHunk { + pub fn new(range_in_blamed_file: Range, suspect: ObjectId, offset: Offset) -> Self { + assert!( + range_in_blamed_file.end > range_in_blamed_file.start, + "{range_in_blamed_file:?}" + ); + + let range_in_destination = range_in_blamed_file.shift_by(offset); + + Self { + range_in_blamed_file, + suspects: [(suspect, range_in_destination)].into(), + } + } + + fn shift_by(mut self, suspect: ObjectId, offset: Offset) -> Self { + self.suspects.entry(suspect).and_modify(|e| *e = e.shift_by(offset)); + + self + } + + fn split_at(self, suspect: ObjectId, line_number_in_destination: u32) -> Either { + match self.suspects.get(&suspect) { + None => Either::Left(self), + Some(range_in_suspect) => { + if line_number_in_destination > range_in_suspect.start + && line_number_in_destination < range_in_suspect.end + { + let split_at_from_start = line_number_in_destination - range_in_suspect.start; + + if split_at_from_start > 0 { + let 
new_suspects_before = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, range.start..(range.start + split_at_from_start))) + .collect(); + + let new_suspects_after = self + .suspects + .iter() + .map(|(suspect, range)| (*suspect, (range.start + split_at_from_start)..range.end)) + .collect(); + + let new_hunk_before = Self { + range_in_blamed_file: self.range_in_blamed_file.start + ..(self.range_in_blamed_file.start + split_at_from_start), + suspects: new_suspects_before, + }; + let new_hunk_after = Self { + range_in_blamed_file: (self.range_in_blamed_file.start + split_at_from_start) + ..(self.range_in_blamed_file.end), + suspects: new_suspects_after, + }; + + Either::Right((new_hunk_before, new_hunk_after)) + } else { + Either::Left(self) + } + } else { + Either::Left(self) + } + } + } + } + + fn offset_for(&self, suspect: ObjectId) -> Offset { + let range_in_suspect = self.suspects.get(&suspect).expect("TODO"); + + if self.range_in_blamed_file.start > range_in_suspect.start { + Offset::Added(self.range_in_blamed_file.start - range_in_suspect.start) + } else { + Offset::Deleted(range_in_suspect.start - self.range_in_blamed_file.start) + } + } + + fn pass_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.remove(&from) { + self.suspects.insert(to, range_in_suspect); + } + } + + fn clone_blame(&mut self, from: ObjectId, to: ObjectId) { + if let Some(range_in_suspect) = self.suspects.get(&from) { + self.suspects.insert(to, range_in_suspect.clone()); + } + } + + fn remove_blame(&mut self, suspect: ObjectId) { + let _ = self.suspects.remove(&suspect); + } +} + +#[derive(Clone, Debug, PartialEq)] +pub enum Change { + Unchanged(Range), + Added(Range, u32), + Deleted(u32, u32), +} + +struct ChangeRecorder { + previous_after_end: u32, + changes: Vec, + total_number_of_lines: u32, +} + +impl ChangeRecorder { + fn new(total_number_of_lines: u32) -> Self { + ChangeRecorder { + previous_after_end: 0, + changes: vec![], + total_number_of_lines, + } + } +} + +impl gix_diff::blob::Sink for ChangeRecorder { + type Out = Vec; + + // “imara-diff will compute a line diff by default”, so each `start` and `end` represents a + // line in a file. + fn process_change(&mut self, before: Range, after: Range) { + // This checks for unchanged hunks. 
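As an aside (not part of the patch), a minimal sketch of the values this `ChangeRecorder` sink would emit, using only the public `Change` type defined above: replacing the middle line of a three-line file yields an unchanged run, an addition that also deletes one old line, and a trailing unchanged run.

```rust
use gix_blame::Change;

fn main() {
    // Hypothetical line diff of ["a", "b", "c"] -> ["a", "x", "c"]:
    // line 0 is unchanged, line 1 of the new file is added while one old line
    // is deleted, and the last line is unchanged again.
    let recorded = vec![
        Change::Unchanged(0..1),
        Change::Added(1..2, 1),
        Change::Unchanged(2..3),
    ];
    println!("{recorded:?}");
}
```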
+ // + // https://docs.rs/imara-diff/latest/imara_diff/sink/trait.Sink.html#notes + if after.start > self.previous_after_end { + self.changes + .push(Change::Unchanged(self.previous_after_end..after.start)); + } + + match (before.end > before.start, after.end > after.start) { + (_, true) => { + self.changes + .push(Change::Added(after.start..after.end, before.end - before.start)); + } + (true, false) => { + self.changes + .push(Change::Deleted(after.start, before.end - before.start)); + } + (false, false) => unimplemented!(), + } + + self.previous_after_end = after.end; + } + + fn finish(mut self) -> Self::Out { + if self.total_number_of_lines > self.previous_after_end { + self.changes + .push(Change::Unchanged(self.previous_after_end..self.total_number_of_lines)); + } + + self.changes + } +} + +pub fn process_change( + out: &mut Vec, + new_hunks_to_blame: &mut Vec, + offset_in_destination: &mut Offset, + suspect: ObjectId, + hunk: Option, + change: Option, +) -> (Option, Option) { + match (hunk, change) { + (Some(hunk), Some(Change::Unchanged(unchanged))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + + return (None, Some(Change::Unchanged(unchanged))); + }; + + match ( + // Since `unchanged` is a range that is not inclusive at the end, + // `unchanged.end` is not part of `unchanged`. The first line that is + // `unchanged.end - 1`. + range_in_suspect.contains(&unchanged.start), + (unchanged.end - 1) >= range_in_suspect.start && unchanged.end <= range_in_suspect.end, + ) { + (_, true) => { + // <------> (hunk) + // <-------> (unchanged) + // + // <----------> (hunk) + // <---> (unchanged) + + (Some(hunk), None) + } + (true, false) => { + // <--------> (hunk) + // <-------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged))) + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (unchanged) + // + // <----> (hunk) + // <--> (unchanged) + // + // <--> (hunk) + // <----> (unchanged) + + if unchanged.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (unchanged) + + (Some(hunk.clone()), None) + } else { + // <--> (hunk) + // <----> (unchanged) + // + // <---> (hunk) + // <----------> (unchanged) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Unchanged(unchanged.clone()))) + } + } + } + } + (Some(hunk), Some(Change::Added(added, number_of_lines_deleted))) => { + let Some(range_in_suspect) = hunk.suspects.get(&suspect) else { + new_hunks_to_blame.push(hunk); + + return (None, Some(Change::Added(added, number_of_lines_deleted))); + }; + + let range_in_suspect = range_in_suspect.clone(); + + match ( + range_in_suspect.contains(&added.start), + // Since `added` is a range that is not inclusive at the end, `added.end` is + // not part of `added`. The first line that is is `added.end - 1`. 
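                // For example, `added = 2..5` covers lines 2, 3 and 4; line 5 is not part of the change.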
+ (added.end - 1) >= range_in_suspect.start && added.end <= range_in_suspect.end, + ) { + (true, true) => { + // <----------> (hunk) + // <---> (added) + // <---> (blamed) + // <--> <-> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + out.push(BlameEntry::with_offset( + added.clone(), + suspect, + new_hunk.offset_for(suspect), + )); + + match new_hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (true, false) => { + // <--------> (hunk) + // <-------> (added) + // <----> (blamed) + // <--> (new hunk) + + let new_hunk = match hunk.split_at(suspect, added.start) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + out.push(BlameEntry::with_offset( + added.start..range_in_suspect.end, + suspect, + new_hunk.offset_for(suspect), + )); + + if added.end > range_in_suspect.end { + (None, Some(Change::Added(added, number_of_lines_deleted))) + } else { + todo!(); + } + } + (false, true) => { + // <-------> (hunk) + // <------> (added) + // <---> (blamed) + // <--> (new hunk) + + out.push(BlameEntry::with_offset( + range_in_suspect.start..added.end, + suspect, + hunk.offset_for(suspect), + )); + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + match hunk.split_at(suspect, added.end) { + Either::Left(_) => (None, None), + Either::Right((_, after)) => (Some(after), None), + } + } + (false, false) => { + // Any of the following cases are handled by this branch: + // <---> (hunk) + // <----------> (added) + // + // <----> (hunk) + // <--> (added) + // + // <--> (hunk) + // <----> (added) + + if added.end <= range_in_suspect.start { + // <----> (hunk) + // <--> (added) + + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk.clone()), None) + } else if range_in_suspect.end <= added.start { + // <--> (hunk) + // <----> (added) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } else { + // <---> (hunk) + // <----------> (added) + // <---> (blamed) + + out.push(BlameEntry::with_offset( + range_in_suspect.clone(), + suspect, + hunk.offset_for(suspect), + )); + + (None, Some(Change::Added(added.clone(), number_of_lines_deleted))) + } + } + } + } + (Some(hunk), Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted))) => { + let range_in_suspect = hunk.suspects.get(&suspect).expect("TODO"); + + if line_number_in_destination < range_in_suspect.start { + // <---> (hunk) + // | (line_number_in_destination) + + *offset_in_destination -= number_of_lines_deleted; + + (Some(hunk), None) + } else if line_number_in_destination < range_in_suspect.end { + // <-----> (hunk) + // | (line_number_in_destination) + + let new_hunk = match hunk.split_at(suspect, line_number_in_destination) { + Either::Left(hunk) => hunk, + Either::Right((before, after)) => { + new_hunks_to_blame.push(before.shift_by(suspect, *offset_in_destination)); + + after + } + }; + + *offset_in_destination -= 
number_of_lines_deleted; + + (Some(new_hunk), None) + } else { + // <---> (hunk) + // | (line_number_in_destination) + + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + ( + None, + Some(Change::Deleted(line_number_in_destination, number_of_lines_deleted)), + ) + } + } + (Some(hunk), None) => { + new_hunks_to_blame.push(hunk.shift_by(suspect, *offset_in_destination)); + + (None, None) + } + (None, Some(Change::Unchanged(_))) => (None, None), + (None, Some(Change::Added(added, number_of_lines_deleted))) => { + *offset_in_destination += added.end - added.start; + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, Some(Change::Deleted(_, number_of_lines_deleted))) => { + *offset_in_destination -= number_of_lines_deleted; + + (None, None) + } + (None, None) => (None, None), + } +} + +pub fn process_changes( + out: &mut Vec, + hunks_to_blame: &[UnblamedHunk], + changes: &[Change], + suspect: ObjectId, +) -> Vec { + let mut hunks_iter = hunks_to_blame.iter().cloned(); + let mut changes_iter = changes.iter().cloned(); + + let mut hunk: Option = hunks_iter.next(); + let mut change: Option = changes_iter.next(); + + let mut new_hunks_to_blame: Vec = vec![]; + let mut offset_in_destination: Offset = Offset::Added(0); + + loop { + (hunk, change) = process_change( + out, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + hunk, + change, + ); + + hunk = hunk.or_else(|| hunks_iter.next()); + change = change.or_else(|| changes_iter.next()); + + if hunk.is_none() && change.is_none() { + break; + } + } + + new_hunks_to_blame +} + +fn get_changes_for_file_path( + odb: impl gix_object::Find + gix_object::FindHeader, + file_path: &BStr, + id: ObjectId, + parent_id: ObjectId, +) -> Vec { + let mut buffer = Vec::new(); + + let parent = odb.find_commit(&parent_id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let parent_tree_iter = odb + .find(&parent.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut buffer = Vec::new(); + let commit = odb.find_commit(&id, &mut buffer).unwrap(); + + let mut buffer = Vec::new(); + let tree_iter = odb + .find(&commit.tree(), &mut buffer) + .unwrap() + .try_into_tree_iter() + .unwrap(); + + let mut recorder = gix_diff::tree::Recorder::default(); + gix_diff::tree( + parent_tree_iter, + tree_iter, + gix_diff::tree::State::default(), + &odb, + &mut recorder, + ) + .unwrap(); + + recorder + .records + .iter() + .filter(|change| match change { + gix_diff::tree::recorder::Change::Modification { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Addition { path, .. } => path == file_path, + gix_diff::tree::recorder::Change::Deletion { path, .. 
} => path == file_path, + }) + .cloned() + .collect() +} + +fn get_changes( + odb: impl gix_object::Find + gix_object::FindHeader, + resource_cache: &mut gix_diff::blob::Platform, + oid: ObjectId, + previous_oid: ObjectId, + file_path: &BStr, +) -> Vec { + resource_cache + .set_resource( + previous_oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::OldOrSource, + &odb, + ) + .unwrap(); + resource_cache + .set_resource( + oid, + gix_object::tree::EntryKind::Blob, + file_path, + gix_diff::blob::ResourceKind::NewOrDestination, + &odb, + ) + .unwrap(); + + let outcome = resource_cache.prepare_diff().unwrap(); + let input = outcome.interned_input(); + let number_of_lines_in_destination = input.after.len(); + let change_recorder = ChangeRecorder::new(number_of_lines_in_destination.try_into().unwrap()); + + gix_diff::blob::diff(gix_diff::blob::Algorithm::Histogram, &input, change_recorder) +} + +/// This function merges adjacent blame entries. It merges entries that are adjacent both in the +/// blamed file and in the original file that introduced them. This follows `git`’s +/// behaviour. `libgit2`, as of 2024-09-19, only checks whether two entries are adjacent in the +/// blamed file which can result in different blames in certain edge cases. See [the commit][1] +/// that introduced the extra check into `git` for context. See [this commit][2] for a way to test +/// for this behaviour in `git`. +/// +/// [1]: https://github.com/git/git/commit/c2ebaa27d63bfb7c50cbbdaba90aee4efdd45d0a +/// [2]: https://github.com/git/git/commit/6dbf0c7bebd1c71c44d786ebac0f2b3f226a0131 +fn coalesce_blame_entries(lines_blamed: Vec) -> Vec { + // TODO + // It’s possible this could better be done on insertion into `lines_blamed`. + lines_blamed.into_iter().fold(vec![], |mut acc, entry| { + let previous_entry = acc.last(); + + if let Some(previous_entry) = previous_entry { + if previous_entry.commit_id == entry.commit_id + && previous_entry.range_in_blamed_file.end == entry.range_in_blamed_file.start + // As of 2024-09-19, the check below only is in `git`, but not in `libgit2`. + && previous_entry.range_in_original_file.end == entry.range_in_original_file.start + { + let coalesced_entry = BlameEntry { + range_in_blamed_file: previous_entry.range_in_blamed_file.start..entry.range_in_blamed_file.end, + range_in_original_file: previous_entry.range_in_original_file.start + ..entry.range_in_original_file.end, + commit_id: previous_entry.commit_id, + }; + + acc.pop(); + acc.push(coalesced_entry); + } else { + acc.push(entry); + } + + acc + } else { + acc.push(entry); + + acc + } + }) +} + +// TODO: do not instantiate anything, get everything passed as argument. +pub fn blame_file( + odb: impl gix_object::Find + gix_object::FindHeader, + traverse: impl IntoIterator>, + resource_cache: &mut gix_diff::blob::Platform, + suspect: ObjectId, + worktree_path: PathBuf, + file_path: &BStr, +) -> Result, E> { + // TODO + // At a high level, what we want to do is the following: + // + // - get the commit that belongs to a commit id + // - walk through parents + // - for each parent, do a diff and mark lines that don’t have a suspect (this is the term + // used in `libgit2`) yet, but that have been changed in this commit + // + // The algorithm in `libgit2` works by going through parents and keeping a linked list of blame + // suspects. 
It can be visualized as follows: + // + // <----------------------------------------> + // <---------------><-----------------------> + // <---><----------><-----------------------> + // <---><----------><-------><-----><-------> + // <---><---><-----><-------><-----><-------> + // <---><---><-----><-------><-----><-><-><-> + + // Needed for `to_str`. + use gix_object::bstr::ByteSlice; + + let absolute_path = worktree_path.join(file_path.to_str().unwrap()); + + // TODO Verify that `imara-diff` tokenizes lines the same way `lines` does. + let number_of_lines = std::fs::read_to_string(absolute_path).unwrap().lines().count(); + + let mut hunks_to_blame: Vec = vec![UnblamedHunk::new( + 0..number_of_lines.try_into().unwrap(), + suspect, + Offset::Added(0), + )]; + let mut out: Vec = vec![]; + + 'outer: for item in traverse { + let item = item?; + let suspect = item.id; + + let parent_ids = item.parent_ids; + if parent_ids.is_empty() { + // I’m not entirely sure if this is correct yet. `suspect`, at this point, is the `id` of + // the last `item` that was yielded by `traverse`, so it makes sense to assign the + // remaining lines to it, even though we don’t explicitly check whether that is true + // here. We could perhaps use `needed_to_obtain` to compare `suspect` against an empty + // tree to validate this assumption. + out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame = vec![]; + + break; + } + + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree = odb.find_tree(&commit_id, &mut buffer).unwrap(); + + let Some(entry) = tree.bisect_entry(file_path, false) else { + continue; + }; + + if parent_ids.len() == 1 { + let parent_id: ObjectId = *parent_ids.last().unwrap(); + + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(&parent_id, &mut buffer).unwrap().tree(); + let parent_tree = odb.find_tree(&parent_commit_id, &mut buffer).unwrap(); + + if let Some(parent_entry) = parent_tree.bisect_entry(file_path, false) { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are identical + // which is why we can pass blame to the parent without further checks. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + + continue; + } + } + + let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); + + let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + // None of the changes affected the file we’re currently blaming. Pass blame to parent. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Every line that has not been blamed yet on a commit, is expected to have been + // added when the file was added to the repository. + out.extend( + hunks_to_blame + .iter() + .map(|hunk| BlameEntry::from_unblamed_hunk(hunk, suspect)), + ); + + hunks_to_blame = vec![]; + + break; + } + gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. 
} => { + let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + } + } + } else { + let mut buffer = Vec::new(); + let commit_id = odb.find_commit(&suspect, &mut buffer).unwrap().tree(); + let tree = odb.find_tree(&commit_id, &mut buffer).unwrap(); + let entry = tree.bisect_entry(file_path, false).unwrap(); + + for parent_id in &parent_ids { + let mut buffer = Vec::new(); + let parent_commit_id = odb.find_commit(parent_id, &mut buffer).unwrap().tree(); + let parent_tree = odb.find_tree(&parent_commit_id, &mut buffer).unwrap(); + + if let Some(parent_entry) = parent_tree.bisect_entry(file_path, false) { + if entry.oid == parent_entry.oid { + // The blobs storing the blamed file in `entry` and `parent_entry` are + // identical which is why we can pass blame to the parent without further + // checks. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, *parent_id)); + + continue 'outer; + } + } + } + + for parent_id in parent_ids { + let changes_for_file_path = get_changes_for_file_path(&odb, file_path, item.id, parent_id); + + let [ref modification]: [gix_diff::tree::recorder::Change] = changes_for_file_path[..] else { + // None of the changes affected the file we’re currently blaming. Pass blame + // to parent. + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.clone_blame(suspect, parent_id)); + + continue; + }; + + match modification { + gix_diff::tree::recorder::Change::Addition { .. } => { + // Do nothing under the assumption that this always (or almost always) + // implies that the file comes from a different parent, compared to which + // it was modified, not added. + // + // TODO: I still have to figure out whether this is correct in all cases. + } + gix_diff::tree::recorder::Change::Deletion { .. } => todo!(), + gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } => { + let changes = get_changes(&odb, resource_cache, *oid, *previous_oid, file_path); + + hunks_to_blame = process_changes(&mut out, &hunks_to_blame, &changes, suspect); + + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.pass_blame(suspect, parent_id)); + } + } + } + + hunks_to_blame + .iter_mut() + .for_each(|unblamed_hunk| unblamed_hunk.remove_blame(suspect)); + } + } + + assert_eq!(hunks_to_blame, vec![]); + + // I don’t know yet whether it would make sense to use a data structure instead that preserves + // order on insertion. + out.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start)); + + Ok(coalesce_blame_entries(out)) } diff --git a/gix-blame/tests/blame.rs b/gix-blame/tests/blame.rs index 4cb22417ece..b72aba2996c 100644 --- a/gix-blame/tests/blame.rs +++ b/gix-blame/tests/blame.rs @@ -1,4 +1,1576 @@ +use gix_blame::{blame_file, process_change, process_changes, BlameEntry, Change, Offset, UnblamedHunk}; +use gix_hash::ObjectId; +use gix_object::bstr; +use std::path::PathBuf; + +struct Baseline<'a> { + lines: bstr::Lines<'a>, +} + +mod baseline { + use std::path::Path; + + use gix_hash::ObjectId; + use gix_ref::bstr::ByteSlice; + + use super::Baseline; + use gix_blame::BlameEntry; + + // These fields are used by `git` in its porcelain output. 
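For orientation, the baselines parsed below appear to follow `git`'s porcelain blame format (`git blame --porcelain`): a group header carries the commit id, the 1-based start lines in the original and final file, and the group's line count, and every blamed line follows its own header, prefixed with a tab. The following is a hand-written illustration of a two-line group (hashes, names and timestamps are invented), kept as a Rust string constant.

```rust
// A made-up porcelain-style baseline, for illustration only.
const EXAMPLE_BASELINE: &str = "\
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 1 1 2
author A U Thor
author-mail <author@example.com>
author-time 1111111111
author-tz +0000
committer C O Mitter
committer-mail <committer@example.com>
committer-time 1111111112
committer-tz +0000
summary add the first lines
filename simple.txt
\tfirst line
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa 2 2
\tsecond line
";

fn main() {
    // The parser below would turn this into a single `BlameEntry` covering
    // lines 0..2 in both the blamed and the original file.
    println!("{EXAMPLE_BASELINE}");
}
```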
+ const HEADER_FIELDS: [&str; 12] = [ + // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L256-L280 + "author", + "author-mail", + "author-time", + "author-tz", + "committer", + "committer-mail", + "committer-time", + "committer-tz", + "summary", + "boundary", + // https://github.com/git/git/blob/6258f68c3c1092c901337895c864073dcdea9213/builtin/blame.c#L239-L248 + "previous", + "filename", + ]; + + fn is_known_header_field(field: &&str) -> bool { + HEADER_FIELDS.contains(field) + } + + impl<'a> Baseline<'a> { + pub fn collect(baseline_path: impl AsRef) -> std::io::Result> { + let content = std::fs::read(baseline_path)?; + + Ok(Baseline { lines: content.lines() }.collect()) + } + } + + impl<'a> Iterator for Baseline<'a> { + type Item = BlameEntry; + + fn next(&mut self) -> Option { + let mut ranges = None; + let mut commit_id = gix_hash::Kind::Sha1.null(); + let mut skip_lines: u32 = 0; + + for line in self.lines.by_ref() { + if line.starts_with(b"\t") { + // Each group consists of a header and one or more lines. We break from the + // loop, thus returning a `BlameEntry` from `next` once we have seen the number + // of lines starting with "\t" as indicated in the group’s header. + skip_lines -= 1; + + if skip_lines == 0 { + break; + } else { + continue; + } + } + + let fields: Vec<&str> = line.to_str().unwrap().split(' ').collect(); + if fields.len() == 4 { + // We’re possibly dealing with a group header. + // If we can’t parse the first field as an `ObjectId`, we know this is not a + // group header, so we continue. This can yield false positives, but for + // testing purposes, we don’t bother. + commit_id = match ObjectId::from_hex(fields[0].as_bytes()) { + Ok(id) => id, + Err(_) => continue, + }; + + let line_number_in_original_file = fields[1].parse::().unwrap(); + let line_number_in_final_file = fields[2].parse::().unwrap(); + // The last field indicates the number of lines this group contains info for + // (this is not equal to the number of lines in git blame’s porcelain output). + let number_of_lines_in_group = fields[3].parse::().unwrap(); + + skip_lines = number_of_lines_in_group; + + let original_range = (line_number_in_original_file - 1) + ..(line_number_in_original_file + number_of_lines_in_group - 1); + let blame_range = + (line_number_in_final_file - 1)..(line_number_in_final_file + number_of_lines_in_group - 1); + assert!(ranges.is_none(), "should not overwrite existing ranges"); + ranges = Some((blame_range, original_range)); + } else if !is_known_header_field(&fields[0]) && ObjectId::from_hex(fields[0].as_bytes()).is_err() { + panic!("unexpected line: '{:?}'", line.as_bstr()); + } + } + + let Some((range_in_blamed_file, range_in_original_file)) = ranges else { + // No new lines were parsed, so we assume the iterator is finished. 
+ return None; + }; + Some(BlameEntry::new(range_in_blamed_file, range_in_original_file, commit_id)) + } + } +} + +struct Fixture { + worktree_path: PathBuf, + odb: gix_odb::Handle, + resource_cache: gix_diff::blob::Platform, + suspect: ObjectId, + commits: Vec>, +} + +impl Fixture { + fn new() -> gix_testtools::Result { + Self::for_worktree_path(fixture_path()) + } + + fn for_worktree_path(worktree_path: PathBuf) -> gix_testtools::Result { + use gix_ref::store::WriteReflog; + + let store = gix_ref::file::Store::at( + worktree_path.join(".git"), + gix_ref::store::init::Options { + write_reflog: WriteReflog::Disable, + ..Default::default() + }, + ); + let odb = gix_odb::at(worktree_path.join(".git/objects"))?; + + let mut reference = gix_ref::file::Store::find(&store, "HEAD")?; + + // Needed for `peel_to_id_in_place`. + use gix_ref::file::ReferenceExt; + + let head_id = reference.peel_to_id_in_place(&store, &odb)?; + + let commits: Vec<_> = gix_traverse::commit::topo::Builder::from_iters(&odb, [head_id], None::>) + .build()? + .collect(); + + let git_dir = worktree_path.join(".git"); + let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default())?; + let stack = gix_worktree::Stack::from_state_and_ignore_case( + worktree_path.clone(), + false, + gix_worktree::stack::State::AttributesAndIgnoreStack { + attributes: Default::default(), + ignore: Default::default(), + }, + &index, + index.path_backing(), + ); + let capabilities = gix_fs::Capabilities::probe(&git_dir); + let resource_cache = gix_diff::blob::Platform::new( + Default::default(), + gix_diff::blob::Pipeline::new( + gix_diff::blob::pipeline::WorktreeRoots { + old_root: None, + new_root: None, + }, + gix_filter::Pipeline::new(Default::default(), Default::default()), + vec![], + gix_diff::blob::pipeline::Options { + large_file_threshold_bytes: 0, + fs: capabilities, + }, + ), + gix_diff::blob::pipeline::Mode::ToGit, + stack, + ); + Ok(Fixture { + odb, + worktree_path, + resource_cache, + suspect: head_id, + commits, + }) + } +} + +macro_rules! 
mktest { + ($name:ident, $case:expr, $number_of_lines:literal) => { + #[test] + fn $name() { + let Fixture { + worktree_path, + odb, + mut resource_cache, + suspect, + commits, + } = Fixture::new().unwrap(); + + let lines_blamed = blame_file( + &odb, + commits, + &mut resource_cache, + suspect, + worktree_path, + format!("{}.txt", $case).as_str().into(), + ) + .unwrap(); + + assert_eq!(lines_blamed.len(), $number_of_lines); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{}.baseline", $case))).unwrap(); + + assert_eq!(baseline.len(), $number_of_lines); + assert_eq!(lines_blamed, baseline); + } + }; +} + +mktest!(simple_case, "simple", 4); +mktest!(multiline_hunks, "multiline-hunks", 3); +mktest!(deleted_lines, "deleted-lines", 1); +mktest!(deleted_lines_multiple_hunks, "deleted-lines-multiple-hunks", 2); +mktest!(changed_lines, "changed-lines", 1); +mktest!( + changed_line_between_unchanged_lines, + "changed-line-between-unchanged-lines", + 3 +); +mktest!(added_lines, "added-lines", 2); +mktest!(added_lines_around, "added-lines-around", 3); +mktest!(switched_lines, "switched-lines", 4); +mktest!(added_line_before_changed_line, "added-line-before-changed-line", 3); +mktest!(same_line_changed_twice, "same-line-changed-twice", 2); +mktest!(coalesce_adjacent_hunks, "coalesce-adjacent-hunks", 1); + +mktest!(resolved_conflict, "resolved-conflict", 2); +mktest!(file_in_one_chain_of_ancestors, "file-in-one-chain-of-ancestors", 1); +mktest!( + different_file_in_another_chain_of_ancestors, + "different-file-in-another-chain-of-ancestors", + 1 +); +mktest!(file_only_changed_in_branch, "file-only-changed-in-branch", 2); + +#[test] +#[ignore = "TBD: figure out what the problem is"] +// As of 2024-09-24, these tests are expected to fail. 
+// +// Context: https://github.com/Byron/gitoxide/pull/1453#issuecomment-2371013904 +fn diff_disparity() { + for case in ["empty-lines-myers", "empty-lines-histogram"] { + let Fixture { + worktree_path, + odb, + mut resource_cache, + suspect, + commits, + } = Fixture::new().unwrap(); + + let lines_blamed = blame_file( + &odb, + commits, + &mut resource_cache, + suspect, + worktree_path, + format!("{case}.txt").as_str().into(), + ) + .unwrap(); + + assert_eq!(lines_blamed.len(), 5); + + let git_dir = fixture_path().join(".git"); + let baseline = Baseline::collect(git_dir.join(format!("{case}.baseline"))).unwrap(); + + assert_eq!(lines_blamed, baseline, "{case}"); + } +} + +#[test] +fn process_change_works() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + None, + None, + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_added_hunk() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_added_hunk_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(2..3, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..3, + range_in_original_file: 2..3, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 0..2, + suspects: [(suspect, 0..2)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(5); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(10..15, suspect, Offset::Added(0))), + Some(Change::Added(12..13, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 13..15, + 
suspects: [(suspect, 13..15)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 12..13, + range_in_original_file: 12..13, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 10..12, + suspects: [(suspect, 5..7)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(6)); +} + +#[test] +fn process_change_works_added_hunk_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 7..12 + Some(UnblamedHunk::new(12..17, suspect, Offset::Added(5))), + Some(Change::Added(9..10, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 15..17, + suspects: [(suspect, 10..12)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 14..15, + range_in_original_file: 9..10, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 7..9)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 3..5, + suspects: [(suspect, 3..5)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 0..3, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); +} + +#[test] +fn process_change_works_added_hunk_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 0..4 + Some(UnblamedHunk::new(1..5, suspect, Offset::Added(1))), + Some(Change::Added(0..3, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..5, + suspects: [(suspect, 3..4)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 1..4, + range_in_original_file: 0..3, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(2)); +} + +#[test] +fn process_change_works_added_hunk_7() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(2); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // 
range_in_destination: 2..6 + Some(UnblamedHunk::new(3..7, suspect, Offset::Added(1))), + Some(Change::Added(3..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 6..7, + suspects: [(suspect, 5..6)].into() + }) + ); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 4..6, + range_in_original_file: 3..5, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..4, + suspects: [(suspect, 0..1)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_added_hunk_8() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..26 + Some(UnblamedHunk::new(23..24, suspect, Offset::Deleted(2))), + Some(Change::Added(25..27, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(25..27, 1))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 25..26, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_9() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 21..22 + Some(UnblamedHunk::new(23..24, suspect, Offset::Added(2))), + Some(Change::Added(18..22, 3)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, None); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 23..24, + range_in_original_file: 21..22, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_added_hunk_10() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 70..108 + Some(UnblamedHunk::new(71..109, suspect, Offset::Added(1))), + Some(Change::Added(106..109, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(106..109, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 107..109, + range_in_original_file: 106..108, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 71..107, + suspects: [(suspect, 70..106)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_added_hunk_11() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, 
+ suspect, + // range_in_destination: 137..144 + Some(UnblamedHunk::new(149..156, suspect, Offset::Added(12))), + Some(Change::Added(143..146, 0)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(143..146, 0))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 155..156, + range_in_original_file: 143..144, + commit_id: suspect + }] + ); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 149..155, + suspects: [(suspect, 137..143)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_no_overlap() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Deleted(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 2..5 + Some(UnblamedHunk::new(3..6, suspect, Offset::Added(1))), + Some(Change::Added(7..10, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(7..10, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 3..6, + suspects: [(suspect, 5..8)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Deleted(3)); +} + +#[test] +fn process_change_works_no_overlap_2() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 6..8 + Some(UnblamedHunk::new(9..11, suspect, Offset::Added(3))), + Some(Change::Added(2..5, 0)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 9..11, + suspects: [(suspect, 6..8)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_no_overlap_3() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..15 + Some(UnblamedHunk::new(4..15, suspect, Offset::Deleted(1))), + Some(Change::Added(4..5, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 4..15, + suspects: [(suspect, 5..16)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(0)); +} + +#[test] +fn process_change_works_no_overlap_4() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 25..27 + Some(UnblamedHunk::new(23..25, suspect, Offset::Deleted(2))), + Some(Change::Unchanged(21..22)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 
23..25, + suspects: [(suspect, 25..27)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_no_overlap_5() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(1); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 17..18 + Some(UnblamedHunk::new(15..16, suspect, Offset::Deleted(2))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Deleted(20, 1))); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 15..16, + suspects: [(suspect, 16..17)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} + +#[test] +fn process_change_works_no_overlap_6() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(0); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 22..24 + Some(UnblamedHunk::new(23..25, suspect, Offset::Added(1))), + Some(Change::Deleted(20, 1)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 23..25, + suspects: [(suspect, 22..24)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Deleted(1)); +} + +#[test] +fn process_change_works_enclosing_addition() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 5..8 + Some(UnblamedHunk::new(2..5, suspect, Offset::Deleted(3))), + Some(Change::Added(3..12, 2)), + ); + + assert_eq!(hunk, None); + assert_eq!(change, Some(Change::Added(3..12, 2))); + assert_eq!( + lines_blamed, + [BlameEntry { + range_in_blamed_file: 2..5, + range_in_original_file: 5..8, + commit_id: suspect + }] + ); + assert_eq!(new_hunks_to_blame, []); + assert_eq!(offset_in_destination, Offset::Added(3)); +} + +#[test] +fn process_change_works_enclosing_deletion() { + let mut lines_blamed = Vec::new(); + let mut new_hunks_to_blame = Vec::new(); + let mut offset_in_destination: Offset = Offset::Added(3); + let suspect = ObjectId::null(gix_hash::Kind::Sha1); + + let (hunk, change) = process_change( + &mut lines_blamed, + &mut new_hunks_to_blame, + &mut offset_in_destination, + suspect, + // range_in_destination: 13..20 + Some(UnblamedHunk::new(12..19, suspect, Offset::Deleted(1))), + Some(Change::Deleted(15, 2)), + ); + + assert_eq!( + hunk, + Some(UnblamedHunk { + range_in_blamed_file: 14..19, + suspects: [(suspect, 15..20)].into() + }) + ); + assert_eq!(change, None); + assert_eq!(lines_blamed, []); + assert_eq!( + new_hunks_to_blame, + [UnblamedHunk { + range_in_blamed_file: 12..14, + suspects: [(suspect, 10..12)].into() + }] + ); + assert_eq!(offset_in_destination, Offset::Added(1)); +} 
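A note on reading these fixtures: `UnblamedHunk::new` takes a range in the blamed file plus an `Offset` that shifts it into the suspect's coordinates (that is what the `// range_in_destination:` comments spell out), while the `Change` values are expressed in the suspect's coordinates. Below is my own self-contained sketch, not part of the patch, that runs `process_changes` on a five-line file whose first two lines were introduced by `suspect`.

```rust
use gix_blame::{process_changes, BlameEntry, Change, Offset, UnblamedHunk};
use gix_hash::ObjectId;

#[test]
fn process_changes_sketch() {
    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
    let mut out = Vec::new();

    // The whole five-line file is still unblamed; blamed-file and suspect
    // coordinates coincide, hence the zero offset.
    let hunks = vec![UnblamedHunk::new(0..5, suspect, Offset::Added(0))];
    // The diff against the parent: the first two lines were added, the rest is unchanged.
    let changes = vec![Change::Added(0..2, 0), Change::Unchanged(2..5)];

    let rest = process_changes(&mut out, &hunks, &changes, suspect);

    // The added lines are now blamed on `suspect` …
    assert_eq!(out, [BlameEntry::new(0..2, 0..2, suspect)]);
    // … while the remainder stays unblamed. Its suspect range is 0..3, i.e. where
    // those lines live in the parent once `pass_blame` hands them over.
    assert_eq!(rest, [UnblamedHunk::new(2..5, suspect, Offset::Added(2))]);
}
```

This mirrors what `blame_file` does per parent: first `process_changes`, then `pass_blame` to rename the remaining hunks' suspect to the parent.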
+
+#[test]
+fn process_change_works_enclosing_unchanged_lines() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(3);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        // range_in_destination: 109..113
+        Some(UnblamedHunk::new(110..114, suspect, Offset::Added(1))),
+        Some(Change::Unchanged(109..172)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, Some(Change::Unchanged(109..172)));
+    assert_eq!(lines_blamed, []);
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk {
+            range_in_blamed_file: 110..114,
+            suspects: [(suspect, 106..110)].into()
+        }]
+    );
+    assert_eq!(offset_in_destination, Offset::Added(3));
+}
+
+#[test]
+fn process_change_works_unchanged_hunk() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(0);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))),
+        Some(Change::Unchanged(0..3)),
+    );
+
+    assert_eq!(
+        hunk,
+        Some(UnblamedHunk {
+            range_in_blamed_file: 0..5,
+            suspects: [(suspect, 0..5)].into()
+        })
+    );
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Added(0));
+}
+
+#[test]
+fn process_change_works_unchanged_hunk_2() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(0);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))),
+        Some(Change::Unchanged(0..7)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, Some(Change::Unchanged(0..7)));
+    assert_eq!(lines_blamed, []);
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk {
+            range_in_blamed_file: 0..5,
+            suspects: [(suspect, 0..5)].into()
+        }]
+    );
+    assert_eq!(offset_in_destination, Offset::Added(0));
+}
+
+#[test]
+fn process_change_works_unchanged_hunk_3() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Deleted(2);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk {
+            range_in_blamed_file: 22..30,
+            suspects: [(suspect, 21..29)].into(),
+        }),
+        Some(Change::Unchanged(21..23)),
+    );
+
+    assert_eq!(
+        hunk,
+        Some(UnblamedHunk {
+            range_in_blamed_file: 22..30,
+            suspects: [(suspect, 21..29)].into()
+        })
+    );
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Deleted(2));
+}
+
+#[test]
+fn process_change_works_deleted_hunk() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(0);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk::new(0..5, suspect, Offset::Added(0))),
+        Some(Change::Deleted(5, 3)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, Some(Change::Deleted(5, 3)));
+    assert_eq!(lines_blamed, []);
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk {
+            range_in_blamed_file: 0..5,
+            suspects: [(suspect, 0..5)].into()
+        }]
+    );
+    assert_eq!(offset_in_destination, Offset::Added(0));
+}
+
+#[test]
+fn process_change_works_deleted_hunk_2() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(0);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))),
+        Some(Change::Deleted(0, 4)),
+    );
+
+    assert_eq!(
+        hunk,
+        Some(UnblamedHunk {
+            range_in_blamed_file: 2..16,
+            suspects: [(suspect, 2..16)].into()
+        })
+    );
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Deleted(4));
+}
+
+#[test]
+fn process_change_works_deleted_hunk_3() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(0);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        Some(UnblamedHunk::new(2..16, suspect, Offset::Added(0))),
+        Some(Change::Deleted(14, 4)),
+    );
+
+    assert_eq!(
+        hunk,
+        Some(UnblamedHunk {
+            range_in_blamed_file: 14..16,
+            suspects: [(suspect, 14..16)].into()
+        })
+    );
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk::new(2..14, suspect, Offset::Added(0))]
+    );
+    assert_eq!(offset_in_destination, Offset::Deleted(4));
+}
+
+#[test]
+fn process_change_works_addition_only() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(1);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        None,
+        Some(Change::Added(22..25, 1)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Added(3));
+}
+
+#[test]
+fn process_change_works_deletion_only() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(1);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        None,
+        Some(Change::Deleted(11, 5)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Deleted(4));
+}
+
+#[test]
+fn process_change_works_unchanged_only() {
+    let mut lines_blamed = Vec::new();
+    let mut new_hunks_to_blame = Vec::new();
+    let mut offset_in_destination: Offset = Offset::Added(1);
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+
+    let (hunk, change) = process_change(
+        &mut lines_blamed,
+        &mut new_hunks_to_blame,
+        &mut offset_in_destination,
+        suspect,
+        None,
+        Some(Change::Unchanged(11..13)),
+    );
+
+    assert_eq!(hunk, None);
+    assert_eq!(change, None);
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+    assert_eq!(offset_in_destination, Offset::Added(1));
+}
+
+#[test]
+fn process_changes_works() {
+    let mut lines_blamed = Vec::new();
+    let hunks_to_blame = &[];
+    let changes = &[];
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, []);
+}
+
 #[test]
-fn it_works() {
-    let _worktree = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap();
+fn process_changes_works_added_hunk() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..4, 0)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 0..4,
+            range_in_original_file: 0..4,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(new_hunks_to_blame, []);
+}
+
+#[test]
+fn process_changes_works_added_hunk_2() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..4, 0), Change::Unchanged(4..6)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 0..4,
+            range_in_original_file: 0..4,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_3() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))];
+    let changes = &[Change::Unchanged(0..2), Change::Added(2..4, 0), Change::Unchanged(4..6)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 2..4,
+            range_in_original_file: 2..4,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(
+        new_hunks_to_blame,
+        [
+            UnblamedHunk::new(0..2, suspect, Offset::Added(0)),
+            UnblamedHunk::new(4..6, suspect, Offset::Added(2))
+        ]
+    );
+}
+
+#[test]
+fn process_changes_works_added_hunk_4_0() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..1, 0), Change::Added(1..4, 0), Change::Unchanged(4..6)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [
+            BlameEntry {
+                range_in_blamed_file: 0..1,
+                range_in_original_file: 0..1,
+                commit_id: suspect
+            },
+            BlameEntry {
+                range_in_blamed_file: 1..4,
+                range_in_original_file: 1..4,
+                commit_id: suspect
+            }
+        ]
+    );
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_4_1() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..1, 0)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 0..1,
+            range_in_original_file: 0..1,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(1..6, suspect, Offset::Added(1))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_4_2() {
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap();
+    let mut lines_blamed: Vec<BlameEntry> = vec![BlameEntry {
+        range_in_blamed_file: 0..2,
+        range_in_original_file: 0..2,
+        commit_id: suspect,
+    }];
+    let hunks_to_blame = &[UnblamedHunk::new(2..6, suspect_2, Offset::Added(2))];
+    let changes = &[Change::Added(0..1, 0)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2);
+
+    assert_eq!(
+        lines_blamed,
+        [
+            BlameEntry {
+                range_in_blamed_file: 0..2,
+                range_in_original_file: 0..2,
+                commit_id: suspect
+            },
+            BlameEntry {
+                range_in_blamed_file: 2..3,
+                range_in_original_file: 0..1,
+                commit_id: suspect_2
+            }
+        ]
+    );
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk::new(3..6, suspect_2, Offset::Added(3))]
+    );
+}
+
+#[test]
+fn process_changes_works_added_hunk_5() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(0..6, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..4, 3), Change::Unchanged(4..6)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 0..4,
+            range_in_original_file: 0..4,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(1))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_6() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[UnblamedHunk::new(4..6, suspect, Offset::Added(1))];
+    let changes = &[Change::Added(0..3, 0), Change::Unchanged(3..5)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(lines_blamed, []);
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(4..6, suspect, Offset::Added(4))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_7() {
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let suspect_2 = ObjectId::from_hex(b"2222222222222222222222222222222222222222").unwrap();
+    let mut lines_blamed: Vec<BlameEntry> = vec![BlameEntry {
+        range_in_blamed_file: 0..1,
+        range_in_original_file: 0..1,
+        commit_id: suspect,
+    }];
+    let hunks_to_blame = &[UnblamedHunk::new(1..3, suspect_2, Offset::Added(1))];
+    let changes = &[Change::Added(0..1, 2)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect_2);
+
+    assert_eq!(
+        lines_blamed,
+        [
+            BlameEntry {
+                range_in_blamed_file: 0..1,
+                range_in_original_file: 0..1,
+                commit_id: suspect
+            },
+            BlameEntry {
+                range_in_blamed_file: 1..2,
+                range_in_original_file: 0..1,
+                commit_id: suspect_2
+            }
+        ]
+    );
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk::new(2..3, suspect_2, Offset::Added(0))]
+    );
+}
+
+#[test]
+fn process_changes_works_added_hunk_8() {
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let mut lines_blamed = Vec::new();
+    let hunks_to_blame = &[UnblamedHunk::new(0..4, suspect, Offset::Added(0))];
+    let changes = &[Change::Added(0..2, 0), Change::Unchanged(2..3), Change::Added(3..4, 0)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [
+            BlameEntry {
+                range_in_blamed_file: 0..2,
+                range_in_original_file: 0..2,
+                commit_id: suspect
+            },
+            BlameEntry {
+                range_in_blamed_file: 3..4,
+                range_in_original_file: 3..4,
+                commit_id: suspect
+            }
+        ]
+    );
+    assert_eq!(new_hunks_to_blame, [UnblamedHunk::new(2..3, suspect, Offset::Added(2))]);
+}
+
+#[test]
+fn process_changes_works_added_hunk_9() {
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let mut lines_blamed: Vec<BlameEntry> = vec![BlameEntry {
+        range_in_blamed_file: 30..31,
+        range_in_original_file: 30..31,
+        commit_id: suspect,
+    }];
+    let hunks_to_blame = &[
+        UnblamedHunk {
+            range_in_blamed_file: 0..30,
+            suspects: [(suspect, 0..30)].into(),
+        },
+        UnblamedHunk {
+            range_in_blamed_file: 31..37,
+            suspects: [(suspect, 31..37)].into(),
+        },
+    ];
+    let changes = &[
+        Change::Unchanged(0..16),
+        Change::Added(16..17, 0),
+        Change::Unchanged(17..37),
+    ];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    lines_blamed.sort_by(|a, b| a.range_in_blamed_file.start.cmp(&b.range_in_blamed_file.start));
+
+    assert_eq!(
+        lines_blamed,
+        [
+            BlameEntry {
+                range_in_blamed_file: 16..17,
+                range_in_original_file: 16..17,
+                commit_id: suspect
+            },
+            BlameEntry {
+                range_in_blamed_file: 30..31,
+                range_in_original_file: 30..31,
+                commit_id: suspect
+            }
+        ]
+    );
+    assert_eq!(
+        new_hunks_to_blame,
+        [
+            UnblamedHunk {
+                range_in_blamed_file: 0..16,
+                suspects: [(suspect, 0..16)].into()
+            },
+            UnblamedHunk {
+                range_in_blamed_file: 17..30,
+                suspects: [(suspect, 16..29)].into()
+            },
+            UnblamedHunk {
+                range_in_blamed_file: 31..37,
+                suspects: [(suspect, 30..36)].into()
+            }
+        ]
+    );
+}
+
+#[test]
+fn process_changes_works_deleted_hunk() {
+    let mut lines_blamed = Vec::new();
+    let suspect = ObjectId::null(gix_hash::Kind::Sha1);
+    let hunks_to_blame = &[
+        UnblamedHunk::new(0..4, suspect, Offset::Added(0)),
+        UnblamedHunk::new(4..7, suspect, Offset::Added(0)),
+    ];
+    let changes = &[Change::Deleted(0, 3), Change::Added(0..4, 0)];
+    let new_hunks_to_blame = process_changes(&mut lines_blamed, hunks_to_blame, changes, suspect);
+
+    assert_eq!(
+        lines_blamed,
+        [BlameEntry {
+            range_in_blamed_file: 0..4,
+            range_in_original_file: 0..4,
+            commit_id: suspect
+        }]
+    );
+    assert_eq!(
+        new_hunks_to_blame,
+        [UnblamedHunk {
+            range_in_blamed_file: 4..7,
+            suspects: [(suspect, 3..6)].into()
+        }]
+    );
+}
+
+fn fixture_path() -> PathBuf {
+    gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap()
 }
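Taken together, the expectations in these tests are consistent with `offset_in_destination` only being adjusted once a change has been fully consumed by `process_change`, that is, when the returned `change` is `None`: a consumed `Change::Added(range, deleted_in_before)` shifts later hunks by `range.len() - deleted_in_before`, a consumed `Change::Deleted(_, n)` shifts them by `-n`, and `Change::Unchanged` leaves the offset untouched. The stand-alone sketch below only illustrates that arithmetic; the variant meanings are inferred from the call sites above and are not taken from the crate itself.

// A minimal model of the offset bookkeeping the tests above expect.
// `Change` mirrors the shapes used in the tests; the field meanings are assumptions.
enum Change {
    Added(std::ops::Range<u32>, u32), // added range in the newer version, lines it replaced
    Deleted(u32, u32),                // start in the newer version, number of removed lines
    Unchanged(std::ops::Range<u32>),
}

// The signed shift applied to subsequent hunks once `change` has been consumed.
fn offset_shift(change: &Change) -> i64 {
    match change {
        Change::Added(range, deleted_in_before) => {
            i64::from(range.end - range.start) - i64::from(*deleted_in_before)
        }
        Change::Deleted(_, number_of_lines) => -i64::from(*number_of_lines),
        Change::Unchanged(_) => 0,
    }
}

fn main() {
    // Mirrors `process_change_works_addition_only`: Offset::Added(1) plus Added(22..25, 1) ends at Added(3).
    assert_eq!(1 + offset_shift(&Change::Added(22..25, 1)), 3);
    // Mirrors `process_change_works_deletion_only`: Offset::Added(1) plus Deleted(11, 5) ends at Deleted(4).
    assert_eq!(1 + offset_shift(&Change::Deleted(11, 5)), -4);
    // Unchanged ranges never move the offset.
    assert_eq!(offset_shift(&Change::Unchanged(11..13)), 0);
}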
diff --git a/gix-blame/tests/fixtures/make_blame_repo.sh b/gix-blame/tests/fixtures/make_blame_repo.sh
index 279cb3fe9d5..54c82eb2362 100755
--- a/gix-blame/tests/fixtures/make_blame_repo.sh
+++ b/gix-blame/tests/fixtures/make_blame_repo.sh
@@ -1,23 +1,206 @@
 #!/usr/bin/env bash
 set -eu -o pipefail
+git config --local diff.algorithm histogram
 git init -q
 git config merge.ff false
 git checkout -q -b main
-git commit -q --allow-empty -m c1
-git tag at-c1
-git commit -q --allow-empty -m c2
-git commit -q --allow-empty -m c3
-git commit -q --allow-empty -m c4
-
-git checkout -q -b branch1
-git commit -q --allow-empty -m b1c1
-git tag at-b1c1
-git commit -q --allow-empty -m b1c2
-
-git checkout -q main
-git commit -q --allow-empty -m c5
-git tag at-c5
-git merge branch1 -m m1b1
+
+echo "line 1" >> simple.txt
+git add simple.txt
+git commit -q -m c1
+
+echo -e "line 1\nline 2\nline 3" >> multiline-hunks.txt
+git add multiline-hunks.txt
+git commit -q -m c1.1
+
+echo -e "line 1\nline 2" > changed-lines.txt
+echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> changed-line-between-unchanged-lines.txt
+git add changed-lines.txt
+git add changed-line-between-unchanged-lines.txt
+git commit -q -m c1.2
+
+echo "line 2" >> added-lines.txt
+echo "line 2" >> added-lines-around.txt
+echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt
+git add added-lines.txt
+git add added-lines-around.txt
+git add coalesce-adjacent-hunks.txt
+git commit -q -m c1.3
+
+echo "line 2" >> simple.txt
+git add simple.txt
+git commit -q -m c2
+
+echo -e "line 4\nline 5\nline 6" >> multiline-hunks.txt
+git add multiline-hunks.txt
+git commit -q -m c2.1
+
+echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines.txt
+echo -e "line 1\nline 2\nline 3\nline 4\nline 5\nline 6" >> deleted-lines-multiple-hunks.txt
+git add deleted-lines.txt
+git add deleted-lines-multiple-hunks.txt
+git commit -q -m c2.2
+
+echo -e "line 1\nline 2\nline 3" > added-line-before-changed-line.txt
+git add added-line-before-changed-line.txt
+git commit -q -m c2.3
+
+echo -e "line 1\nline 2" > same-line-changed-twice.txt
+echo -e "line 1\nline in between\nline 2" > coalesce-adjacent-hunks.txt
+git add same-line-changed-twice.txt
+git add coalesce-adjacent-hunks.txt
+git commit -q -m c2.4
+
+echo "line 3" >> simple.txt
+git add simple.txt
+git commit -q -m c3
+
+echo -e "line 3\nline 4" > deleted-lines.txt
+echo -e "line 2\nline 4" > deleted-lines-multiple-hunks.txt
+git add deleted-lines.txt
+git add deleted-lines-multiple-hunks.txt
+git commit -q -m c3.1
+
+echo -e "line 3\nline 4" > changed-lines.txt
+echo -e "line 1\nline 2\nline 3 changed\nline 4\nline 5\nline 6" > changed-line-between-unchanged-lines.txt
+git add changed-lines.txt
+git add changed-line-between-unchanged-lines.txt
+git commit -q -m c3.2
+
+echo -e "line 2\nline 3" > added-line-before-changed-line.txt
+echo -e "line 1\nline 2" > coalesce-adjacent-hunks.txt
+git add added-line-before-changed-line.txt
+git add coalesce-adjacent-hunks.txt
+git commit -q -m c3.3
+
+echo -e "line 1\nline 2 changed" > same-line-changed-twice.txt
+git add same-line-changed-twice.txt
+git commit -q -m c3.4
+
+echo "line 4" >> simple.txt
+git add simple.txt
+git commit -q -m c4
+
+echo -e "line 7\nline 8\nline 9" >> multiline-hunks.txt
+git add multiline-hunks.txt
+git commit -q -m c4.1
+
+echo -e "line 1\nline 3\nline 2\nline 4" > switched-lines.txt
+git add switched-lines.txt
+git commit -q -m c4.2
+
+echo -e "line 2 changed\nline 3" > added-line-before-changed-line.txt
+git add added-line-before-changed-line.txt
+git commit -q -m c4.3
+
+echo -e "line 1\nline 2 changed a second time" > same-line-changed-twice.txt
+git add same-line-changed-twice.txt
+git commit -q -m c4.4
+
+echo -e " line 1\n\n line 2\n\n line 3" > empty-lines-histogram.txt
+cp empty-lines-histogram.txt empty-lines-myers.txt
+git add empty-lines-histogram.txt empty-lines-myers.txt
+git commit -q -m c4.5
+
+echo -e "line 0\nline 1\nline 2" > added-lines.txt
+echo -e "line 0\nline 1\nline 2\nline 3" > added-lines-around.txt
+git add added-lines.txt
+git add added-lines-around.txt
+git commit -q -m c5
+
+echo -e "line 4" > deleted-lines.txt
+git add deleted-lines.txt
+git commit -q -m c5.1
+
+echo -e "line 1\nline 2\nline 3\nline 4" > switched-lines.txt
+git add switched-lines.txt
+git commit -q -m c5.2
+
+echo -e "line 1\nline 2 changed\nline 3" > added-line-before-changed-line.txt
+git add added-line-before-changed-line.txt
+git commit -q -m c5.3
+
+echo -e " line 1\n\n line in between\n\n line 2\n\n line in between\n\n line 3" > empty-lines-histogram.txt
+cp empty-lines-histogram.txt empty-lines-myers.txt
+git add empty-lines-histogram.txt empty-lines-myers.txt
+git commit -q -m c5.4
+
+# The commit history created by the commits above this line is linear: it only
+# contains commits that have exactly one parent.
+# Below this line, there are also commits that have more than one parent.
+
+echo -e "line 1 original\nline 2\n line 3" > resolved-conflict.txt
+git add resolved-conflict.txt
+git commit -q -m c6
+
+echo -e "line 1 changed\nline 2\n line 3" > resolved-conflict.txt
+git add resolved-conflict.txt
+git commit -q -m c7
+
+git checkout -b different-branch-to-create-a-conflict
+git reset --hard HEAD~1
+
+echo -e "line 1 changed in a different way\nline 2\n line 3" > resolved-conflict.txt
+git add resolved-conflict.txt
+git commit -q -m c8
+
+git checkout main
+git merge different-branch-to-create-a-conflict || true
+
+echo -e "line 1 conflict resolved\nline 2\n line 3" > resolved-conflict.txt
+git add resolved-conflict.txt
+git commit -q -m c9
+
+echo -e "line 1\nline 2\n line 3" > file-in-one-chain-of-ancestors.txt
+git add file-in-one-chain-of-ancestors.txt
+git commit -q -m c10
+
+git checkout -b different-branch-that-does-not-contain-file
+git reset --hard HEAD~1
+
+echo -e "line 4\nline 5\n line 6" > different-file-in-another-chain-of-ancestors.txt
+git add different-file-in-another-chain-of-ancestors.txt
+git commit -q -m c11
+
+git checkout main
+git merge different-branch-that-does-not-contain-file || true
+
+echo -e "line 1\nline 2\n line 3" > file-only-changed-in-branch.txt
+git add file-only-changed-in-branch.txt
+git commit -q -m c12
+
+git checkout -b branch-that-has-one-commit
+
+echo -e "line 1 changed\nline 2\n line 3" > file-only-changed-in-branch.txt
+git add file-only-changed-in-branch.txt
+git commit -q -m c13
+
+git checkout main
+git merge branch-that-has-one-commit || true
+
+git blame --porcelain simple.txt > .git/simple.baseline
+git blame --porcelain multiline-hunks.txt > .git/multiline-hunks.baseline
+git blame --porcelain deleted-lines.txt > .git/deleted-lines.baseline
+git blame --porcelain deleted-lines-multiple-hunks.txt > .git/deleted-lines-multiple-hunks.baseline
+git blame --porcelain changed-lines.txt > .git/changed-lines.baseline
+git blame --porcelain changed-line-between-unchanged-lines.txt > .git/changed-line-between-unchanged-lines.baseline
+git blame --porcelain added-lines.txt > .git/added-lines.baseline
+git blame --porcelain added-lines-around.txt > .git/added-lines-around.baseline
+git blame --porcelain switched-lines.txt > .git/switched-lines.baseline
+git blame --porcelain added-line-before-changed-line.txt > .git/added-line-before-changed-line.baseline
+git blame --porcelain same-line-changed-twice.txt > .git/same-line-changed-twice.baseline
+git blame --porcelain coalesce-adjacent-hunks.txt > .git/coalesce-adjacent-hunks.baseline
+
+git blame --porcelain resolved-conflict.txt > .git/resolved-conflict.baseline
+git blame --porcelain file-in-one-chain-of-ancestors.txt > .git/file-in-one-chain-of-ancestors.baseline
+git blame --porcelain different-file-in-another-chain-of-ancestors.txt > .git/different-file-in-another-chain-of-ancestors.baseline
+git blame --porcelain file-only-changed-in-branch.txt > .git/file-only-changed-in-branch.baseline
+
+git blame --porcelain empty-lines-histogram.txt > .git/empty-lines-histogram.baseline
+
+git config --local diff.algorithm myers
+
+git blame --porcelain empty-lines-myers.txt > .git/empty-lines-myers.baseline
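The script finishes by recording `git blame --porcelain` output as baseline files under `.git/`. A test could pick those up through the `fixture_path()` helper added further up; the sketch below only shows that wiring and is not part of the patch. It relies on `simple.txt` gaining one line in each of c1 through c4, and on the porcelain format prefixing every blamed content line with a tab.

// Hypothetical consumer of a generated baseline; only `fixture_path()` and the
// file names come from the patch, the test itself is an illustration.
#[test]
fn simple_baseline_has_one_entry_per_line() {
    let worktree = fixture_path();
    let baseline = std::fs::read_to_string(worktree.join(".git").join("simple.baseline")).unwrap();
    // `git blame --porcelain` prefixes each blamed content line with a tab.
    let blamed_lines = baseline.lines().filter(|line| line.starts_with('\t')).count();
    assert_eq!(blamed_lines, 4, "simple.txt gains one line in each of c1..c4");
}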
diff --git a/gix/Cargo.toml b/gix/Cargo.toml
index 2afbf8d2b59..9211dfb0960 100644
--- a/gix/Cargo.toml
+++ b/gix/Cargo.toml
@@ -141,6 +141,9 @@ blob-diff = ["gix-diff/blob", "attributes"]
 
 ## Add functions to specifically merge files, using the standard three-way merge that git offers.
 blob-merge = ["blob-diff", "dep:gix-merge", "attributes"]
 
+## Add a blame command similar to `git blame`.
+blame = ["dep:gix-blame"]
+
 ## Make it possible to turn a tree into a stream of bytes, which can be decoded to entries and turned into various other formats.
 worktree-stream = ["gix-worktree-stream", "attributes"]
@@ -368,6 +371,7 @@
 gix-command = { version = "^0.3.10", path = "../gix-command", optional = true }
 gix-worktree-stream = { version = "^0.16.0", path = "../gix-worktree-stream", optional = true }
 gix-archive = { version = "^0.16.0", path = "../gix-archive", default-features = false, optional = true }
+gix-blame = { version = "^0.0.0", path = "../gix-blame", optional = true }
 
 # For communication with remotes
 gix-protocol = { version = "^0.46.0", path = "../gix-protocol", optional = true }
diff --git a/gix/src/lib.rs b/gix/src/lib.rs
index 0c2d376b4f4..e80a3baffc0 100644
--- a/gix/src/lib.rs
+++ b/gix/src/lib.rs
@@ -95,6 +95,8 @@
 pub use gix_actor as actor;
 #[cfg(feature = "attributes")]
 pub use gix_attributes as attrs;
+#[cfg(feature = "blame")]
+pub use gix_blame as blame;
 #[cfg(feature = "command")]
 pub use gix_command as command;
 pub use gix_commitgraph as commitgraph;
diff --git a/src/plumbing/main.rs b/src/plumbing/main.rs
index c7859a0938f..3e90047bc57 100644
--- a/src/plumbing/main.rs
+++ b/src/plumbing/main.rs
@@ -1483,6 +1483,15 @@ pub fn main() -> Result<()> {
                 },
             ),
         },
+        Subcommands::Blame { file } => prepare_and_run(
+            "blame",
+            trace,
+            verbose,
+            progress,
+            progress_keep_open,
+            None,
+            move |_progress, out, _err| core::repository::blame::blame_file(repository(Mode::Lenient)?, &file, out),
+        ),
         Subcommands::Completions { shell, out_dir } => {
             let mut app = Args::command();
 
diff --git a/src/plumbing/options/mod.rs b/src/plumbing/options/mod.rs
index 47f92b4034f..94116df9ca1 100644
--- a/src/plumbing/options/mod.rs
+++ b/src/plumbing/options/mod.rs
@@ -150,6 +150,10 @@ pub enum Subcommands {
     /// Subcommands that need no git repository to run.
     #[clap(subcommand)]
     Free(free::Subcommands),
+    /// Blame lines in a file.
+    Blame {
+        file: std::ffi::OsString,
+    },
     /// Generate shell completions to stdout or a directory.
     #[clap(visible_alias = "generate-completions", visible_alias = "shell-completions")]
     Completions {