-
-
Notifications
You must be signed in to change notification settings - Fork 295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Draft: explore gix APIs, experiment with gix-blame API #1453
base: main
Are you sure you want to change the base?
Changes from 8 commits
48d6050
7c8ff66
4fb06e4
7031d9e
2001525
75ec62a
19710f1
301967c
9c6cf0a
27e21cd
1cad4df
1c595bc
07dba00
e931a65
b54ef01
87a7ae5
08838ed
30f546b
f573bef
c54dc04
ca37a03
09b1d23
857cbcc
30fbb7d
cfe40f5
0f148ac
34d7f55
f1482dc
ed4873d
06e3405
2eb1a16
1c35e06
29c2738
20c43cb
d273131
5a10add
9767ddd
f973e43
ab69d6b
de3f183
1ddc883
b9b1214
6608af5
e4d42fa
875e580
53fbe0c
bddcfd8
70cdb19
1f524cb
6ba878b
6aa23a2
82a9aa0
e846264
3de5028
125326e
a964579
125ee47
ce6b0c7
cfc0359
7ca85e2
01f747f
d64fb23
77e5f03
4b1c509
e038dad
42fa847
c1badf3
55a19cf
6474729
70d56db
9ce6d35
a2cd71b
de76eeb
83a6e03
b850da5
e6da874
6b16568
7a7fd0a
e6103df
3f0de4b
68e5f17
0b7cd03
9aff3e4
362e7e6
5909dc1
34530fd
5a8af77
060f73d
3a296e5
bb16cc1
1e4191d
381b673
817b2ce
b953eaa
76b047c
6290e10
a93323a
ca8f9e2
025ff2a
d053429
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,309 @@ | ||
use std::{ops::Range, path::PathBuf, str::FromStr}; | ||
|
||
use gix_diff::blob::intern::Token; | ||
use gix_hash::ObjectId; | ||
use gix_odb::pack::FindExt; | ||
use gix_ref::{file::ReferenceExt, store::WriteReflog}; | ||
|
||
/// Experimental holder for the state a blame computation will need. | ||
/// | ||
/// For now it only owns the blob-diff platform that `Blame::new` assembles. | ||
struct Blame { | ||
// Underscore-prefixed: nothing in the visible code reads this field yet. | ||
_resource_cache: gix_diff::blob::Platform, | ||
} | ||
|
||
impl Blame { | ||
/// Builds a `Blame` for the repository whose worktree lives at `worktree_root`. | ||
/// | ||
/// Loads `<worktree_root>/.git/index`, probes filesystem capabilities of the git | ||
/// directory, and wires both into a `gix_diff::blob::Platform`. | ||
/// | ||
/// # Panics | ||
/// | ||
/// Panics if the index file cannot be loaded (the result is unwrapped). | ||
fn new(worktree_root: impl Into<PathBuf>) -> Self { | ||
let worktree_root: PathBuf = worktree_root.into(); | ||
let git_dir = worktree_root.join(".git"); | ||
// The object hash kind is hard-coded to SHA-1. | ||
let index = | ||
gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); | ||
|
||
let capabilities = gix_fs::Capabilities::probe(&git_dir); | ||
// NOTE(review): going by the constructor's name, the `false` below is presumably the | ||
// ignore-case flag - confirm against the gix_worktree documentation. | ||
let stack = gix_worktree::Stack::from_state_and_ignore_case( | ||
&worktree_root, | ||
false, | ||
gix_worktree::stack::State::AttributesAndIgnoreStack { | ||
attributes: Default::default(), | ||
ignore: Default::default(), | ||
}, | ||
&index, | ||
index.path_backing(), | ||
); | ||
|
||
// No worktree roots are configured on either side and every option besides the | ||
// probed filesystem capabilities is left at a default or zero value. | ||
let resource_cache = gix_diff::blob::Platform::new( | ||
Default::default(), | ||
gix_diff::blob::Pipeline::new( | ||
gix_diff::blob::pipeline::WorktreeRoots { | ||
old_root: None, | ||
new_root: None, | ||
}, | ||
gix_filter::Pipeline::new(Default::default(), Default::default()), | ||
vec![], | ||
gix_diff::blob::pipeline::Options { | ||
large_file_threshold_bytes: 0, | ||
fs: capabilities, | ||
}, | ||
), | ||
gix_diff::blob::pipeline::Mode::ToGit, | ||
stack, | ||
); | ||
|
||
Blame { | ||
_resource_cache: resource_cache, | ||
} | ||
} | ||
} | ||
|
||
/// Smoke test: constructing a `Blame` for the fixture repository must not panic. | ||
#[test] | ||
fn blame_works() { | ||
let _blame = Blame::new(fixture_path()); | ||
} | ||
|
||
/// Exploratory test that strings together the gix APIs a blame implementation needs. | ||
/// | ||
/// It resolves `HEAD`, diffs the tree of `HEAD` against the tree of its last parent, | ||
/// line-diffs the single modified blob and then splits the range of lines that still | ||
/// need a suspect into "blamed on this commit" and "still to blame". | ||
/// | ||
/// # Panics | ||
/// | ||
/// Every fallible step is unwrapped, so any failure to read the fixture repository | ||
/// aborts the test. | ||
#[test] | ||
fn it_works() { | ||
let _worktree = gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap(); | ||
// TODO | ||
// At a high level, what we want to do is the following: | ||
// | ||
// - get the commit that belongs to a commit id | ||
// - walk through parents | ||
// - for each parent, do a diff and mark lines that don’t have a suspect (this is the term | ||
// used in `libgit2`) yet, but that have been changed in this commit | ||
// | ||
// The algorithm in `libgit2` works by going through parents and keeping a linked list of blame | ||
// suspects. It can be visualized as follows: | ||
// | ||
// <----------------------------------------> | ||
// <---------------><-----------------------> | ||
// <---><----------><-----------------------> | ||
// <---><----------><-------><-----><-------> | ||
// <---><---><-----><-------><-----><-------> | ||
// <---><---><-----><-------><-----><-><-><-> | ||
|
||
let worktree = fixture_path(); | ||
|
||
let store = gix_ref::file::Store::at( | ||
worktree.join(".git"), | ||
gix_ref::store::init::Options { | ||
write_reflog: WriteReflog::Disable, | ||
..Default::default() | ||
}, | ||
); | ||
let odb = odb_at(""); | ||
|
||
let mut reference = gix_ref::file::Store::find(&store, "HEAD").unwrap(); | ||
cruessler marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
let mut buffer = Vec::new(); | ||
|
||
let head_id = reference.peel_to_id_in_place(&store, &odb).unwrap(); | ||
let (head, _) = odb.find_commit(&head_id, &mut buffer).unwrap(); | ||
|
||
// A fresh buffer is shadowed in for every lookup because each decoded object | ||
// keeps borrowing the buffer it was read into. | ||
let mut buffer = Vec::new(); | ||
let head_tree_iter = odb | ||
.find(&head.tree(), &mut buffer) | ||
.unwrap() | ||
.0 | ||
.try_into_tree_iter() | ||
.unwrap(); | ||
|
||
let mut traverse = gix_traverse::commit::Simple::new(Some(head_id), &odb); | ||
|
||
// Advance the traversal once so that `commit_iter` below refers to a visited commit. | ||
traverse.next(); | ||
|
||
let iter = traverse.commit_iter(); | ||
let parent_ids = iter.parent_ids().collect::<Vec<_>>(); | ||
|
||
let last_parent_id = parent_ids.last().unwrap(); | ||
|
||
let mut buffer = Vec::new(); | ||
|
||
let (last_parent, _) = odb.find_commit(&last_parent_id, &mut buffer).unwrap(); | ||
|
||
let mut buffer = Vec::new(); | ||
let last_parent_tree_iter = odb | ||
.find(&last_parent.tree(), &mut buffer) | ||
.unwrap() | ||
.0 | ||
.try_into_tree_iter() | ||
.unwrap(); | ||
|
||
let mut recorder = gix_diff::tree::Recorder::default(); | ||
let _result = gix_diff::tree::Changes::from(last_parent_tree_iter) | ||
.needed_to_obtain(head_tree_iter, gix_diff::tree::State::default(), &odb, &mut recorder) | ||
.unwrap(); | ||
|
||
// The fixture is expected to change exactly one file between the parent and `HEAD`. | ||
assert!(matches!( | ||
recorder.records[..], | ||
[gix_diff::tree::recorder::Change::Modification { .. }] | ||
)); | ||
|
||
let [ref modification]: [gix_diff::tree::recorder::Change] = recorder.records[..] else { | ||
todo!() | ||
}; | ||
let gix_diff::tree::recorder::Change::Modification { previous_oid, oid, .. } = modification else { | ||
todo!() | ||
}; | ||
|
||
// The following lines are trying to get a line-diff between two commits. | ||
let git_dir = fixture_path().join(".git"); | ||
let index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, false, Default::default()).unwrap(); | ||
let stack = gix_worktree::Stack::from_state_and_ignore_case( | ||
worktree.clone(), | ||
false, | ||
gix_worktree::stack::State::AttributesAndIgnoreStack { | ||
attributes: Default::default(), | ||
ignore: Default::default(), | ||
}, | ||
&index, | ||
index.path_backing(), | ||
); | ||
let capabilities = gix_fs::Capabilities::probe(&git_dir); | ||
let mut resource_cache = gix_diff::blob::Platform::new( | ||
Default::default(), | ||
gix_diff::blob::Pipeline::new( | ||
gix_diff::blob::pipeline::WorktreeRoots { | ||
old_root: None, | ||
new_root: None, | ||
}, | ||
gix_filter::Pipeline::new(Default::default(), Default::default()), | ||
vec![], | ||
gix_diff::blob::pipeline::Options { | ||
large_file_threshold_bytes: 0, | ||
fs: capabilities, | ||
}, | ||
), | ||
gix_diff::blob::pipeline::Mode::ToGit, | ||
stack, | ||
); | ||
|
||
// Register the parent's blob as the old side and `HEAD`'s blob as the new side. | ||
resource_cache | ||
.set_resource( | ||
*previous_oid, | ||
gix_object::tree::EntryKind::Blob, | ||
"file.txt".into(), | ||
gix_diff::blob::ResourceKind::OldOrSource, | ||
&odb, | ||
) | ||
.unwrap(); | ||
resource_cache | ||
.set_resource( | ||
*oid, | ||
gix_object::tree::EntryKind::Blob, | ||
"file.txt".into(), | ||
gix_diff::blob::ResourceKind::NewOrDestination, | ||
&odb, | ||
) | ||
.unwrap(); | ||
|
||
let outcome = resource_cache.prepare_diff().unwrap(); | ||
let input = outcome.interned_input(); | ||
|
||
assert_eq!(input.before, [Token(0), Token(1), Token(2),]); | ||
assert_eq!(input.after, [Token(0), Token(1), Token(2), Token(3)]); | ||
|
||
// Assumption: this works because “imara-diff will compute a line diff by default”, so each | ||
// token represents a line. | ||
let number_of_lines: u32 = input.after.len().try_into().unwrap(); | ||
|
||
assert_eq!(number_of_lines, 4); | ||
|
||
// Half-open range of zero-based line numbers that do not have a suspect yet. | ||
let lines_to_blame: Vec<Range<u32>> = vec![0..number_of_lines]; | ||
|
||
assert_eq!(lines_to_blame, vec![0..4]); | ||
|
||
#[derive(Debug, PartialEq)] | ||
struct BlameEntry { | ||
range: Range<u32>, | ||
oid: ObjectId, | ||
} | ||
|
||
let mut lines_blamed: Vec<BlameEntry> = vec![]; | ||
|
||
let mut lines = Vec::new(); | ||
|
||
use gix_ref::bstr::ByteSlice; | ||
|
||
// The following lines were inspired by `gix::object::blob::diff::Platform::lines`. | ||
gix_diff::blob::diff( | ||
gix_diff::blob::Algorithm::Histogram, | ||
&input, | ||
|before: Range<u32>, after: Range<u32>| { | ||
// `lines` is reused across hunks: first the removed lines, then the added ones. | ||
lines.clear(); | ||
lines.extend( | ||
input.before[before.start as usize..before.end as usize] | ||
.iter() | ||
.map(|&line| input.interner[line].as_bstr()), | ||
); | ||
let end_of_before = lines.len(); | ||
lines.extend( | ||
input.after[after.start as usize..after.end as usize] | ||
.iter() | ||
.map(|&line| input.interner[line].as_bstr()), | ||
); | ||
let hunk_before = &lines[..end_of_before]; | ||
let hunk_after = &lines[end_of_before..]; | ||
if hunk_after.is_empty() { | ||
// Intentionally empty. | ||
} else if hunk_before.is_empty() { | ||
assert_eq!(hunk_after, ["line 4\n"]); | ||
} else { | ||
// Hunks that replace lines (both sides non-empty) are not inspected yet. | ||
} | ||
|
||
let mut new_lines_to_blame: Vec<Range<u32>> = Vec::new(); | ||
|
||
for range in &lines_to_blame { | ||
if range.contains(&after.start) { | ||
if range.contains(&after.end) { | ||
// <----------> | ||
// <---> | ||
// <--> <-> | ||
new_lines_to_blame.push(range.start..after.start); | ||
// Ranges are half-open, so the first line after the hunk is | ||
// `after.end` itself; `after.end + 1` would drop that line. | ||
new_lines_to_blame.push(after.end..range.end); | ||
|
||
lines_blamed.push(BlameEntry { | ||
range: after.clone(), | ||
oid: *oid, | ||
}); | ||
} else { | ||
// <--------> | ||
// <-------> | ||
// <--> | ||
new_lines_to_blame.push(range.start..after.start); | ||
|
||
lines_blamed.push(BlameEntry { | ||
range: after.start..range.end, | ||
oid: *oid, | ||
}); | ||
} | ||
} else { | ||
// <-------> | ||
// <------> | ||
// <--> | ||
// NOTE(review): this branch assumes the hunk overlaps the start of | ||
// `range`; a hunk that is disjoint from `range` also lands here. | ||
// Half-open ranges: the remainder starts at `after.end`. | ||
new_lines_to_blame.push(after.end..range.end); | ||
|
||
lines_blamed.push(BlameEntry { | ||
range: range.start..after.end, | ||
oid: *oid, | ||
}); | ||
} | ||
} | ||
|
||
assert_eq!(new_lines_to_blame, vec![0..3]); | ||
assert_eq!( | ||
lines_blamed, | ||
vec![BlameEntry { | ||
range: 3..4, | ||
oid: ObjectId::from_str("9c2a7090627d0fffa9ed001bf7be98f86c2c8068").unwrap() | ||
}] | ||
); | ||
assert_eq!(lines_blamed, vec![BlameEntry { range: 3..4, oid: *oid }]); | ||
}, | ||
); | ||
|
||
assert_eq!(lines, ["line 4\n"]); | ||
} | ||
|
||
/// Opens the object database at `<fixture>/<name>/.git/objects`. | ||
/// | ||
/// Pass `""` for `name` to address the repository at the fixture root itself. | ||
/// Panics if the fixture or the object database cannot be opened. | ||
fn odb_at(name: &str) -> gix_odb::Handle { | ||
gix_odb::at(fixture_path().join(name).join(".git/objects")).unwrap() | ||
} | ||
|
||
/// Returns the path that `gix_testtools::scripted_fixture_read_only` yields for the | ||
/// `make_blame_repo.sh` fixture script; panics if that call fails. | ||
fn fixture_path() -> PathBuf { | ||
gix_testtools::scripted_fixture_read_only("make_blame_repo.sh").unwrap() | ||
} | ||
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,18 +6,19 @@ git init -q | |
git config merge.ff false | ||
|
||
git checkout -q -b main | ||
git commit -q --allow-empty -m c1 | ||
git tag at-c1 | ||
git commit -q --allow-empty -m c2 | ||
git commit -q --allow-empty -m c3 | ||
git commit -q --allow-empty -m c4 | ||
|
||
git checkout -q -b branch1 | ||
git commit -q --allow-empty -m b1c1 | ||
git tag at-b1c1 | ||
git commit -q --allow-empty -m b1c2 | ||
echo "line 1" >> file.txt | ||
git add file.txt | ||
git commit -q -m c1 | ||
|
||
git checkout -q main | ||
git commit -q --allow-empty -m c5 | ||
git tag at-c5 | ||
git merge branch1 -m m1b1 | ||
echo "line 2" >> file.txt | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd do the same - start extremely simple, maybe do the first rough implementation so it passes this test, and then think of some tougher cases to throw at it, validating that they still come out right. Maybe it's worth investing into a baseline test which parses the output of There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for the input, sounds like a great plan! |
||
git add file.txt | ||
git commit -q -m c2 | ||
|
||
echo "line 3" >> file.txt | ||
git add file.txt | ||
git commit -q -m c3 | ||
|
||
echo "line 4" >> file.txt | ||
git add file.txt | ||
git commit -q -m c4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You'd want to use the equally named `gix_object::FindExt`, it's easier to use.