Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added new Diff on both DiffBuilders for more detailed DiffModel. #83

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 127 additions & 0 deletions DiffPlex/DiffBuilder/InlineDiffBuilder.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DiffPlex.Chunkers;
using DiffPlex.DiffBuilder.Model;
using DiffPlex.Model;
Expand All @@ -10,6 +11,8 @@ public class InlineDiffBuilder : IInlineDiffBuilder
{
private readonly IDiffer differ;

private delegate ChangeType PieceBuilder(string oldText, string newText, List<DiffPiece> pieces, bool ignoreWhitespace, bool ignoreCase);

/// <summary>
/// Gets the default singleton instance of the inline diff builder.
/// </summary>
Expand Down Expand Up @@ -77,6 +80,130 @@ public static DiffPaneModel Diff(IDiffer differ, string oldText, string newText,
return model;
}

public static DiffPaneModel Diff(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of having nearly the same logic twice, could we instead share the BuildDiffPieces logic that SideBySide uses by extracting it to a common place? Then inline could do one last step to flatten it.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes you are probably right. Was trying to solve it twice now.

IDiffer differ,
string oldText, string newText,
List<IChunker> detailsPack,
bool ignoreWhiteSpace = true, bool ignoreCase = false)
{
// Both texts are required; reject null up front.
if (oldText == null) throw new ArgumentNullException(nameof(oldText));
if (newText == null) throw new ArgumentNullException(nameof(newText));

// Without a differ, delegate to the four-argument Diff overload (declared elsewhere in this class).
if (differ == null) return Diff(oldText, newText, ignoreWhiteSpace, ignoreCase);

// Build the ordered chain of chunkers. Each entry describes one level of detail;
// a linked list is used so every level can reach the next one through Node.Next.
LinkedList<IChunker> chunkers;
if (detailsPack == null || !detailsPack.Any())
{
// Default hierarchy when the caller supplies nothing: line -> word -> character.
chunkers = new LinkedList<IChunker>();
chunkers.AddLast(DiffPlex.Chunkers.LineChunker.Instance);
chunkers.AddLast(DiffPlex.Chunkers.WordChunker.Instance);
chunkers.AddLast(DiffPlex.Chunkers.CharacterChunker.Instance);
}
else
{
// Use the caller's chunkers in the order given.
chunkers = new LinkedList<IChunker>(detailsPack);
}

var model = new DiffPaneModel();
// The first chunker produces the top-level pieces stored in model.Lines.
var cnode = chunkers.First;
var diffResult = differ.CreateDiffs(oldText, newText, ignoreWhiteSpace, ignoreCase, cnode.Value);
// The remaining chunkers (if any) are applied to paired changed pieces via the builder
// returned by NextPieceBuilderInternal; the ChangeType returned here is not used.
BuildDiffPieces(diffResult, model.Lines, NextPieceBuilderInternal(differ, cnode.Next), ignoreWhiteSpace, ignoreCase);

return model;
}


/// <summary>
/// Creates the piece builder for the chunker held by <paramref name="chunkerNode"/>.
/// </summary>
/// <param name="differ">Differ used to compare the two texts handed to the returned builder.</param>
/// <param name="chunkerNode">Node of the chunker for this level, or <c>null</c> when no level is left.</param>
/// <returns>
/// <c>null</c> when <paramref name="chunkerNode"/> is <c>null</c>; otherwise a delegate that diffs its
/// two texts with this node's chunker and builds the pieces, passing on a builder for the following node.
/// </returns>
private static PieceBuilder NextPieceBuilderInternal(
    IDiffer differ,
    LinkedListNode<IChunker> chunkerNode)
{
    // End of the chunker chain: there is nothing further to split by.
    if (chunkerNode == null)
    {
        return null;
    }

    PieceBuilder levelBuilder = (oldChunk, newChunk, target, skipWhiteSpace, skipCase) =>
    {
        var levelDiff = differ.CreateDiffs(oldChunk, newChunk, skipWhiteSpace, skipCase, chunkerNode.Value);
        // The builder for the next level is created on each invocation.
        var nestedBuilder = NextPieceBuilderInternal(differ, chunkerNode.Next);
        return BuildDiffPieces(levelDiff, target, nestedBuilder, skipWhiteSpace, skipCase);
    };

    return levelBuilder;
}

/// <summary>
/// Appends the pieces described by <paramref name="diffResult"/> to <paramref name="pieces"/> as a
/// single inline sequence and reports whether the list contains any change.
/// </summary>
/// <param name="diffResult">Diff of the old and new text at the current level of detail.</param>
/// <param name="pieces">List that receives the generated pieces.</param>
/// <param name="subPieceBuilder">
/// Builder for the next level of detail, or <c>null</c>. When present it is invoked for each
/// old/new pair inside a diff block; it fills the piece's sub-pieces and its result becomes the piece's type.
/// </param>
/// <param name="ignoreWhiteSpace">Passed through unchanged to <paramref name="subPieceBuilder"/>.</param>
/// <param name="ignoreCase">Passed through unchanged to <paramref name="subPieceBuilder"/>.</param>
/// <returns>
/// <see cref="ChangeType.Modified"/> if any piece in <paramref name="pieces"/> is modified, inserted or
/// deleted; otherwise <see cref="ChangeType.Unchanged"/>.
/// </returns>
private static ChangeType BuildDiffPieces(
    DiffResult diffResult,
    List<DiffPiece> pieces, PieceBuilder subPieceBuilder,
    bool ignoreWhiteSpace, bool ignoreCase)
{
    int oldIndex = 0;
    int newIndex = 0;

    foreach (var block in diffResult.DiffBlocks)
    {
        // Pieces in front of the block are emitted as unchanged, taken from the old side.
        for (; newIndex < block.InsertStartB && oldIndex < block.DeleteStartA; oldIndex++, newIndex++)
        {
            pieces.Add(new DiffPiece(diffResult.PiecesOld[oldIndex], ChangeType.Unchanged, oldIndex + 1));
        }

        // Pair deleted and inserted pieces one-to-one for as long as both sides have some.
        int offset = 0;
        while (offset < Math.Min(block.DeleteCountA, block.InsertCountB))
        {
            // The piece carries the old text. Without a sub-builder it stays Deleted and
            // the paired new text is not emitted.
            var paired = new DiffPiece(diffResult.PiecesOld[offset + block.DeleteStartA], ChangeType.Deleted, oldIndex + 1);

            if (subPieceBuilder != null)
            {
                paired.Type = subPieceBuilder(
                    diffResult.PiecesOld[oldIndex],
                    diffResult.PiecesNew[newIndex],
                    paired.SubPieces,
                    ignoreWhiteSpace,
                    ignoreCase);
            }

            pieces.Add(paired);
            oldIndex++;
            newIndex++;
            offset++;
        }

        // Whatever is left over on one side is a pure deletion or a pure insertion.
        if (block.DeleteCountA > block.InsertCountB)
        {
            while (offset < block.DeleteCountA)
            {
                pieces.Add(new DiffPiece(diffResult.PiecesOld[offset + block.DeleteStartA], ChangeType.Deleted, oldIndex + 1));
                oldIndex++;
                offset++;
            }
        }
        else
        {
            while (offset < block.InsertCountB)
            {
                pieces.Add(new DiffPiece(diffResult.PiecesNew[offset + block.InsertStartB], ChangeType.Inserted, newIndex + 1));
                newIndex++;
                offset++;
            }
        }
    }

    // Pieces after the last block are emitted as unchanged.
    for (; newIndex < diffResult.PiecesNew.Length && oldIndex < diffResult.PiecesOld.Length; oldIndex++, newIndex++)
    {
        pieces.Add(new DiffPiece(diffResult.PiecesOld[oldIndex], ChangeType.Unchanged, oldIndex + 1));
    }

    // Any changed piece makes the whole diff count as modified.
    bool anyChange = pieces.Any(p =>
        p.Type == ChangeType.Modified
        || p.Type == ChangeType.Inserted
        || p.Type == ChangeType.Deleted);

    return anyChange ? ChangeType.Modified : ChangeType.Unchanged;
}


private static void BuildDiffPieces(DiffResult diffResult, List<DiffPiece> pieces)
{
int bPos = 0;
Expand Down
5 changes: 4 additions & 1 deletion DiffPlex/DiffBuilder/Model/DiffPaneModel.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
using System.Collections.Generic;
using System;
using System.Collections.Generic;
using System.Linq;

namespace DiffPlex.DiffBuilder.Model
{
public class DiffPaneModel
{
/// <summary>
/// Alias for <see cref="Lines"/>; returns the same list. Provided so the name still fits
/// when the first chunker used is not the line chunker and the pieces are therefore not lines.
/// </summary>
public List<DiffPiece> Chunks => Lines;
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why make an alias like this?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because when the line chunker isn't used first, the current name "Lines" doesn't describe its contents.

But it's just a small alias, so it's no problem for me to delete it.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sounds fine, can you put summary comments on both maybe to clear up any confusion?


/// <summary>
/// The top-level pieces of this pane. Also exposed as <see cref="Chunks"/>.
/// NOTE(review): presumably these are only actual lines when the first chunker splits by line — confirm against the diff builders.
/// </summary>
public List<DiffPiece> Lines { get; }

public bool HasDifferences
Expand Down
53 changes: 53 additions & 0 deletions DiffPlex/DiffBuilder/SideBySideDiffBuilder.cs
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,59 @@ public static SideBySideDiffModel Diff(IDiffer differ, string oldText, string ne
return model;
}

public static SideBySideDiffModel Diff(
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Couldn't we update the other methods in this file to use the new more general method here?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good point. If the unit tests still pass, I'll look into this.

IDiffer differ,
string oldText, string newText,
List<IChunker> detailsPack,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I like the idea of passing an array, but the name detailsPack is too general. Maybe call it chunkers or something similar.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, then Chunkers it is.

bool ignoreWhiteSpace = true, bool ignoreCase = false)
{
// Both texts are required; reject null up front.
if (oldText == null) throw new ArgumentNullException(nameof(oldText));
if (newText == null) throw new ArgumentNullException(nameof(newText));

// Without a differ, delegate to the four-argument Diff overload (declared elsewhere in this class).
if (differ == null) return Diff(oldText, newText, ignoreWhiteSpace, ignoreCase);

// Build the ordered chain of chunkers. Each entry describes one level of detail;
// a linked list is used so every level can reach the next one through Node.Next.
LinkedList<IChunker> chunkers;
if (detailsPack == null || !detailsPack.Any())
{
// Default hierarchy when the caller supplies nothing: line -> word -> character.
chunkers = new LinkedList<IChunker>();
chunkers.AddLast(DiffPlex.Chunkers.LineChunker.Instance);
chunkers.AddLast(DiffPlex.Chunkers.WordChunker.Instance);
chunkers.AddLast(DiffPlex.Chunkers.CharacterChunker.Instance);
}
else
{
// Use the caller's chunkers in the order given.
chunkers = new LinkedList<IChunker>(detailsPack);
}

var model = new SideBySideDiffModel();
// The first chunker produces the top-level pieces of both panes.
var cnode = chunkers.First;

var diffResult = differ.CreateDiffs(oldText, newText, ignoreWhiteSpace, ignoreCase, cnode.Value);
// Old and new panes are filled together; the remaining chunkers (if any) are applied through the
// builder returned by NextPieceBuilderInternal. The ChangeType returned here is not used.
BuildDiffPieces(diffResult, model.OldText.Lines, model.NewText.Lines, NextPieceBuilderInternal(differ, cnode.Next), ignoreWhiteSpace, ignoreCase);

return model;
}


/// <summary>
/// Creates the piece builder for the chunker held by <paramref name="chunkerNode"/>.
/// </summary>
/// <param name="differ">Differ used to compare the two texts handed to the returned builder.</param>
/// <param name="chunkerNode">Node of the chunker for this level, or <c>null</c> when no level is left.</param>
/// <returns>
/// <c>null</c> when <paramref name="chunkerNode"/> is <c>null</c>; otherwise a delegate that diffs its
/// two texts with this node's chunker and builds the old and new pieces, passing on a builder for the
/// following node.
/// </returns>
private static PieceBuilder NextPieceBuilderInternal(
    IDiffer differ,
    LinkedListNode<IChunker> chunkerNode)
{
    // End of the chunker chain: there is nothing further to split by.
    if (chunkerNode == null)
    {
        return null;
    }

    PieceBuilder levelBuilder = (oldChunk, newChunk, oldTarget, newTarget, skipWhiteSpace, skipCase) =>
    {
        var levelDiff = differ.CreateDiffs(oldChunk, newChunk, skipWhiteSpace, skipCase, chunkerNode.Value);
        // The builder for the next level is created on each invocation.
        var nestedBuilder = NextPieceBuilderInternal(differ, chunkerNode.Next);
        return BuildDiffPieces(levelDiff, oldTarget, newTarget, nestedBuilder, skipWhiteSpace, skipCase);
    };

    return levelBuilder;
}


private static ChangeType BuildWordDiffPiecesInternal(string oldText, string newText, List<DiffPiece> oldPieces, List<DiffPiece> newPieces, bool ignoreWhiteSpace, bool ignoreCase)
{
var diffResult = Differ.Instance.CreateDiffs(oldText, newText, ignoreWhiteSpace, ignoreCase, WordChunker.Instance);
Expand Down
63 changes: 63 additions & 0 deletions Facts.DiffPlex/InlineDiffBuilderFacts.cs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
using System;
using System.Collections.Generic;
using System.Linq;
using DiffPlex;
using DiffPlex.DiffBuilder;
using DiffPlex.DiffBuilder.Model;
Expand Down Expand Up @@ -344,6 +345,68 @@ public void Can_compare_whitespace()
new DiffPiece("5", ChangeType.Unchanged, 5),
});
}


/// <summary>
/// Verifies that the inline builder, given no explicit chunkers (<c>detailsPack: null</c>), produces a
/// nested model: top-level lines, word-level sub-pieces, and character-level sub-pieces beneath those.
/// </summary>
[Fact]
public void Will_build_hierarchial_diffModel_lines_words_chars()
{
    // The verbatim strings start at column 0 on purpose: leading indentation would become part of the text.
    string textOld =
@"What is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It has survived not only five centuries, but also the leap into electronic typesetting,
remaining essentially unchanged.
It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages,
and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.";

    string textNew =
@"What the heck is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It hos survived not only five centuries, but also the leap into electronic typesatting,
remaining essentially unchanged.
It was popularised in the 1961s with the release of Letraset sheets containing Lorem Ipsum passages,
and more recently with desktop publishing software like Aldus PagesMaker including versions of Lorem Ipsum.";

    var diff = InlineDiffBuilder.Diff(
        new Differ(),
        textOld, textNew,
        detailsPack: null,
        ignoreWhiteSpace: false,
        ignoreCase: false
    );

    Assert.NotNull(diff);
    // Assert.Equal reports the actual count on failure, unlike Assert.True(count == n).
    Assert.Equal(8, diff.Lines.Count);
    Assert.True(diff.HasDifferences);

    // Check on Line level
    var changedLines = diff.Lines.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(5, changedLines.Count);

    // Check on Word level: "the heck " was inserted into the first line.
    var changedWords = changedLines[0].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(4, changedWords.Count);
    Assert.Equal(ChangeType.Inserted, changedWords[0].Type);
    Assert.Equal("the", changedWords[0].Text);
    Assert.Equal(ChangeType.Inserted, changedWords[1].Type);
    Assert.Equal(" ", changedWords[1].Text);
    Assert.Equal(ChangeType.Inserted, changedWords[2].Type);
    Assert.Equal("heck", changedWords[2].Text);
    Assert.Equal(ChangeType.Inserted, changedWords[3].Type);
    Assert.Equal(" ", changedWords[3].Text);

    // Check on Character level: in the inline model only the deleted character is reported for the pair.
    var changedChars = changedLines[2].SubPieces[30].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Single(changedChars);
    Assert.Equal(ChangeType.Deleted, changedChars[0].Type);
    Assert.Equal("e", changedChars[0].Text);
}
}
}
}
85 changes: 85 additions & 0 deletions Facts.DiffPlex/SideBySideDiffBuilderFacts.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using DiffPlex.DiffBuilder.Model;
using Moq;
using Xunit;
using System.Linq;

namespace Facts.DiffPlex
{
Expand Down Expand Up @@ -333,6 +334,90 @@ public void Will_build_diffModel_for_partially_different_lines()
Assert.True(bidiff.OldText.HasDifferences && bidiff.NewText.HasDifferences);
}

/// <summary>
/// Verifies that the side-by-side builder, given no explicit chunkers (<c>detailsPack: null</c>), produces
/// a nested model in both panes: top-level lines, word-level sub-pieces, and character-level sub-pieces.
/// </summary>
[Fact]
public void Will_build_hierarchial_diffModel_lines_words_chars()
{
    // The verbatim strings start at column 0 on purpose: leading indentation would become part of the text.
    string textOld =
@"What is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
Lorem Ipsum has been the industry's standard dummy text ever since the 1500s,
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It has survived not only five centuries, but also the leap into electronic typesetting,
remaining essentially unchanged.
It was popularised in the 1960s with the release of Letraset sheets containing Lorem Ipsum passages,
and more recently with desktop publishing software like Aldus PageMaker including versions of Lorem Ipsum.";

    string textNew =
@"What the heck is Lorem Ipsum?
Lorem Ipsum is simply dummy text of the printing and typesetting industry.
when an unknown printer took a galley of type and scrambled it to make a type
specimen book. It hos survived not only five centuries, but also the leap into electronic typesatting,
remaining essentially unchanged.
It was popularised in the 1961s with the release of Letraset sheets containing Lorem Ipsum passages,
and more recently with desktop publishing software like Aldus PagesMaker including versions of Lorem Ipsum.";

    var bidiff = SideBySideDiffBuilder.Diff(
        new Differ(),
        textOld, textNew,
        detailsPack: null,
        ignoreWhiteSpace: false,
        ignoreCase: false
    );

    Assert.NotNull(bidiff);
    // Assert.Equal reports the actual count on failure, unlike Assert.True(count == n).
    Assert.Equal(8, bidiff.OldText.Lines.Count);
    Assert.Equal(8, bidiff.NewText.Lines.Count);

    // Asserted separately so a failure identifies which pane is wrong.
    Assert.True(bidiff.OldText.HasDifferences);
    Assert.True(bidiff.NewText.HasDifferences);

    // Check on Line level
    var changedOldLines = bidiff.OldText.Lines.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(5, changedOldLines.Count);

    var changedNewLines = bidiff.NewText.Lines.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(5, changedNewLines.Count);

    // Check on Word level: the old pane holds imaginary placeholders where the new pane has insertions.
    var changedOldWords = changedOldLines[0].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(4, changedOldWords.Count);
    Assert.Equal(ChangeType.Imaginary, changedOldWords[0].Type);
    Assert.Null(changedOldWords[0].Text);
    Assert.Equal(ChangeType.Imaginary, changedOldWords[1].Type);
    Assert.Null(changedOldWords[1].Text);
    Assert.Equal(ChangeType.Imaginary, changedOldWords[2].Type);
    Assert.Null(changedOldWords[2].Text);
    Assert.Equal(ChangeType.Imaginary, changedOldWords[3].Type);
    Assert.Null(changedOldWords[3].Text);

    var changedNewWords = changedNewLines[0].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Equal(4, changedNewWords.Count);
    Assert.Equal(ChangeType.Inserted, changedNewWords[0].Type);
    Assert.Equal("the", changedNewWords[0].Text);
    Assert.Equal(ChangeType.Inserted, changedNewWords[1].Type);
    Assert.Equal(" ", changedNewWords[1].Text);
    Assert.Equal(ChangeType.Inserted, changedNewWords[2].Type);
    Assert.Equal("heck", changedNewWords[2].Text);
    Assert.Equal(ChangeType.Inserted, changedNewWords[3].Type);
    Assert.Equal(" ", changedNewWords[3].Text);

    // Check on Character level: "e" is deleted in the old pane, "a" inserted in the new pane.
    var changedOldChars = changedOldLines[2].SubPieces[30].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Single(changedOldChars);
    Assert.Equal(ChangeType.Deleted, changedOldChars[0].Type);
    Assert.Equal("e", changedOldChars[0].Text);

    var changedNewChars = changedNewLines[2].SubPieces[30].SubPieces.Where(x => x.Type != ChangeType.Unchanged).ToList();
    Assert.Single(changedNewChars);
    Assert.Equal(ChangeType.Inserted, changedNewChars[0].Type);
    Assert.Equal("a", changedNewChars[0].Text);
}

[Fact]
public void Will_ignore_whitespace_by_default_1()
{
Expand Down