Skip to content

Commit

Permalink
Remove SymmetrizedWordAlignmentEngine
Browse files Browse the repository at this point in the history
  • Loading branch information
Enkidu93 committed Feb 10, 2025
1 parent 92e62ca commit 6c7614a
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 176 deletions.
173 changes: 0 additions & 173 deletions src/SIL.Machine/Translation/SymmetrizedWordAlignmentEngine.cs

This file was deleted.

153 changes: 150 additions & 3 deletions src/SIL.Machine/Translation/SymmetrizedWordAlignmentModel.cs
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
using SIL.Machine.Corpora;
using System;
using System.Collections.Generic;
using System.Linq;
using SIL.Machine.Corpora;
using SIL.ObjectModel;

namespace SIL.Machine.Translation
{
public class SymmetrizedWordAlignmentModel : SymmetrizedWordAlignmentEngine, IWordAlignmentModel
public class SymmetrizedWordAlignmentModel : DisposableBase, IWordAlignmentModel
{
private readonly IWordAlignmentModel _directWordAlignmentModel;
private readonly IWordAlignmentModel _inverseWordAlignmentModel;
private readonly SymmetrizedWordAligner _aligner;

/// <summary>
/// Initializes the model from a direct model and an inverse model and builds the
/// <see cref="SymmetrizedWordAligner"/> that the alignment calls of this class are forwarded to.
/// </summary>
/// <param name="directWordAlignmentModel">Model kept as the direct model; elsewhere in this class it is
/// queried with arguments in (source, target) order.</param>
/// <param name="inverseWordAlignmentModel">Model kept as the inverse model; elsewhere in this class it is
/// queried with arguments in (target, source) order.</param>
public SymmetrizedWordAlignmentModel(
IWordAlignmentModel directWordAlignmentModel,
IWordAlignmentModel inverseWordAlignmentModel
)
// NOTE(review): this base(...) call and a DisposableBase base type both show up in the diff view of
// this file - confirm which of the two the final file keeps.
: base(directWordAlignmentModel, inverseWordAlignmentModel)
{
_directWordAlignmentModel = directWordAlignmentModel;
_inverseWordAlignmentModel = inverseWordAlignmentModel;
// The aligner is built from the two engine properties, which return the fields assigned just above.
_aligner = new SymmetrizedWordAligner(DirectWordAlignmentEngine, InverseWordAlignmentEngine);
}

public ITrainer CreateTrainer(IParallelTextCorpus corpus)
Expand All @@ -27,6 +32,148 @@ public ITrainer CreateTrainer(IParallelTextCorpus corpus)
return new SymmetrizedWordAlignmentModelTrainer(directTrainer, inverseTrainer);
}

/// <summary>
/// Gets or sets the symmetrization heuristic. The value is not stored on this class; both accessors
/// forward to the underlying aligner.
/// </summary>
public SymmetrizationHeuristic Heuristic
{
    get
    {
        return _aligner.Heuristic;
    }
    set
    {
        _aligner.Heuristic = value;
    }
}

/// <summary>
/// Gets the direct model, exposed through the <see cref="IWordAligner"/> interface.
/// The disposed check runs before the model is handed out.
/// </summary>
public IWordAligner DirectWordAlignmentEngine
{
    get
    {
        CheckDisposed();
        IWordAligner directEngine = _directWordAlignmentModel;
        return directEngine;
    }
}

/// <summary>
/// Gets the inverse model, exposed through the <see cref="IWordAligner"/> interface.
/// The disposed check runs before the model is handed out.
/// </summary>
public IWordAligner InverseWordAlignmentEngine
{
    get
    {
        CheckDisposed();
        IWordAligner inverseEngine = _inverseWordAlignmentModel;
        return inverseEngine;
    }
}

/// <summary>
/// Gets the source-side vocabulary as reported by the direct model.
/// The disposed check runs before the direct model is read.
/// </summary>
public IWordVocabulary SourceWords
{
    get
    {
        CheckDisposed();
        IWordVocabulary sourceVocabulary = _directWordAlignmentModel.SourceWords;
        return sourceVocabulary;
    }
}

/// <summary>
/// Gets the target-side vocabulary as reported by the direct model.
/// The disposed check runs before the direct model is read.
/// </summary>
public IWordVocabulary TargetWords
{
    get
    {
        CheckDisposed();
        IWordVocabulary targetVocabulary = _directWordAlignmentModel.TargetWords;
        return targetVocabulary;
    }
}

/// <summary>
/// Gets the special symbol indices reported by the direct model.
/// </summary>
public IReadOnlySet<int> SpecialSymbolIndices
{
    get
    {
        // Run the same disposed check as the SourceWords/TargetWords properties, so that reading this
        // property never reaches into the wrapped model after this object has been disposed.
        CheckDisposed();

        return _directWordAlignmentModel.SpecialSymbolIndices;
    }
}

/// <summary>
/// Aligns one source/target segment pair by forwarding to the symmetrized aligner.
/// </summary>
/// <param name="sourceSegment">Tokens of the source segment.</param>
/// <param name="targetSegment">Tokens of the target segment.</param>
/// <returns>The matrix produced by the underlying aligner.</returns>
public WordAlignmentMatrix Align(IReadOnlyList<string> sourceSegment, IReadOnlyList<string> targetSegment)
{
    CheckDisposed();
    WordAlignmentMatrix alignment = _aligner.Align(sourceSegment, targetSegment);
    return alignment;
}

/// <summary>
/// Aligns a batch of source/target segment pairs by forwarding to the symmetrized aligner.
/// </summary>
/// <param name="segments">The segment pairs to align.</param>
/// <returns>The list of matrices produced by the underlying aligner.</returns>
public IReadOnlyList<WordAlignmentMatrix> AlignBatch(
    IReadOnlyList<(IReadOnlyList<string> SourceSegment, IReadOnlyList<string> TargetSegment)> segments
)
{
    CheckDisposed();
    IReadOnlyList<WordAlignmentMatrix> alignments = _aligner.AlignBatch(segments);
    return alignments;
}

/// <summary>
/// Enumerates the target words that the direct model proposes for <paramref name="sourceWord"/>, scoring
/// each one with the larger of the direct score and the inverse model's score for the swapped pair.
/// </summary>
/// <param name="sourceWord">The source word to look up.</param>
/// <param name="threshold">Only pairs whose combined score is strictly greater than this are returned.</param>
/// <returns>A lazily evaluated sequence of (target word, combined score) tuples.</returns>
public IEnumerable<(string TargetWord, double Score)> GetTranslations(string sourceWord, double threshold = 0)
{
    // The body of an iterator does not start running until the first MoveNext(), so the disposed check
    // is done here, in a non-iterator wrapper, to make misuse fail at the call site.
    CheckDisposed();

    return Enumerate();

    IEnumerable<(string TargetWord, double Score)> Enumerate()
    {
        // Checked again at enumeration time: the sequence may be consumed after this object was disposed.
        CheckDisposed();

        // The threshold is applied to the combined score below, not passed to the direct model:
        // a pair can clear it through the inverse score alone.
        foreach ((string targetWord, double dirScore) in _directWordAlignmentModel.GetTranslations(sourceWord))
        {
            double invScore = _inverseWordAlignmentModel.GetTranslationScore(targetWord, sourceWord);
            double score = Math.Max(dirScore, invScore);
            if (score > threshold)
                yield return (targetWord, score);
        }
    }
}

/// <summary>
/// Enumerates the target word indices that the direct model proposes for
/// <paramref name="sourceWordIndex"/>, scoring each one with the larger of the direct score and the
/// inverse model's score for the swapped pair.
/// </summary>
/// <param name="sourceWordIndex">Index of the source word to look up.</param>
/// <param name="threshold">Only pairs whose combined score is strictly greater than this are returned.</param>
/// <returns>A lazily evaluated sequence of (target word index, combined score) tuples.</returns>
public IEnumerable<(int TargetWordIndex, double Score)> GetTranslations(
    int sourceWordIndex,
    double threshold = 0
)
{
    // The body of an iterator does not start running until the first MoveNext(), so the disposed check
    // is done here, in a non-iterator wrapper, to make misuse fail at the call site.
    CheckDisposed();

    return Enumerate();

    IEnumerable<(int TargetWordIndex, double Score)> Enumerate()
    {
        // Checked again at enumeration time: the sequence may be consumed after this object was disposed.
        CheckDisposed();

        // The threshold is applied to the combined score below, not passed to the direct model:
        // a pair can clear it through the inverse score alone.
        foreach (
            (int targetWordIndex, double dirScore) in _directWordAlignmentModel.GetTranslations(sourceWordIndex)
        )
        {
            double invScore = _inverseWordAlignmentModel.GetTranslationScore(targetWordIndex, sourceWordIndex);
            double score = Math.Max(dirScore, invScore);
            if (score > threshold)
                yield return (targetWordIndex, score);
        }
    }
}

/// <summary>
/// Scores a word pair as the larger of the direct model's score and the inverse model's score
/// (the inverse model is asked with the two words swapped).
/// </summary>
/// <param name="sourceWord">The source word.</param>
/// <param name="targetWord">The target word.</param>
/// <returns>The maximum of the two model scores.</returns>
public double GetTranslationScore(string sourceWord, string targetWord)
{
    CheckDisposed();

    // Arguments are evaluated left to right, so the direct model is still queried first.
    return Math.Max(
        _directWordAlignmentModel.GetTranslationScore(sourceWord, targetWord),
        _inverseWordAlignmentModel.GetTranslationScore(targetWord, sourceWord)
    );
}

/// <summary>
/// Scores a word-index pair as the larger of the direct model's score and the inverse model's score
/// (the inverse model is asked with the two indices swapped).
/// </summary>
/// <param name="sourceWordIndex">Index of the source word.</param>
/// <param name="targetWordIndex">Index of the target word.</param>
/// <returns>The maximum of the two model scores.</returns>
public double GetTranslationScore(int sourceWordIndex, int targetWordIndex)
{
    CheckDisposed();

    // Arguments are evaluated left to right, so the direct model is still queried first.
    return Math.Max(
        _directWordAlignmentModel.GetTranslationScore(sourceWordIndex, targetWordIndex),
        _inverseWordAlignmentModel.GetTranslationScore(targetWordIndex, sourceWordIndex)
    );
}

/// <summary>
/// Aligns the two segments, converts the resulting matrix to aligned word pairs and fills in their
/// scores via <see cref="ComputeAlignedWordPairScores"/>.
/// </summary>
/// <param name="sourceSegment">Tokens of the source segment.</param>
/// <param name="targetSegment">Tokens of the target segment.</param>
/// <returns>The scored aligned word pairs.</returns>
public IReadOnlyCollection<AlignedWordPair> GetBestAlignedWordPairs(
    IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment
)
{
    CheckDisposed();

    IReadOnlyCollection<AlignedWordPair> scoredPairs = Align(sourceSegment, targetSegment)
        .ToAlignedWordPairs();
    ComputeAlignedWordPairScores(sourceSegment, targetSegment, scoredPairs);
    return scoredPairs;
}

/// <summary>
/// Scores the given word pairs with both models and stores, on each pair, the larger of the direct and
/// inverse translation scores and the larger of the direct and inverse alignment scores.
/// </summary>
/// <param name="sourceSegment">Tokens of the source segment.</param>
/// <param name="targetSegment">Tokens of the target segment.</param>
/// <param name="wordPairs">The pairs to score; their score properties are overwritten in place.</param>
public void ComputeAlignedWordPairScores(
    IReadOnlyList<string> sourceSegment,
    IReadOnlyList<string> targetSegment,
    IReadOnlyCollection<AlignedWordPair> wordPairs
)
{
    // This method is public, so it needs the same disposed check as the other public members.
    CheckDisposed();

    // The inverse model is given inverted copies of the pairs and the segments in swapped order.
    AlignedWordPair[] inverseWordPairs = wordPairs.Select(wp => wp.Invert()).ToArray();
    _directWordAlignmentModel.ComputeAlignedWordPairScores(sourceSegment, targetSegment, wordPairs);
    _inverseWordAlignmentModel.ComputeAlignedWordPairScores(targetSegment, sourceSegment, inverseWordPairs);

    // inverseWordPairs was built from wordPairs in enumeration order, so zipping lines each pair up
    // with its own inverted copy.
    foreach (var (wordPair, inverseWordPair) in wordPairs.Zip(inverseWordPairs, (wp, invWp) => (wp, invWp)))
    {
        wordPair.TranslationScore = Math.Max(wordPair.TranslationScore, inverseWordPair.TranslationScore);
        wordPair.AlignmentScore = Math.Max(wordPair.AlignmentScore, inverseWordPair.AlignmentScore);
    }
}

protected override void DisposeManagedResources()
{
_directWordAlignmentModel.Dispose();
Expand Down

0 comments on commit 6c7614a

Please sign in to comment.