Skip to content

Commit

Permalink
Merge pull request #7 from spraakbanken/6-guard-indexerror
Browse files (browse the repository at this point in the history)
Guard for index out of bounds
  • Loading branch information
kod-kristoff authored May 7, 2024
2 parents c379c7b + 5a875ad commit 77f0e5b
Showing 1 changed file with 11 additions and 3 deletions.
14 changes: 11 additions & 3 deletions src/parallel_corpus/graph.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -192,6 +192,14 @@ def to_char_ids(token: Token) -> List[CharIdPair]:


def edge_map(g: Graph) -> Dict[str, Edge]:
"""Map from token ids to edges
Args:
g (Graph): the Graph to build the edge map from.
Returns:
Dict[str, Edge]: a map from token ids to edges
"""
edges = {}
for e in g.edges.values():
for i in e.ids:
Expand Down Expand Up @@ -254,7 +262,7 @@ def unaligned_modify(
tokens = get_side_texts(g, side)
token_at = token.token_at(tokens, from_)
from_token, from_ix = token_at["token"], token_at["offset"]
pre = (tokens[from_token] or "")[:from_ix]
pre = (tokens[from_token] if from_token < len(tokens) else "")[:from_ix]
if to == len(get_side_text(g, side)):
return unaligned_modify_tokens(g, from_token, len(g.get_side(side)), pre + text, side)
to_token_at = token.token_at(tokens, to)
Expand All @@ -274,7 +282,7 @@ def get_side_texts(g: Graph, side: Side) -> List[str]:
def unaligned_modify_tokens( # noqa: C901
g: Graph, from_: int, to: int, text: str, side: Side = Side.target
) -> Graph:
"""# /** Replace the text at some position, merging the spans it touches upon.
"""Replace the text at some position, merging the spans it touches upon.
# const show = (g: Graph) => g.target.map(t => t.text)
# const ids = (g: Graph) => g.target.map(t => t.id).join(' ')
Expand All @@ -301,7 +309,7 @@ def unaligned_modify_tokens( # noqa: C901
# showS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => ['this ', 'test ', 'graph ', 'hello ']
# idsS(unaligned_modify_tokens(g, 0, 0, 'this ', 'source')) // => 's3 s0 s1 s2'
# Indexes are token offsets
Indexes are token offsets
""" # noqa: E501

if (
Expand Down

0 comments on commit 77f0e5b

Please sign in to comment.