Skip to content

Commit d914607

Browse files
committed
internal/encoding/yaml: encode YAML anchors as CUE definitions
This commit supports encoding YAML documents such as: a: &a 3 b: *a to this CUE document: #a: 3 a: #a b: #a Fixes #3818 Signed-off-by: Omri Steiner <[email protected]>
1 parent ee299f0 commit d914607

File tree

3 files changed

+177
-40
lines changed

3 files changed

+177
-40
lines changed

internal/encoding/yaml/decode.go

Lines changed: 119 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,14 @@ type decoder struct {
6464

6565
// forceNewline ensures that the next position will be on a new line.
6666
forceNewline bool
67+
68+
// Anchor fields which are gathered while we walk the YAML nodes,
69+
// but are only added to the AST when we're done processing the whole document.
70+
anchorFields []ast.Field
71+
// Map from anchor nodes to their names.
72+
anchorNames map[*yaml.Node]string
73+
// Keeps track of anchor names that have been taken. Used to ensure unique anchor names.
74+
anchorTakenNames map[string]struct{}
6775
}
6876

6977
// TODO(mvdan): this can be io.Reader really, except that token.Pos is offset-based,
@@ -83,9 +91,11 @@ func NewDecoder(filename string, b []byte) *decoder {
8391
tokFile := token.NewFile(filename, 0, len(b)+1)
8492
tokFile.SetLinesForContent(b)
8593
return &decoder{
86-
tokFile: tokFile,
87-
tokLines: append(tokFile.Lines(), len(b)),
88-
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
94+
tokFile: tokFile,
95+
tokLines: append(tokFile.Lines(), len(b)),
96+
yamlDecoder: *yaml.NewDecoder(bytes.NewReader(b)),
97+
anchorNames: make(map[*yaml.Node]string),
98+
anchorTakenNames: make(map[string]struct{}),
8999
}
90100
}
91101

@@ -176,24 +186,35 @@ func Unmarshal(filename string, data []byte) (ast.Expr, error) {
176186
return n, nil
177187
}
178188

179-
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
180-
d.addHeadCommentsToPending(yn)
181-
var expr ast.Expr
182-
var err error
189+
func (d *decoder) extract_no_anchor(yn *yaml.Node) (ast.Expr, error) {
183190
switch yn.Kind {
184191
case yaml.DocumentNode:
185-
expr, err = d.document(yn)
192+
return d.document(yn)
186193
case yaml.SequenceNode:
187-
expr, err = d.sequence(yn)
194+
return d.sequence(yn)
188195
case yaml.MappingNode:
189-
expr, err = d.mapping(yn)
196+
return d.mapping(yn)
190197
case yaml.ScalarNode:
191-
expr, err = d.scalar(yn)
198+
return d.scalar(yn)
192199
case yaml.AliasNode:
193-
expr, err = d.alias(yn)
200+
return d.reference_alias(yn)
194201
default:
195202
return nil, d.posErrorf(yn, "unknown yaml node kind: %d", yn.Kind)
196203
}
204+
}
205+
206+
func (d *decoder) extract(yn *yaml.Node) (ast.Expr, error) {
207+
d.addHeadCommentsToPending(yn)
208+
209+
var expr ast.Expr
210+
var err error
211+
212+
if yn.Anchor == "" {
213+
expr, err = d.extract_no_anchor(yn)
214+
} else {
215+
expr, err = d.anchor(yn)
216+
}
217+
197218
if err != nil {
198219
return nil, err
199220
}
@@ -324,7 +345,39 @@ func (d *decoder) document(yn *yaml.Node) (ast.Expr, error) {
324345
if n := len(yn.Content); n != 1 {
325346
return nil, d.posErrorf(yn, "yaml document nodes are meant to have one content node but have %d", n)
326347
}
327-
return d.extract(yn.Content[0])
348+
349+
expr, err := d.extract(yn.Content[0])
350+
if err != nil {
351+
return nil, err
352+
}
353+
354+
return d.addAnchorNodes(expr)
355+
}
356+
357+
// Adds anchor nodes at the top of the document.
358+
func (d *decoder) addAnchorNodes(expr ast.Expr) (ast.Expr, error) {
359+
elements := []ast.Decl{}
360+
361+
for _, field := range d.anchorFields {
362+
elements = append(elements, &field)
363+
}
364+
365+
switch x := expr.(type) {
366+
case *ast.StructLit:
367+
x.Elts = append(elements, x.Elts...)
368+
break
369+
case *ast.ListLit:
370+
expr = &ast.StructLit{
371+
Elts: append(elements, x),
372+
}
373+
break
374+
default:
375+
// If the whole YAML document is not a map / seq, then it can't have anchors.
376+
// Maybe assert that `anchorFields` is empty?
377+
break
378+
}
379+
380+
return expr, nil
328381
}
329382

330383
func (d *decoder) sequence(yn *yaml.Node) (ast.Expr, error) {
@@ -458,7 +511,7 @@ func (d *decoder) label(yn *yaml.Node) (ast.Label, error) {
458511
if yn.Alias.Kind != yaml.ScalarNode {
459512
return nil, d.posErrorf(yn, "invalid map key: %v", yn.Alias.ShortTag())
460513
}
461-
expr, err = d.alias(yn)
514+
expr, err = d.inline_alias(yn)
462515
value = yn.Alias.Value
463516
default:
464517
return nil, d.posErrorf(yn, "invalid map key: %v", yn.ShortTag())
@@ -639,7 +692,10 @@ func (d *decoder) makeNum(yn *yaml.Node, val string, kind token.Token) (expr ast
639692
return expr
640693
}
641694

642-
func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
695+
// Expands an alias node in place, returning the expanded node.
696+
// Sometimes we have to resort to this, for example when the alias
697+
// is inside a map key, since CUE does not support structs as map keys.
698+
func (d *decoder) inline_alias(yn *yaml.Node) (ast.Expr, error) {
643699
if d.extractingAliases[yn] {
644700
// TODO this could actually be allowed in some circumstances.
645701
return nil, d.posErrorf(yn, "anchor %q value contains itself", yn.Value)
@@ -649,11 +705,58 @@ func (d *decoder) alias(yn *yaml.Node) (ast.Expr, error) {
649705
}
650706
d.extractingAliases[yn] = true
651707
var node ast.Expr
652-
node, err := d.extract(yn.Alias)
708+
node, err := d.extract_no_anchor(yn.Alias)
653709
delete(d.extractingAliases, yn)
654710
return node, err
655711
}
656712

713+
// Replace an alias with a reference to the identifier of its anchor.
714+
func (d *decoder) reference_alias(yn *yaml.Node) (ast.Expr, error) {
715+
anchor, ok := d.anchorNames[yn.Alias]
716+
if !ok {
717+
return nil, d.posErrorf(yn, "anchor %q not found", yn.Alias.Anchor)
718+
}
719+
720+
return &ast.Ident{
721+
NamePos: d.pos(yn),
722+
Name: anchor,
723+
}, nil
724+
}
725+
726+
func (d *decoder) anchor(yn *yaml.Node) (ast.Expr, error) {
727+
var anchorIdent string
728+
729+
// Pick a non-conflicting anchor name.
730+
for i := 1; ; i++ {
731+
if i == 1 {
732+
anchorIdent = "#" + yn.Anchor
733+
} else {
734+
anchorIdent = "#" + yn.Anchor + "_" + strconv.Itoa(i)
735+
}
736+
if _, ok := d.anchorTakenNames[anchorIdent]; !ok {
737+
d.anchorTakenNames[anchorIdent] = struct{}{}
738+
break
739+
}
740+
}
741+
d.anchorNames[yn] = anchorIdent
742+
743+
// Process the node itself, but don't put it into the AST just yet,
744+
// store it for later to be used as an anchor identifier.
745+
expr, err := d.extract_no_anchor(yn)
746+
if err != nil {
747+
return nil, err
748+
}
749+
d.anchorFields = append(d.anchorFields, ast.Field{
750+
Label: &ast.Ident{Name: anchorIdent},
751+
Value: expr,
752+
})
753+
754+
return &ast.Ident{
755+
NamePos: d.pos(yn),
756+
Name: anchorIdent,
757+
}, nil
758+
}
759+
657760
func labelStr(l ast.Label) string {
658761
switch l := l.(type) {
659762
case *ast.Ident:

internal/encoding/yaml/decode_test.go

Lines changed: 43 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -474,25 +474,51 @@ Null: 1
474474
// Anchors and aliases.
475475
{
476476
"a: &x 1\nb: &y 2\nc: *x\nd: *y\n",
477-
`a: 1
478-
b: 2
479-
c: 1
480-
d: 2`,
477+
`#x: 1
478+
#y: 2
479+
a: #x
480+
b: #y
481+
c: #x
482+
d: #y`,
481483
}, {
482484
"a: &a {c: 1}\nb: *a",
483-
`a: {c: 1}
484-
b: {
485-
c: 1
486-
}`,
485+
`#a: {c: 1}
486+
a: #a
487+
b: #a`,
487488
}, {
488489
"a: &a [1, 2]\nb: *a",
489-
"a: [1, 2]\nb: [1, 2]", // TODO: a: [1, 2], b: a
490+
"#a: [1, 2]\na: #a\nb: #a",
490491
},
491492
{
492493
`a: &a "b"
493494
*a : "c"`,
494-
`a: "b"
495-
b: "c"`,
495+
`#a: "b"
496+
a: #a
497+
b: "c"`,
498+
},
499+
// Test nested anchors
500+
{
501+
`foo: &a
502+
bar: &b
503+
baz: 1
504+
a: *a
505+
b: *b
506+
`,
507+
`#b: {
508+
baz: 1
509+
}
510+
#a: {
511+
bar: #b
512+
}
513+
foo: #a
514+
a: #a
515+
b: #b`,
516+
},
517+
{
518+
`a:
519+
- &b c`,
520+
`#b: "c"
521+
a: [#b]`,
496522
},
497523

498524
{
@@ -778,10 +804,12 @@ a:
778804
// yaml-test-suite 3GZX: Spec Example 7.1. Alias Nodes
779805
{
780806
"First occurrence: &anchor Foo\nSecond occurrence: *anchor\nOverride anchor: &anchor Bar\nReuse anchor: *anchor\n",
781-
`"First occurrence": "Foo"
782-
"Second occurrence": "Foo"
783-
"Override anchor": "Bar"
784-
"Reuse anchor": "Bar"`,
807+
`#anchor: "Foo"
808+
#anchor_2: "Bar"
809+
"First occurrence": #anchor
810+
"Second occurrence": #anchor
811+
"Override anchor": #anchor_2
812+
"Reuse anchor": #anchor_2`,
785813
},
786814
}
787815

internal/encoding/yaml/testdata/merge.out

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,21 @@
1+
#CENTER: {
2+
x: 1, y: 2
3+
}
4+
#LEFT: {
5+
x: 0, y: 2
6+
}
7+
#BIG: {
8+
r: 10
9+
}
10+
#SMALL: {
11+
r: 1
12+
}
13+
114
// From http://yaml.org/type/merge.html
215
// Test
316
anchors: {
4-
list: [{
5-
x: 1, y: 2
6-
}, {
7-
x: 0, y: 2
8-
}, {
9-
r: 10
10-
}, {
11-
r: 1
12-
}]
17+
list: [#CENTER, #LEFT, #BIG, #SMALL,
18+
]
1319
}
1420

1521
// All the following maps are equal:

0 commit comments

Comments
 (0)