Skip to content

Commit

Permalink
Srt html styling (#121)
Browse files Browse the repository at this point in the history
* Setup parsing srt stylings for webvtt

* migrated tests

* clean up tests and testdata
  • Loading branch information
justin-taylor authored Nov 20, 2024
1 parent 80e6dcf commit cba5e0f
Show file tree
Hide file tree
Showing 6 changed files with 383 additions and 4 deletions.
157 changes: 154 additions & 3 deletions srt.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ import (
"strings"
"time"
"unicode/utf8"

"golang.org/x/net/html"
)

// Constants
Expand Down Expand Up @@ -116,7 +118,95 @@ func ReadFromSRT(i io.Reader) (o *Subtitles, err error) {
o.Items = append(o.Items, s)
} else {
// Add text
s.Lines = append(s.Lines, Line{Items: []LineItem{{Text: strings.TrimSpace(line)}}})
if l := parseTextSrt(strings.TrimSpace(line)); len(l.Items) > 0 {
s.Lines = append(s.Lines, l)
}
}
}
return
}

// parseTextSrt parses a single SRT text line into a Line. The input is run
// through an HTML tokenizer: <b>, <i>, <u> and <font color="..."> tags toggle
// the current style, and every non-blank run of text becomes one LineItem
// carrying the style that is active at that point.
//
// A blank input yields a Line holding a single LineItem with empty text. Text
// runs that are empty after trimming are dropped, so an input made only of tags
// yields a Line without items.
func parseTextSrt(i string) (o Line) {
// A blank line is kept as one empty item instead of being tokenized
if strings.TrimSpace(i) == "" {
o.Items = []LineItem{{Text: ""}}
return
}

// Create tokenizer
tr := html.NewTokenizer(strings.NewReader(i))

// Style state carried from one token to the next. The flags are plain
// booleans, so nesting depth of identical tags is not tracked: the first
// closing tag switches the style off.
// NOTE(review): pos is never assigned in the visible part of this function,
// so SRTPosition is always zero for items built here — confirm whether
// position parsing is expected to happen in this function.
var (
bold bool
italic bool
underline bool
color *string
pos byte
)
for {
// Get next tag
t := tr.Next()

// Any tokenizer error (typically io.EOF at the end of the input) ends the
// loop; the error is not reported to the caller
if err := tr.Err(); err != nil {
break
}

// Get unmodified text; it is used for text tokens below instead of
// token.Data
raw := string(tr.Raw())
// Get current token
token := tr.Token()

switch t {
case html.EndTagToken:
// A closing tag switches the matching style off
switch token.Data {
case "b":
bold = false
case "i":
italic = false
case "u":
underline = false
case "font":
color = nil
}
case html.StartTagToken:
// An opening tag switches the matching style on
switch token.Data {
case "b":
bold = true
case "i":
italic = true
case "u":
underline = true
case "font":
// A <font> tag without a color attribute leaves the current color as is
if c := htmlTokenAttribute(&token, "color"); c != nil {
color = c
}
}
case html.TextToken:
// Surrounding whitespace is trimmed and blank text runs are skipped
if s := strings.TrimSpace(raw); s != "" {
// Build the style attributes only when at least one style is active, so
// that unstyled text keeps a nil InlineStyle
var sa *StyleAttributes
if bold || italic || underline || color != nil || pos != 0 {
sa = &StyleAttributes{
SRTBold: bold,
SRTColor: color,
SRTItalics: italic,
SRTPosition: pos,
SRTUnderline: underline,
}
sa.propagateSRTAttributes()
}

// Append item
o.Items = append(o.Items, LineItem{
InlineStyle: sa,
Text: s,
})
}
}
}
return
Expand Down Expand Up @@ -151,8 +241,7 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {

// Loop through lines
for _, l := range v.Lines {
c = append(c, []byte(l.String())...)
c = append(c, bytesLineSeparator...)
c = append(c, []byte(l.srtBytes())...)
}

// Add new line
Expand All @@ -169,3 +258,65 @@ func (s Subtitles) WriteToSRT(o io.Writer) (err error) {
}
return
}

// srtBytes renders the line in SRT form: the SRT bytes of every item joined by
// a single space, followed by the line separator.
func (l Line) srtBytes() (c []byte) {
	last := len(l.Items) - 1
	for idx := range l.Items {
		c = append(c, l.Items[idx].srtBytes()...)
		// The separating space goes between items only, never after the last one
		if idx != last {
			c = append(c, ' ')
		}
	}
	return append(c, bytesLineSeparator...)
}

// srtBytes renders the item in SRT form: the text wrapped in the tags that
// match its inline SRT style. Tags open in the order font, b, i, u and close in
// the reverse order; a non-zero position is written as an {\anN} marker right
// before the text. An item without inline style is rendered as its bare text.
func (li LineItem) srtBytes() (c []byte) {
	// Gather the SRT style settings; they all stay at their zero value when the
	// item carries no inline style
	var (
		color                    string
		bold, italics, underline bool
		pos                      byte
	)
	if st := li.InlineStyle; st != nil {
		if st.SRTColor != nil {
			color = *st.SRTColor
		}
		bold, italics, underline = st.SRTBold, st.SRTItalics, st.SRTUnderline
		pos = st.SRTPosition
	}

	// Opening tags, outermost first
	if color != "" {
		c = append(c, "<font color=\""+color+"\">"...)
	}
	if bold {
		c = append(c, "<b>"...)
	}
	if italics {
		c = append(c, "<i>"...)
	}
	if underline {
		c = append(c, "<u>"...)
	}

	// Position marker and text
	if pos != 0 {
		c = append(c, fmt.Sprintf(`{\an%d}`, pos)...)
	}
	c = append(c, li.Text...)

	// Closing tags, innermost first
	if underline {
		c = append(c, "</u>"...)
	}
	if italics {
		c = append(c, "</i>"...)
	}
	if bold {
		c = append(c, "</b>"...)
	}
	if color != "" {
		c = append(c, "</font>"...)
	}
	return c
}
76 changes: 76 additions & 0 deletions srt_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@ package astisub_test
import (
"bytes"
"io/ioutil"
"os"
"testing"
"time"

"github.com/asticode/go-astisub"
"github.com/stretchr/testify/assert"
Expand Down Expand Up @@ -51,3 +53,77 @@ func TestNonUTF8SRT(t *testing.T) {
_, err := astisub.OpenFile("./testdata/example-in-non-utf8.srt")
assert.Error(t, err)
}

// TestSRTStyled checks that a styled SRT file is parsed into the expected
// items and inline styles, and that writing it back as SRT and as WebVTT
// matches the reference files in testdata.
func TestSRTStyled(t *testing.T) {
	// Open
	s, err := astisub.OpenFile("./testdata/example-in-styled.srt")
	assert.NoError(t, err)

	// Timings and text expected for every item, in file order
	wantItems := []struct {
		startAt, endAt time.Duration
		lines          []string
	}{
		{17*time.Second + 985*time.Millisecond, 20*time.Second + 521*time.Millisecond, []string{"[instrumental music]"}},
		{47*time.Second + 115*time.Millisecond, 48*time.Second + 282*time.Millisecond, []string{"[ticks]"}},
		{58*time.Second + 192*time.Millisecond, 59*time.Second + 727*time.Millisecond, []string{"[instrumental music]"}},
		{1*time.Minute + 1*time.Second + 662*time.Millisecond, 1*time.Minute + 3*time.Second + 63*time.Millisecond, []string{"[dog barking]"}},
		{1*time.Minute + 26*time.Second + 787*time.Millisecond, 1*time.Minute + 29*time.Second + 523*time.Millisecond, []string{"[beeping]"}},
		{1*time.Minute + 29*time.Second + 590*time.Millisecond, 1*time.Minute + 31*time.Second + 992*time.Millisecond, []string{"[automated]", "'The time is 7:35.'"}},
	}
	assert.Len(t, s.Items, 6)
	for idx, want := range wantItems {
		item := s.Items[idx]
		assert.Equal(t, want.startAt, item.StartAt)
		assert.Equal(t, want.endAt, item.EndAt)
		for lineIdx, text := range want.lines {
			assert.Equal(t, text, item.Lines[lineIdx].String())
		}
	}

	// Inline style expected on the first item of the given line. An entry with
	// styled == false expects no inline style at all, and an empty color
	// expects a nil SRTColor.
	wantStyles := []struct {
		item, line               int
		styled                   bool
		color                    string
		bold, italics, underline bool
	}{
		{item: 0, styled: true, color: "#00ff00", bold: true},
		{item: 1, styled: true, color: "#ff00ff"},
		{item: 2, styled: true, color: "#00ff00"},
		{item: 3, styled: true, bold: true, underline: true},
		{item: 4},
		{item: 5},
		{item: 5, line: 1, styled: true, italics: true},
	}
	assert.Len(t, s.Items, 6)
	for _, want := range wantStyles {
		style := s.Items[want.item].Lines[want.line].Items[0].InlineStyle
		if !want.styled {
			assert.Nil(t, style)
			continue
		}
		if want.color == "" {
			assert.Nil(t, style.SRTColor)
		} else {
			assert.Equal(t, want.color, *style.SRTColor)
		}
		assert.Zero(t, style.SRTPosition)
		assert.Equal(t, want.bold, style.SRTBold)
		assert.Equal(t, want.italics, style.SRTItalics)
		assert.Equal(t, want.underline, style.SRTUnderline)
	}

	// Write to srt, then to WebVTT, and compare with the reference files
	outputs := []struct {
		path  string
		write func(w *bytes.Buffer) error
	}{
		{"./testdata/example-out-styled.srt", func(w *bytes.Buffer) error { return s.WriteToSRT(w) }},
		{"./testdata/example-out-styled.vtt", func(w *bytes.Buffer) error { return s.WriteToWebVTT(w) }},
	}
	for _, out := range outputs {
		w := &bytes.Buffer{}
		c, err := os.ReadFile(out.path)
		assert.NoError(t, err)
		err = out.write(w)
		assert.NoError(t, err)
		assert.Equal(t, string(c), w.String())
	}
}
80 changes: 79 additions & 1 deletion subtitles.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import (
"time"

"github.com/asticode/go-astikit"
"golang.org/x/net/html"
)

// Bytes
Expand Down Expand Up @@ -173,6 +174,11 @@ var (

// StyleAttributes represents style attributes
type StyleAttributes struct {
SRTBold bool
SRTColor *string
SRTItalics bool
SRTPosition byte // 1-9 numpad layout
SRTUnderline bool
SSAAlignment *int
SSAAlphaLevel *float64
SSAAngle *float64 // degrees
Expand Down Expand Up @@ -236,6 +242,8 @@ type StyleAttributes struct {
TTMLWritingMode *string
TTMLZIndex *int
WebVTTAlign string
WebVTTBold bool
WebVTTItalics bool
WebVTTLine string
WebVTTLines int
WebVTTPosition string
Expand All @@ -244,6 +252,7 @@ type StyleAttributes struct {
WebVTTSize string
WebVTTStyles []string
WebVTTTags []WebVTTTag
WebVTTUnderline bool
WebVTTVertical string
WebVTTViewportAnchor string
WebVTTWidth string
Expand Down Expand Up @@ -279,6 +288,56 @@ func (t WebVTTTag) endTag() string {
return "</" + t.Name + ">"
}

// propagateSRTAttributes copies the SRT style settings onto the matching
// TTML/WebVTT fields: color, position and alignment, the bold/italics/underline
// flags, and the list of WebVTT tags built from those flags.
func (sa *StyleAttributes) propagateSRTAttributes() {
	// The color is shared through the TTML color field
	// TODO: handle non-default colors that need custom styles
	if c := sa.SRTColor; c != nil {
		sa.TTMLColor = c
	}

	// SRTPosition uses the 1-9 numpad layout. The column selects the alignment;
	// the center column (2, 5, 8) leaves the alignment untouched.
	switch sa.SRTPosition {
	case 1, 4, 7: // left column
		sa.WebVTTAlign = "left"
	case 3, 6, 9: // right column
		sa.WebVTTAlign = "right"
	}

	// The row selects the position; values outside 1-9 change nothing
	switch sa.SRTPosition {
	case 7, 8, 9: // top row
		sa.WebVTTPosition = "10%"
	case 4, 5, 6: // middle row
		sa.WebVTTPosition = "50%"
	case 1, 2, 3: // bottom row
		sa.WebVTTPosition = "90%"
	}

	sa.WebVTTBold, sa.WebVTTItalics, sa.WebVTTUnderline = sa.SRTBold, sa.SRTItalics, sa.SRTUnderline

	// Rebuild the tag list from scratch, in b, i, u order; it is always
	// non-nil, even when no flag is set
	tags := []WebVTTTag{}
	for _, tag := range []struct {
		enabled bool
		name    string
	}{
		{sa.WebVTTBold, "b"},
		{sa.WebVTTItalics, "i"},
		{sa.WebVTTUnderline, "u"},
	} {
		if tag.enabled {
			tags = append(tags, WebVTTTag{Name: tag.name})
		}
	}
	sa.WebVTTTags = tags
}

// propagateSSAAttributes currently does nothing: its body is empty, so no SSA
// attribute is copied to any other field.
func (sa *StyleAttributes) propagateSSAAttributes() {}

func (sa *StyleAttributes) propagateSTLAttributes() {
Expand Down Expand Up @@ -352,7 +411,15 @@ func (sa *StyleAttributes) propagateTTMLAttributes() {
}
}

func (sa *StyleAttributes) propagateWebVTTAttributes() {}
// propagateWebVTTAttributes copies the style settings relevant to SRT onto the
// SRT fields: the bold/italics/underline flags come from their WebVTT
// counterparts and the color, when set, comes from the TTML color.
func (sa *StyleAttributes) propagateWebVTTAttributes() {
	sa.SRTBold, sa.SRTItalics, sa.SRTUnderline = sa.WebVTTBold, sa.WebVTTItalics, sa.WebVTTUnderline

	// A nil TTML color leaves any existing SRT color untouched
	if c := sa.TTMLColor; c != nil {
		sa.SRTColor = c
	}
}

// Metadata represents metadata
// TODO Merge attributes
Expand Down Expand Up @@ -835,3 +902,14 @@ func appendStringToBytesWithNewLine(i []byte, s string) (o []byte) {
o = append(o, bytesLineSeparator...)
return
}

// htmlTokenAttribute returns a pointer to the value of the first attribute of t
// whose key equals key, or nil when t has no such attribute. The pointer refers
// to a copy of the value, not to the token's own attribute slice.
func htmlTokenAttribute(t *html.Token, key string) *string {
	for idx := range t.Attr {
		if t.Attr[idx].Key != key {
			continue
		}
		val := t.Attr[idx].Val
		return &val
	}
	return nil
}
Loading

0 comments on commit cba5e0f

Please sign in to comment.