This commit is contained in:
Florian Maury 2024-10-27 11:15:53 +01:00
commit ea2e600063
4 changed files with 261 additions and 0 deletions

5
go.mod Normal file
View file

@ -0,0 +1,5 @@
module git.broken-by-design.fr/fmaury/srt_to_yaml
go 1.22.7
require gopkg.in/yaml.v3 v3.0.1

4
go.sum Normal file
View file

@ -0,0 +1,4 @@
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

113
main.go Normal file
View file

@ -0,0 +1,113 @@
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"regexp"
"strings"
"gopkg.in/yaml.v3"
)
// IdentifierLine matches a cue identifier line: one or more decimal digits,
// optionally preceded by a byte order mark (U+FEFF).
var IdentifierLine = regexp.MustCompile("^\ufeff?\\d+$")

// TimeStampLine matches a cue timing line of the form
// "HH:MM:SS,mmm --> HH:MM:SS,mmm". The first capture group holds the start
// timestamp and the second one holds the end timestamp.
var TimeStampLine = regexp.MustCompile(`^((?:\d{2}:){2}\d{2},\d{3}) --> ((?:\d{2}:){2}\d{2},\d{3})$`)
// Entry is one subtitle cue as it is serialized to YAML.
type Entry struct {
	// Start is the timestamp at which the cue begins, kept as text.
	Start string `yaml:"start"`
	// End is the timestamp at which the cue ends, kept as text.
	End string `yaml:"end"`
	// Speaker is emitted under the "speaker" key.
	Speaker string `yaml:"speaker"`
	// Line1 is the first text line of the cue, emitted under the "line" key.
	Line1 string `yaml:"line"`
	// Line2 is the second text line of the cue, emitted under the "line2"
	// key; it is empty for single-line cues.
	Line2 string `yaml:"line2"`
}
// parseSrt reads SubRip (SRT) subtitles from r and returns one Entry per cue.
//
// A cue is made of, in order:
//   - an identifier line matching IdentifierLine;
//   - a timing line matching TimeStampLine;
//   - one mandatory text line;
//   - one optional second text line.
//
// Cues are separated by blank lines. Every line is stripped of surrounding
// whitespace before being inspected, and any number of blank lines is
// tolerated between cues and at the end of the input. Only the Start, End,
// Line1 and Line2 fields of the returned entries are populated.
//
// On failure the returned slice is nil and the error either describes the
// first malformed line or wraps the read error reported by the scanner.
func parseSrt(r io.Reader) (entries []Entry, err error) {
	srtScan := bufio.NewScanner(r)

	// truncated builds the error for a cue that stops early. A failed Scan
	// may be caused by a read error rather than by the end of the input, so
	// the scanner error takes precedence when there is one.
	truncated := func(after string) error {
		if scanErr := srtScan.Err(); scanErr != nil {
			return fmt.Errorf("reading SRT input: %w", scanErr)
		}
		return fmt.Errorf("unexpected end of file after %s", after)
	}

	for srtScan.Scan() {
		firstLine := strings.TrimSpace(srtScan.Text())
		if firstLine == "" {
			// Extra blank lines between cues or trailing the last cue carry
			// no information; skip them instead of rejecting the file.
			continue
		}
		if !IdentifierLine.MatchString(firstLine) {
			return nil, fmt.Errorf("invalid identifier %q", firstLine)
		}

		if !srtScan.Scan() {
			return nil, truncated("identifier")
		}
		secondLine := strings.TrimSpace(srtScan.Text())
		// The pattern is anchored on both ends, so it matches at most once.
		stamps := TimeStampLine.FindStringSubmatch(secondLine)
		if stamps == nil {
			return nil, fmt.Errorf("invalid timestamp line: %q", secondLine)
		}

		if !srtScan.Scan() {
			return nil, truncated("timestamps")
		}
		thirdLine := strings.TrimSpace(srtScan.Text())
		if thirdLine == "" {
			return nil, fmt.Errorf("unexpected empty line after timestamp")
		}

		// The second text line is optional: the next line is either that
		// text or the blank separator, and the input may also stop here.
		fourthLine := ""
		if srtScan.Scan() {
			fourthLine = strings.TrimSpace(srtScan.Text())
			// After a second text line, whatever follows must be a blank
			// separator. Text is only read when Scan actually produced a line.
			if fourthLine != "" && srtScan.Scan() && strings.TrimSpace(srtScan.Text()) != "" {
				return nil, fmt.Errorf("unexpected non empty line after the second line of text")
			}
		}

		entries = append(entries, Entry{
			Start: stamps[1],
			End:   stamps[2],
			Line1: thirdLine,
			Line2: fourthLine,
		})
	}

	// Scan returns false both at the end of the input and on failure; only
	// Err tells the two apart.
	if scanErr := srtScan.Err(); scanErr != nil {
		return nil, fmt.Errorf("reading SRT input: %w", scanErr)
	}
	return entries, nil
}
// main converts the SRT file named by the -srt flag into a YAML document
// written to the path named by the -out flag.
//
// The output file is created with O_EXCL, so an already existing file is never
// overwritten. Any failure is logged and terminates the program with a
// non-zero exit status.
func main() {
	srtFile := flag.String("srt", "", "File path to the srt file to convert")
	outFile := flag.String("out", "", "File path to the yaml file")
	flag.Parse()

	// Both paths are required; fail early with the usage text rather than
	// with an opaque error about opening an empty path.
	if *srtFile == "" || *outFile == "" {
		flag.Usage()
		os.Exit(2)
	}

	srtFd, err := os.Open(*srtFile)
	if err != nil {
		log.Fatalf("failed to open SRT file: %q", err.Error())
	}
	entries, err := parseSrt(srtFd)
	// Closed explicitly rather than deferred: log.Fatal exits the process
	// without running deferred calls. The file was only read, so a close
	// error cannot lose data and is deliberately ignored.
	_ = srtFd.Close()
	if err != nil {
		log.Fatal(err)
	}

	yamlContent, err := yaml.Marshal(&entries)
	if err != nil {
		log.Fatalf("failed to marshal content, %q", err.Error())
	}

	outFd, err := os.OpenFile(*outFile, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
	if err != nil {
		log.Fatalf("failed to open YAML file: %q", err.Error())
	}
	if _, err = outFd.Write(yamlContent); err != nil {
		_ = outFd.Close()
		log.Fatalf("failed to write YAML file: %q", err.Error())
	}
	// A write error can be reported as late as Close, so its result is
	// checked before the conversion is considered successful.
	if err = outFd.Close(); err != nil {
		log.Fatalf("failed to close YAML file: %q", err.Error())
	}
}

139
main_test.go Normal file
View file

@ -0,0 +1,139 @@
package main
import (
"bytes"
"reflect"
"strings"
"testing"
)
func TestParseSingleEntry(t *testing.T) {
document := strings.Join([]string{
"1",
"00:00:00,000 --> 00:00:14,000",
"toto",
"titi",
}, "\n")
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
if err != nil {
t.Fatal(err)
}
expectedEntries := []Entry{
{
Start: "00:00:00,000",
End: "00:00:14,000",
Line1: "toto",
Line2: "titi",
},
}
if !reflect.DeepEqual(entries, expectedEntries) {
t.Fatalf("unexpected entries: %v", entries)
}
}
func TestParseMultipleEntries(t *testing.T) {
document := strings.Join([]string{
"1",
"00:00:00,000 --> 00:00:14,000",
"toto",
"titi",
"",
"2",
"00:01:00,000 --> 00:02:14,000",
"tutu",
"tata",
}, "\n")
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
if err != nil {
t.Fatal(err)
}
expectedEntries := []Entry{
{
Start: "00:00:00,000",
End: "00:00:14,000",
Line1: "toto",
Line2: "titi",
},
{
Start: "00:01:00,000",
End: "00:02:14,000",
Line1: "tutu",
Line2: "tata",
},
}
if !reflect.DeepEqual(entries, expectedEntries) {
t.Fatalf("unexpected entries: %v", entries)
}
}
// TestParseInvalidTimestamp checks that a timing line using dots instead of
// commas before the milliseconds is rejected with the expected message.
func TestParseInvalidTimestamp(t *testing.T) {
	lines := []string{
		"1",
		"00:00:00.000 --> 00:00:14.000",
		"toto",
		"titi",
	}
	input := bytes.NewBufferString(strings.Join(lines, "\n"))

	_, err := parseSrt(input)
	if err == nil {
		t.Fatal("expected error")
	}

	const wantMsg = "invalid timestamp line: \"00:00:00.000 --> 00:00:14.000\""
	if gotMsg := err.Error(); gotMsg != wantMsg {
		t.Fatalf("unexpected error: %q", gotMsg)
	}
}
func TestParseSingleLine(t *testing.T) {
document := strings.Join([]string{
"1",
"00:00:00,000 --> 00:00:14,000",
"toto",
}, "\n")
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
if err != nil {
t.Fatal(err)
}
expectedEntries := []Entry{
{
Start: "00:00:00,000",
End: "00:00:14,000",
Line1: "toto",
Line2: "",
},
}
if !reflect.DeepEqual(entries, expectedEntries) {
t.Fatalf("unexpected entries: %v", entries)
}
}
func TestParseMultipleEntriesSingleLine(t *testing.T) {
document := strings.Join([]string{
"1",
"00:00:00,000 --> 00:00:14,000",
"toto",
"",
"2",
"00:01:00,000 --> 00:02:14,000",
"tata",
}, "\n")
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
if err != nil {
t.Fatal(err)
}
expectedEntries := []Entry{
{
Start: "00:00:00,000",
End: "00:00:14,000",
Line1: "toto",
Line2: "",
},
{
Start: "00:01:00,000",
End: "00:02:14,000",
Line1: "tata",
Line2: "",
},
}
if !reflect.DeepEqual(entries, expectedEntries) {
t.Fatalf("unexpected entries: %v", entries)
}
}