initial
This commit is contained in:
commit
ea2e600063
4 changed files with 261 additions and 0 deletions
5
go.mod
Normal file
5
go.mod
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
module git.broken-by-design.fr/fmaury/srt_to_yaml
|
||||||
|
|
||||||
|
go 1.22.7
|
||||||
|
|
||||||
|
require gopkg.in/yaml.v3 v3.0.1
|
4
go.sum
Normal file
4
go.sum
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||||
|
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
113
main.go
Normal file
113
main.go
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
"regexp"
|
||||||
|
"strings"
|
||||||
|
|
||||||
|
"gopkg.in/yaml.v3"
|
||||||
|
)
|
||||||
|
|
||||||
|
// IdentifierLine matches a cue identifier line: one or more digits,
// optionally preceded by a single U+FEFF byte order mark.
var IdentifierLine = regexp.MustCompile("^\ufeff?\\d+$")

// TimeStampLine matches a timing line of the form
// "HH:MM:SS,mmm --> HH:MM:SS,mmm". Submatch 1 is the start timestamp and
// submatch 2 is the end timestamp.
var TimeStampLine = regexp.MustCompile(`^((?:\d{2}:){2}\d{2},\d{3}) --> ((?:\d{2}:){2}\d{2},\d{3})$`)
|
||||||
|
|
||||||
|
// Entry is one subtitle cue as it is serialized to YAML.
type Entry struct {
	// Start is the cue start timestamp, kept as the text found in the SRT file.
	Start string `yaml:"start"`
	// End is the cue end timestamp, kept as the text found in the SRT file.
	End string `yaml:"end"`
	// Speaker is emitted under the "speaker" key; parseSrt leaves it empty.
	Speaker string `yaml:"speaker"`
	// Line1 is the first line of cue text, emitted under the "line" key.
	Line1 string `yaml:"line"`
	// Line2 is the optional second line of cue text; empty when the cue has
	// a single line.
	Line2 string `yaml:"line2"`
}
|
||||||
|
|
||||||
|
func parseSrt(r io.Reader) (entries []Entry, err error) {
|
||||||
|
srtScan := bufio.NewScanner(r)
|
||||||
|
for {
|
||||||
|
if !srtScan.Scan() {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
firstLine := strings.TrimSpace(srtScan.Text())
|
||||||
|
if !IdentifierLine.MatchString(firstLine) {
|
||||||
|
err = fmt.Errorf("invalid identifier %q", string(firstLine))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if !srtScan.Scan() {
|
||||||
|
err = fmt.Errorf("unexpected end of file after identifier")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
secondLine := strings.TrimSpace(srtScan.Text())
|
||||||
|
extractedValues := TimeStampLine.FindAllStringSubmatch(secondLine, -1)
|
||||||
|
if extractedValues == nil || len(extractedValues) != 1 {
|
||||||
|
err = fmt.Errorf("invalid timestamp line: %q", secondLine)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
startTime := extractedValues[0][1]
|
||||||
|
endTime := extractedValues[0][2]
|
||||||
|
|
||||||
|
if !srtScan.Scan() {
|
||||||
|
err = fmt.Errorf("unexpected end of file after timestamps")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
thirdLine := strings.TrimSpace(srtScan.Text())
|
||||||
|
if thirdLine == "" {
|
||||||
|
err = fmt.Errorf("unexpected empty line after timestamp")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fourthLine := ""
|
||||||
|
if srtScan.Scan() {
|
||||||
|
fourthLine = strings.TrimSpace(srtScan.Text())
|
||||||
|
if fourthLine != "" {
|
||||||
|
_ = srtScan.Scan()
|
||||||
|
if srtScan.Text() != "" {
|
||||||
|
err = fmt.Errorf("unexpected non empty line after the second line of text")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
entry := Entry{
|
||||||
|
Start: startTime,
|
||||||
|
End: endTime,
|
||||||
|
Line1: thirdLine,
|
||||||
|
Line2: fourthLine,
|
||||||
|
}
|
||||||
|
entries = append(entries, entry)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
srtFile := flag.String("srt", "", "File path to the srt file to convert")
|
||||||
|
outFile := flag.String("out", "", "File path to the yaml file")
|
||||||
|
flag.Parse()
|
||||||
|
|
||||||
|
srtFd, err := os.Open(*srtFile)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("failed to open SRT file: %q", err.Error())
|
||||||
|
}
|
||||||
|
defer srtFd.Close()
|
||||||
|
|
||||||
|
entries, err := parseSrt(srtFd)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
yamlContent, err := yaml.Marshal(&entries)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("failed to marshal content, %q", err.Error())
|
||||||
|
}
|
||||||
|
|
||||||
|
outFd, err := os.OpenFile(*outFile, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("failed to open YAML file: %q", err.Error())
|
||||||
|
}
|
||||||
|
defer outFd.Close()
|
||||||
|
outFd.Write(yamlContent)
|
||||||
|
}
|
139
main_test.go
Normal file
139
main_test.go
Normal file
|
@ -0,0 +1,139 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestParseSingleEntry(t *testing.T) {
|
||||||
|
document := strings.Join([]string{
|
||||||
|
"1",
|
||||||
|
"00:00:00,000 --> 00:00:14,000",
|
||||||
|
"toto",
|
||||||
|
"titi",
|
||||||
|
}, "\n")
|
||||||
|
|
||||||
|
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedEntries := []Entry{
|
||||||
|
{
|
||||||
|
Start: "00:00:00,000",
|
||||||
|
End: "00:00:14,000",
|
||||||
|
Line1: "toto",
|
||||||
|
Line2: "titi",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||||
|
t.Fatalf("unexpected entries: %v", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseMultipleEntries(t *testing.T) {
|
||||||
|
document := strings.Join([]string{
|
||||||
|
"1",
|
||||||
|
"00:00:00,000 --> 00:00:14,000",
|
||||||
|
"toto",
|
||||||
|
"titi",
|
||||||
|
"",
|
||||||
|
"2",
|
||||||
|
"00:01:00,000 --> 00:02:14,000",
|
||||||
|
"tutu",
|
||||||
|
"tata",
|
||||||
|
}, "\n")
|
||||||
|
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedEntries := []Entry{
|
||||||
|
{
|
||||||
|
Start: "00:00:00,000",
|
||||||
|
End: "00:00:14,000",
|
||||||
|
Line1: "toto",
|
||||||
|
Line2: "titi",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: "00:01:00,000",
|
||||||
|
End: "00:02:14,000",
|
||||||
|
Line1: "tutu",
|
||||||
|
Line2: "tata",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||||
|
t.Fatalf("unexpected entries: %v", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseInvalidTimestamp(t *testing.T) {
|
||||||
|
document := strings.Join([]string{
|
||||||
|
"1",
|
||||||
|
"00:00:00.000 --> 00:00:14.000",
|
||||||
|
"toto",
|
||||||
|
"titi",
|
||||||
|
}, "\n")
|
||||||
|
_, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error")
|
||||||
|
} else if eStr := err.Error(); eStr != "invalid timestamp line: \"00:00:00.000 --> 00:00:14.000\"" {
|
||||||
|
t.Fatalf("unexpected error: %q", eStr)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseSingleLine(t *testing.T) {
|
||||||
|
document := strings.Join([]string{
|
||||||
|
"1",
|
||||||
|
"00:00:00,000 --> 00:00:14,000",
|
||||||
|
"toto",
|
||||||
|
}, "\n")
|
||||||
|
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedEntries := []Entry{
|
||||||
|
{
|
||||||
|
Start: "00:00:00,000",
|
||||||
|
End: "00:00:14,000",
|
||||||
|
Line1: "toto",
|
||||||
|
Line2: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||||
|
t.Fatalf("unexpected entries: %v", entries)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestParseMultipleEntriesSingleLine(t *testing.T) {
|
||||||
|
document := strings.Join([]string{
|
||||||
|
"1",
|
||||||
|
"00:00:00,000 --> 00:00:14,000",
|
||||||
|
"toto",
|
||||||
|
"",
|
||||||
|
"2",
|
||||||
|
"00:01:00,000 --> 00:02:14,000",
|
||||||
|
"tata",
|
||||||
|
}, "\n")
|
||||||
|
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
expectedEntries := []Entry{
|
||||||
|
{
|
||||||
|
Start: "00:00:00,000",
|
||||||
|
End: "00:00:14,000",
|
||||||
|
Line1: "toto",
|
||||||
|
Line2: "",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Start: "00:01:00,000",
|
||||||
|
End: "00:02:14,000",
|
||||||
|
Line1: "tata",
|
||||||
|
Line2: "",
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||||
|
t.Fatalf("unexpected entries: %v", entries)
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue