initial
This commit is contained in:
commit
ea2e600063
4 changed files with 261 additions and 0 deletions
5
go.mod
Normal file
5
go.mod
Normal file
|
@ -0,0 +1,5 @@
|
|||
module git.broken-by-design.fr/fmaury/srt_to_yaml
|
||||
|
||||
go 1.22.7
|
||||
|
||||
require gopkg.in/yaml.v3 v3.0.1
|
4
go.sum
Normal file
4
go.sum
Normal file
|
@ -0,0 +1,4 @@
|
|||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
113
main.go
Normal file
113
main.go
Normal file
|
@ -0,0 +1,113 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"os"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
// IdentifierLine matches the numeric identifier line that opens an SRT cue.
// A single leading byte order mark (U+FEFF) is tolerated in front of the
// digits.
var IdentifierLine = regexp.MustCompile("^\ufeff?\\d+$")

// TimeStampLine matches an SRT timing line of the form
// "HH:MM:SS,mmm --> HH:MM:SS,mmm". Capture group 1 holds the start time and
// capture group 2 holds the end time.
var TimeStampLine = regexp.MustCompile(`^((?:\d{2}:){2}\d{2},\d{3}) --> ((?:\d{2}:){2}\d{2},\d{3})$`)
|
||||
|
||||
// Entry is a single subtitle cue as it is serialized to YAML.
type Entry struct {
	// Start is the cue start time, kept in SRT "HH:MM:SS,mmm" notation.
	Start string `yaml:"start"`
	// End is the cue end time, kept in SRT "HH:MM:SS,mmm" notation.
	End string `yaml:"end"`
	// Speaker names who is talking; parseSrt leaves it empty.
	Speaker string `yaml:"speaker"`
	// Line1 is the first line of subtitle text (YAML key "line").
	Line1 string `yaml:"line"`
	// Line2 is the optional second line of subtitle text; empty when the
	// cue has a single line.
	Line2 string `yaml:"line2"`
}
|
||||
|
||||
func parseSrt(r io.Reader) (entries []Entry, err error) {
|
||||
srtScan := bufio.NewScanner(r)
|
||||
for {
|
||||
if !srtScan.Scan() {
|
||||
break
|
||||
}
|
||||
firstLine := strings.TrimSpace(srtScan.Text())
|
||||
if !IdentifierLine.MatchString(firstLine) {
|
||||
err = fmt.Errorf("invalid identifier %q", string(firstLine))
|
||||
return
|
||||
}
|
||||
|
||||
if !srtScan.Scan() {
|
||||
err = fmt.Errorf("unexpected end of file after identifier")
|
||||
return
|
||||
}
|
||||
secondLine := strings.TrimSpace(srtScan.Text())
|
||||
extractedValues := TimeStampLine.FindAllStringSubmatch(secondLine, -1)
|
||||
if extractedValues == nil || len(extractedValues) != 1 {
|
||||
err = fmt.Errorf("invalid timestamp line: %q", secondLine)
|
||||
return
|
||||
}
|
||||
startTime := extractedValues[0][1]
|
||||
endTime := extractedValues[0][2]
|
||||
|
||||
if !srtScan.Scan() {
|
||||
err = fmt.Errorf("unexpected end of file after timestamps")
|
||||
return
|
||||
}
|
||||
thirdLine := strings.TrimSpace(srtScan.Text())
|
||||
if thirdLine == "" {
|
||||
err = fmt.Errorf("unexpected empty line after timestamp")
|
||||
return
|
||||
}
|
||||
|
||||
fourthLine := ""
|
||||
if srtScan.Scan() {
|
||||
fourthLine = strings.TrimSpace(srtScan.Text())
|
||||
if fourthLine != "" {
|
||||
_ = srtScan.Scan()
|
||||
if srtScan.Text() != "" {
|
||||
err = fmt.Errorf("unexpected non empty line after the second line of text")
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
entry := Entry{
|
||||
Start: startTime,
|
||||
End: endTime,
|
||||
Line1: thirdLine,
|
||||
Line2: fourthLine,
|
||||
}
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func main() {
|
||||
srtFile := flag.String("srt", "", "File path to the srt file to convert")
|
||||
outFile := flag.String("out", "", "File path to the yaml file")
|
||||
flag.Parse()
|
||||
|
||||
srtFd, err := os.Open(*srtFile)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open SRT file: %q", err.Error())
|
||||
}
|
||||
defer srtFd.Close()
|
||||
|
||||
entries, err := parseSrt(srtFd)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
yamlContent, err := yaml.Marshal(&entries)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to marshal content, %q", err.Error())
|
||||
}
|
||||
|
||||
outFd, err := os.OpenFile(*outFile, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open YAML file: %q", err.Error())
|
||||
}
|
||||
defer outFd.Close()
|
||||
outFd.Write(yamlContent)
|
||||
}
|
139
main_test.go
Normal file
139
main_test.go
Normal file
|
@ -0,0 +1,139 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseSingleEntry(t *testing.T) {
|
||||
document := strings.Join([]string{
|
||||
"1",
|
||||
"00:00:00,000 --> 00:00:14,000",
|
||||
"toto",
|
||||
"titi",
|
||||
}, "\n")
|
||||
|
||||
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedEntries := []Entry{
|
||||
{
|
||||
Start: "00:00:00,000",
|
||||
End: "00:00:14,000",
|
||||
Line1: "toto",
|
||||
Line2: "titi",
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||
t.Fatalf("unexpected entries: %v", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMultipleEntries(t *testing.T) {
|
||||
document := strings.Join([]string{
|
||||
"1",
|
||||
"00:00:00,000 --> 00:00:14,000",
|
||||
"toto",
|
||||
"titi",
|
||||
"",
|
||||
"2",
|
||||
"00:01:00,000 --> 00:02:14,000",
|
||||
"tutu",
|
||||
"tata",
|
||||
}, "\n")
|
||||
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedEntries := []Entry{
|
||||
{
|
||||
Start: "00:00:00,000",
|
||||
End: "00:00:14,000",
|
||||
Line1: "toto",
|
||||
Line2: "titi",
|
||||
},
|
||||
{
|
||||
Start: "00:01:00,000",
|
||||
End: "00:02:14,000",
|
||||
Line1: "tutu",
|
||||
Line2: "tata",
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||
t.Fatalf("unexpected entries: %v", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseInvalidTimestamp(t *testing.T) {
|
||||
document := strings.Join([]string{
|
||||
"1",
|
||||
"00:00:00.000 --> 00:00:14.000",
|
||||
"toto",
|
||||
"titi",
|
||||
}, "\n")
|
||||
_, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||
if err == nil {
|
||||
t.Fatal("expected error")
|
||||
} else if eStr := err.Error(); eStr != "invalid timestamp line: \"00:00:00.000 --> 00:00:14.000\"" {
|
||||
t.Fatalf("unexpected error: %q", eStr)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSingleLine(t *testing.T) {
|
||||
document := strings.Join([]string{
|
||||
"1",
|
||||
"00:00:00,000 --> 00:00:14,000",
|
||||
"toto",
|
||||
}, "\n")
|
||||
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedEntries := []Entry{
|
||||
{
|
||||
Start: "00:00:00,000",
|
||||
End: "00:00:14,000",
|
||||
Line1: "toto",
|
||||
Line2: "",
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||
t.Fatalf("unexpected entries: %v", entries)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMultipleEntriesSingleLine(t *testing.T) {
|
||||
document := strings.Join([]string{
|
||||
"1",
|
||||
"00:00:00,000 --> 00:00:14,000",
|
||||
"toto",
|
||||
"",
|
||||
"2",
|
||||
"00:01:00,000 --> 00:02:14,000",
|
||||
"tata",
|
||||
}, "\n")
|
||||
entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
expectedEntries := []Entry{
|
||||
{
|
||||
Start: "00:00:00,000",
|
||||
End: "00:00:14,000",
|
||||
Line1: "toto",
|
||||
Line2: "",
|
||||
},
|
||||
{
|
||||
Start: "00:01:00,000",
|
||||
End: "00:02:14,000",
|
||||
Line1: "tata",
|
||||
Line2: "",
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(entries, expectedEntries) {
|
||||
t.Fatalf("unexpected entries: %v", entries)
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue