commit ea2e6000631d7a48b80f6adc0422bb2a7427fd83
Author: Florian Maury
Date:   Sun Oct 27 11:15:53 2024 +0100

    initial

diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..67a742d
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,5 @@
+module git.broken-by-design.fr/fmaury/srt_to_yaml
+
+go 1.22.7
+
+require gopkg.in/yaml.v3 v3.0.1
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..a62c313
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,4 @@
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
+gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..8caf941
--- /dev/null
+++ b/main.go
@@ -0,0 +1,162 @@
+// Command srt_to_yaml converts an SRT subtitle file into a YAML list of
+// subtitle entries.
+package main
+
+import (
+	"bufio"
+	"errors"
+	"flag"
+	"fmt"
+	"io"
+	"log"
+	"os"
+	"regexp"
+	"strings"
+
+	"gopkg.in/yaml.v3"
+)
+
+var (
+	// IdentifierLine matches the numeric identifier that opens an entry,
+	// optionally preceded by a byte order mark (U+FEFF).
+	IdentifierLine = regexp.MustCompile("^\ufeff?\\d+$")
+	// TimeStampLine matches "HH:MM:SS,mmm --> HH:MM:SS,mmm" and captures the
+	// start and end timestamps.
+	TimeStampLine = regexp.MustCompile(`^((?:\d{2}:){2}\d{2},\d{3}) --> ((?:\d{2}:){2}\d{2},\d{3})$`)
+)
+
+// Entry is one subtitle as written to the YAML output. parseSrt fills in every
+// field except Speaker, which it always leaves empty.
+type Entry struct {
+	Start   string `yaml:"start"`
+	End     string `yaml:"end"`
+	Speaker string `yaml:"speaker"`
+	Line1   string `yaml:"line"`
+	Line2   string `yaml:"line2"`
+}
+
+// truncatedErr returns the error to report when the scanner stops in the middle
+// of an entry: the underlying read error if there is one, msg otherwise.
+func truncatedErr(s *bufio.Scanner, msg string) error {
+	if err := s.Err(); err != nil {
+		return fmt.Errorf("reading SRT input: %w", err)
+	}
+	return errors.New(msg)
+}
+
+// parseSrt reads SRT entries from r and returns them in input order.
+//
+// Each entry is an identifier line, a timestamp line, then one or two lines of
+// text. Entries are separated by blank lines; extra blank lines between entries
+// and at the end of the input are ignored. Every line is trimmed of surrounding
+// whitespace before it is interpreted.
+//
+// On malformed input or a read error, parseSrt returns nil entries and a
+// non-nil error.
+func parseSrt(r io.Reader) ([]Entry, error) {
+	var entries []Entry
+	srtScan := bufio.NewScanner(r)
+	for srtScan.Scan() {
+		firstLine := strings.TrimSpace(srtScan.Text())
+		if firstLine == "" {
+			// Tolerate repeated or trailing blank separator lines.
+			continue
+		}
+		if !IdentifierLine.MatchString(firstLine) {
+			return nil, fmt.Errorf("invalid identifier %q", firstLine)
+		}
+
+		if !srtScan.Scan() {
+			return nil, truncatedErr(srtScan, "unexpected end of file after identifier")
+		}
+		secondLine := strings.TrimSpace(srtScan.Text())
+		// The pattern is anchored at both ends, so it matches at most once.
+		timestamps := TimeStampLine.FindStringSubmatch(secondLine)
+		if timestamps == nil {
+			return nil, fmt.Errorf("invalid timestamp line: %q", secondLine)
+		}
+
+		if !srtScan.Scan() {
+			return nil, truncatedErr(srtScan, "unexpected end of file after timestamps")
+		}
+		thirdLine := strings.TrimSpace(srtScan.Text())
+		if thirdLine == "" {
+			return nil, errors.New("unexpected empty line after timestamp")
+		}
+
+		// The second line of text is optional: the next line is either that
+		// text, a blank separator, or missing at the end of the input.
+		fourthLine := ""
+		if srtScan.Scan() {
+			fourthLine = strings.TrimSpace(srtScan.Text())
+			// Entry holds at most two lines of text, so a third is rejected.
+			if fourthLine != "" && srtScan.Scan() && strings.TrimSpace(srtScan.Text()) != "" {
+				return nil, errors.New("unexpected non empty line after the second line of text")
+			}
+		}
+
+		entries = append(entries, Entry{
+			Start: timestamps[1],
+			End:   timestamps[2],
+			Line1: thirdLine,
+			Line2: fourthLine,
+		})
+	}
+	// Scan also returns false on a read failure (for example an over-long
+	// line), which must not be mistaken for a clean end of input.
+	if err := srtScan.Err(); err != nil {
+		return nil, fmt.Errorf("reading SRT input: %w", err)
+	}
+	return entries, nil
+}
+
+// run converts the SRT file at srtPath into a new YAML file at outPath. It
+// refuses to overwrite an existing outPath.
+func run(srtPath, outPath string) error {
+	srtFd, err := os.Open(srtPath)
+	if err != nil {
+		return fmt.Errorf("opening SRT file: %w", err)
+	}
+	defer srtFd.Close()
+
+	entries, err := parseSrt(srtFd)
+	if err != nil {
+		return fmt.Errorf("parsing %q: %w", srtPath, err)
+	}
+
+	yamlContent, err := yaml.Marshal(&entries)
+	if err != nil {
+		return fmt.Errorf("marshaling entries: %w", err)
+	}
+
+	// O_EXCL makes the open fail if outPath already exists.
+	outFd, err := os.OpenFile(outPath, os.O_CREATE|os.O_EXCL|os.O_WRONLY, 0o600)
+	if err != nil {
+		return fmt.Errorf("creating YAML file: %w", err)
+	}
+	if _, err := outFd.Write(yamlContent); err != nil {
+		outFd.Close() // best effort: the write error is what gets reported
+		return fmt.Errorf("writing YAML file: %w", err)
+	}
+	// A failed Close can mean the data never reached the disk, so report it.
+	if err := outFd.Close(); err != nil {
+		return fmt.Errorf("closing YAML file: %w", err)
+	}
+	return nil
+}
+
+// main parses the -srt and -out flags, both of which are required, and runs
+// the conversion.
+func main() {
+	srtFile := flag.String("srt", "", "File path to the srt file to convert")
+	outFile := flag.String("out", "", "File path to the yaml file")
+	flag.Parse()
+
+	if *srtFile == "" || *outFile == "" {
+		flag.Usage()
+		os.Exit(2)
+	}
+	if err := run(*srtFile, *outFile); err != nil {
+		log.Fatal(err)
+	}
+}
diff --git a/main_test.go b/main_test.go
new file mode 100644
index 0000000..1d7751d
--- /dev/null
+++ b/main_test.go
@@ -0,0 +1,139 @@
+package main
+
+import (
+	"bytes"
+	"reflect"
+	"strings"
+	"testing"
+)
+
+func TestParseSingleEntry(t *testing.T) {
+	document := strings.Join([]string{
+		"1",
+		"00:00:00,000 --> 00:00:14,000",
+		"toto",
+		"titi",
+	}, "\n")
+
+	entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	expectedEntries := []Entry{
+		{
+			Start: "00:00:00,000",
+			End:   "00:00:14,000",
+			Line1: "toto",
+			Line2: "titi",
+		},
+	}
+	if !reflect.DeepEqual(entries, expectedEntries) {
+		t.Fatalf("unexpected entries: %v", entries)
+	}
+}
+
+func TestParseMultipleEntries(t *testing.T) {
+	document := strings.Join([]string{
+		"1",
+		"00:00:00,000 --> 00:00:14,000",
+		"toto",
+		"titi",
+		"",
+		"2",
+		"00:01:00,000 --> 00:02:14,000",
+		"tutu",
+		"tata",
+	}, "\n")
+	entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	expectedEntries := []Entry{
+		{
+			Start: "00:00:00,000",
+			End:   "00:00:14,000",
+			Line1: "toto",
+			Line2: "titi",
+		},
+		{
+			Start: "00:01:00,000",
+			End:   "00:02:14,000",
+			Line1: "tutu",
+			Line2: "tata",
+		},
+	}
+	if !reflect.DeepEqual(entries, expectedEntries) {
+		t.Fatalf("unexpected entries: %v", entries)
+	}
+}
+
+func TestParseInvalidTimestamp(t *testing.T) {
+	document := strings.Join([]string{
+		"1",
+		"00:00:00.000 --> 00:00:14.000",
+		"toto",
+		"titi",
+	}, "\n")
+	_, err := parseSrt(bytes.NewBuffer([]byte(document)))
+	if err == nil {
+		t.Fatal("expected error")
+	} else if eStr := err.Error(); eStr != "invalid timestamp line: \"00:00:00.000 --> 00:00:14.000\"" {
+		t.Fatalf("unexpected error: %q", eStr)
+	}
+}
+
+func TestParseSingleLine(t *testing.T) {
+	document := strings.Join([]string{
+		"1",
+		"00:00:00,000 --> 00:00:14,000",
+		"toto",
+	}, "\n")
+	entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	expectedEntries := []Entry{
+		{
+			Start: "00:00:00,000",
+			End:   "00:00:14,000",
+			Line1: "toto",
+			Line2: "",
+		},
+	}
+	if !reflect.DeepEqual(entries, expectedEntries) {
+		t.Fatalf("unexpected entries: %v", entries)
+	}
+}
+
+func TestParseMultipleEntriesSingleLine(t *testing.T) {
+	document := strings.Join([]string{
+		"1",
+		"00:00:00,000 --> 00:00:14,000",
+		"toto",
+		"",
+		"2",
+		"00:01:00,000 --> 00:02:14,000",
+		"tata",
+	}, "\n")
+	entries, err := parseSrt(bytes.NewBuffer([]byte(document)))
+	if err != nil {
+		t.Fatal(err)
+	}
+	expectedEntries := []Entry{
+		{
+			Start: "00:00:00,000",
+			End:   "00:00:14,000",
+			Line1: "toto",
+			Line2: "",
+		},
+		{
+			Start: "00:01:00,000",
+			End:   "00:02:14,000",
+			Line1: "tata",
+			Line2: "",
+		},
+	}
+	if !reflect.DeepEqual(entries, expectedEntries) {
+		t.Fatalf("unexpected entries: %v", entries)
+	}
+}