--- /dev/null
+номер,слова,частицы,иероглифы,переводы,связанные,теги
+,に・と,会う ,あう,"встречаться, видеться", 迎える,
+,,青 ・ 青い ,あお ・ あおい," синий цвет, синий, голубой,бледный
+,",,
+,,赤 ・ 赤い ,あか ・ あかい," красный цвет, красный ",,
+,,明るい, , あかるい светлый,くらい ,
+,,秋 ,,あき осень,,
+,が,開く ,," あく あける открываться, раскрываться ",,
--- /dev/null
+
+package main
+
+import (
+ "fmt"
+ "encoding/csv"
+ "os"
+ "io"
+ "unicode"
+ "unicode/utf8"
+)
+
+// Select prefix satisfying predicate. If entire string satisfies predicate,
+// index will equal to len(str) (i.e. effectively be out of range).
+func many (str string, p func (rune) bool) int {
+ i := 0
+ w := 0
+ for i = 0; i < len(str); i += w {
+ var c rune
+ c, w = utf8.DecodeRuneInString(str[i:])
+ if !p(c) {
+ break
+ }
+ }
+ return i
+}
+// selectSubstring returns byte offsets (begin, end) into str delimiting a run
+// of p-satisfying words. begin is the offset of the first non-space rune; end
+// is the offset just past the last rune of the last word accepted by p.
+// Words may be separated by Unicode spaces, but spaces that are not followed
+// by another p-satisfying rune are left outside the selection, so trailing
+// spaces are excluded. If no rune after the leading spaces satisfies p, the
+// result is an empty range (begin == end).
+func selectSubstring(str string, p func(rune) bool) (int, int) {
+	begin := many(str, unicode.IsSpace)
+
+	end := begin // end of the selection accepted so far
+	gap := 0     // width of the space run that follows the accepted selection
+	for {
+		run := many(str[end+gap:], p)
+		if run == 0 {
+			// Nothing matching follows the gap: the gap stays outside.
+			return begin, end
+		}
+		// Another word follows, so the gap before it joins the selection.
+		end += gap + run
+		gap = many(str[end:], unicode.IsSpace)
+	}
+}
+
+// isHiragana reports whether c belongs to the Unicode Hiragana script.
+func isHiragana(c rune) bool {
+	return unicode.In(c, unicode.Hiragana)
+}
+
+// isNotSpace reports whether c is anything other than a Unicode space.
+func isNotSpace(c rune) bool {
+	if unicode.IsSpace(c) {
+		return false
+	}
+	return true
+}
+
+// isAny is a predicate that accepts every rune.
+func isAny(rune) bool {
+	return true
+}
+
+// isEmptyField reports whether field is empty or consists solely of Unicode
+// space characters.
+func isEmptyField(field string) bool {
+	for _, r := range field {
+		if unicode.IsSpace(r) {
+			continue
+		}
+		// The first non-space rune settles the answer.
+		return false
+	}
+	return true
+}
+
+// splitReading cleans up one CSV record in place and returns the same slice.
+//
+// Every field is reduced to the span between its first and last non-space
+// rune. In addition, when field 2 (zero-based) is blank and field 3 exists,
+// the leading run of space-separated hiragana words in field 3 is moved into
+// field 2, and field 3 keeps whatever follows (trimmed on the next iteration).
+//
+// Records with fewer than four fields are only trimmed; they never cause an
+// out-of-range access.
+func splitReading(record []string) []string {
+	for i := range record {
+		// The length guard keeps a short record with a blank third field
+		// from indexing past the end of the slice.
+		if i == 2 && len(record) > 3 && isEmptyField(record[2]) {
+			field := record[3]
+			start, end := selectSubstring(field, isHiragana)
+			record[2] = field[start:end]
+			record[3] = field[end:]
+			continue
+		}
+		field := record[i]
+		start, end := selectSubstring(field, isNotSpace)
+		record[i] = field[start:end]
+	}
+
+	return record
+}
+
+// main reads CSV records from "1.csv", passes each one through splitReading
+// and writes the result to "2.csv" (created or truncated). Each input row is
+// also echoed to standard output. On any error the message is printed and the
+// program stops; rows already written are still flushed and both files are
+// closed.
+func main() {
+	fnIn, err := os.Open("1.csv")
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		return
+	}
+	defer fnIn.Close()
+
+	// 0644 rather than 0755: the output is a data file and needs no execute bit.
+	fnOut, err := os.OpenFile("2.csv", os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644)
+	if err != nil {
+		fmt.Printf("Error: %v\n", err)
+		return
+	}
+
+	csvIn := csv.NewReader(fnIn)
+	csvOut := csv.NewWriter(fnOut)
+	defer func() {
+		// Flush reports failures only through Error, so check it explicitly,
+		// and close the file afterwards so buffered rows reach it first.
+		csvOut.Flush()
+		if err := csvOut.Error(); err != nil {
+			fmt.Printf("Error: %v\n", err)
+		}
+		if err := fnOut.Close(); err != nil {
+			fmt.Printf("Error: %v\n", err)
+		}
+	}()
+
+	for {
+		l, err := csvIn.Read()
+		if err != nil {
+			if err == io.EOF {
+				break
+			}
+			fmt.Printf("Error: %v\n", err)
+			return
+		}
+		fmt.Printf("row: %v\n", l)
+		err = csvOut.Write(splitReading(l))
+		if err != nil {
+			fmt.Printf("Error: %v\n", err)
+			return
+		}
+	}
+}
+