From 2fc2196fe7e29e736e24a9c6d3c809267e27b29b Mon Sep 17 00:00:00 2001 From: sgf Date: Wed, 3 Aug 2022 17:22:35 +0300 Subject: [PATCH] Column constants. Split "hiragana fix" filter into "trim spaces" filter and "hiragana fix" filter. --- fix-columns/1.csv | 2 +- fix-columns/fix-columns.go | 40 +++++++++++++++++++++++++++----------- 2 files changed, 30 insertions(+), 12 deletions(-) diff --git a/fix-columns/1.csv b/fix-columns/1.csv index 20efb30..3354d48 100644 --- a/fix-columns/1.csv +++ b/fix-columns/1.csv @@ -1,7 +1,7 @@ номер,слова,частицы,иероглифы,переводы,связанные,теги ,に・と,会う ,あう,"встречаться, видеться", 迎える, ,,青 ・ 青い ,あお ・ あおい," синий цвет, синий, голубой,бледный -,",, +",, ,,赤 ・ 赤い ,あか ・ あかい," красный цвет, красный ",, ,,明るい, , あかるい светлый,くらい , ,,秋 ,,あき осень,, diff --git a/fix-columns/fix-columns.go b/fix-columns/fix-columns.go index ebc0273..2c74642 100644 --- a/fix-columns/fix-columns.go +++ b/fix-columns/fix-columns.go @@ -10,6 +10,16 @@ import ( "unicode/utf8" ) +const ( + fieldNumber = iota + fieldParticle + fieldKanji + fieldReading + fieldTranslation + fieldRelated + fieldTags +) + // Select prefix satisfying predicate. If entire string satisfies predicate, // index will equal to len(str) (i.e. effectively be out of range). func many (str string, p func (rune) bool) int { @@ -78,17 +88,8 @@ func isEmptyField (field string) bool { return true } -func splitReading (record []string) ([]string) { +func trimRecordSpaces (record []string) ([] string) { for i := 0; i < len(record); i += 1 { - if i == 2 && isEmptyField(record[2]) { - field := record[3] - start, end := selectSubstring(field, isHiragana) - //r, _ := utf8.DecodeRuneInString(field[start:]) - //fmt.Printf("hiragana: start = %v (%q), end = %v\n", start, r, end) - record[2] = field[start:end] - record[3] = field[end:] - continue - } field := record[i] start, end := selectSubstring(field, isNotSpace) record[i] = field[start:end] @@ -97,6 +98,19 @@ func splitReading (record []string) ([]string) { return record } +func fixHiraganaInTranslation (record []string) ([]string) { + if isEmptyField(record[fieldReading]) { + field := record[fieldTranslation] + start, end := selectSubstring(field, isHiragana) + //r, _ := utf8.DecodeRuneInString(field[start:]) + //fmt.Printf("hiragana: start = %v (%q), end = %v\n", start, r, end) + record[fieldReading] = field[start:end] + record[fieldTranslation] = field[end:] + } + + return record +} + func main() { fnIn, err := os.Open("1.csv") if err != nil { @@ -124,7 +138,11 @@ func main() { return } fmt.Printf("row: %v\n", l) - err = csvOut.Write(splitReading(l)) + f := func (record []string) []string { + return trimRecordSpaces(fixHiraganaInTranslation(record)) + } + //err = csvOut.Write(splitReading(l)) + err = csvOut.Write(f(l)) if err != nil { fmt.Printf("Error: %v\n", err) return -- 2.20.1