-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmatchRatingApproach.go
More file actions
117 lines (102 loc) · 2.46 KB
/
matchRatingApproach.go
File metadata and controls
117 lines (102 loc) · 2.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
package textdistance
import (
"errors"
"math"
"strings"
)
// NewMRA returns the zero value of MRA.
func NewMRA() MRA {
	var mra MRA
	return mra
}
// MRA groups the match rating approach operations as methods. It has no
// fields, so it carries no state between calls.
type MRA struct{}
// Minimum returns the minimum rating threshold for s1 and s2. The threshold
// is selected by the combined length of the two encoded strings:
//
//	combined length <= 4   -> 5
//	combined length 5..7   -> 4
//	combined length 8..11  -> 3
//	combined length 12     -> 2
//
// An error from Encoding is returned unchanged. A combined length above 12
// yields an error and a zero rating.
func (m MRA) Minimum(s1, s2 string) (float64, error) {
	first, err := m.Encoding(s1)
	if err != nil {
		return 0, err
	}

	second, err := m.Encoding(s2)
	if err != nil {
		return 0, err
	}

	// Walk the table from the largest combined length down to the smallest.
	total := len(first) + len(second)
	if total > 12 {
		return 0, errors.New("invalid length of strings, must be smaller than 12 combined")
	}
	if total == 12 {
		return 2, nil
	}
	if total > 7 {
		return 3, nil
	}
	if total > 4 {
		return 4, nil
	}
	return 5, nil
}
// Distance is meant to compare s1 and s2 with the match rating approach, but
// the comparison itself is not implemented yet: every call currently returns
// a zero value and a non-nil error.
//
// Both inputs are encoded first and an error from Encoding is returned
// unchanged. Encodings whose lengths differ by 3 or more are rejected as not
// comparable. In every other case a "not fully implemented" error is
// returned.
func (m MRA) Distance(s1, s2 string) (float64, error) {
	e1, err := m.Encoding(s1)
	if err != nil {
		return 0, err
	}
	e2, err := m.Encoding(s2)
	if err != nil {
		return 0, err
	}

	// The difference has to be strictly less than 3, as the error message
	// states, so a difference of exactly 3 is rejected as well.
	lengthDifference := math.Abs(float64(len(e1) - len(e2)))
	if lengthDifference >= 3 {
		return 0, errors.New("encoded must have length difference less than 3")
	}

	// TODO: compute the similarity rating of e1 and e2 and relate it to the
	// threshold returned by Minimum.
	return 0, errors.New("not fully implemented")
}
// vowels is the set of upper-case ASCII vowels. Encoding upper-cases its
// input before looking letters up here, so lower-case keys are not needed.
var vowels = map[rune]bool{'A': true, 'E': true, 'I': true, 'O': true, 'U': true}
// Encoding returns the match rating approach codex of s. The input is
// upper-cased and then reduced in these steps:
//
//  1. Delete all vowels unless the vowel begins the word.
//  2. Remove the second consonant of any double consonants present. Letters
//     are compared after step 1, so consonants that become adjacent once the
//     vowels between them are gone are collapsed as well.
//  3. Reduce the codex to 6 letters by joining the first 3 and last 3 letters
//     only.
//
// The string is processed rune by rune, so multi-byte letters are kept
// intact. An empty input yields an empty codex. The returned error is always
// nil.
//
// From Wikipedia: https://en.wikipedia.org/wiki/Match_rating_approach
func (m MRA) Encoding(s string) (string, error) {
	letters := []rune(strings.ToUpper(s))

	codex := make([]rune, 0, len(letters))
	for i, r := range letters {
		// Step 1: only the very first letter may be a vowel.
		if i > 0 && vowels[r] {
			continue
		}
		// Step 2: skip a letter that repeats the one just kept.
		if n := len(codex); n > 0 && codex[n-1] == r {
			continue
		}
		codex = append(codex, r)
	}

	// Step 3: keep the first three and the last three letters.
	if len(codex) > 6 {
		return string(codex[:3]) + string(codex[len(codex)-3:]), nil
	}
	return string(codex), nil
}