stringdist.go 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115
  1. // Copyright 2019 Yunion
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package structarg
  15. import (
  16. "fmt"
  17. "sort"
  18. "strings"
  19. "github.com/texttheater/golang-levenshtein/levenshtein"
  20. )
// stringDistance pairs a candidate string with the measurements FindSimilar
// computed for it against the search target.
type stringDistance struct {
	// str is the candidate string itself.
	str string
	/* Levenshtein (edit) distance between the candidate and the target */
	dist int
	/* similarity rate, 0~1: totally different ~ identical */
	rate float64
}
// LevenshteinStrings is a sortable collection of candidate strings together
// with their distance measurements. Its Len, Swap and Less methods let it be
// passed to sort.Sort.
type LevenshteinStrings struct {
	// target is the string the candidates were compared against
	// (FindSimilar stores its search string here).
	target string
	// candidates holds one entry per retained candidate string.
	candidates []stringDistance
}
  32. func (strs LevenshteinStrings) Len() int {
  33. return len(strs.candidates)
  34. }
  35. func (strs LevenshteinStrings) Swap(i, j int) {
  36. strs.candidates[i], strs.candidates[j] = strs.candidates[j], strs.candidates[i]
  37. }
  38. func (strs LevenshteinStrings) Less(i, j int) bool {
  39. if strs.candidates[i].dist != strs.candidates[j].dist {
  40. if strs.candidates[i].dist < strs.candidates[j].dist {
  41. return true
  42. } else {
  43. return false
  44. }
  45. }
  46. if strs.candidates[i].rate != strs.candidates[j].rate {
  47. if strs.candidates[i].rate > strs.candidates[j].rate {
  48. return true
  49. } else {
  50. return false
  51. }
  52. }
  53. if strs.candidates[i].str < strs.candidates[j].str {
  54. return true
  55. }
  56. return false
  57. }
  58. /**
  59. *
  60. * minRate: minimal similarity ratio, between 0.0~1.0, 0.0: totally different, 1.0: exactly identitical
  61. */
  62. func FindSimilar(niddle string, stack []string, maxDist int, minRate float64) []string {
  63. cands := make([]stringDistance, 0)
  64. for i := 0; i < len(stack); i += 1 {
  65. cand := stringDistance{}
  66. dist := levenshtein.DistanceForStrings([]rune(stack[i]), []rune(niddle), levenshtein.DefaultOptions)
  67. rate := 1.0
  68. if len(stack[i])+len(niddle) > 0 {
  69. rate = float64(len(stack[i])+len(niddle)-dist) / float64(len(stack[i])+len(niddle))
  70. }
  71. if (maxDist < 0 || dist <= maxDist) && (minRate < 0.0 || minRate > 1.0 || rate >= minRate) {
  72. cand.str = stack[i]
  73. cand.dist = dist
  74. cand.rate = rate
  75. cands = append(cands, cand)
  76. }
  77. }
  78. lstrs := LevenshteinStrings{target: niddle, candidates: cands}
  79. sort.Sort(lstrs)
  80. result := make([]string, len(cands))
  81. for i := 0; i < len(result); i += 1 {
  82. result[i] = lstrs.candidates[i].str
  83. }
  84. return result
  85. }
  86. func ChoicesString(choices []string) string {
  87. if len(choices) == 0 {
  88. return ""
  89. }
  90. if len(choices) == 1 {
  91. return choices[0]
  92. }
  93. if len(choices) == 2 {
  94. return strings.Join(choices, " or ")
  95. }
  96. return fmt.Sprintf("%s or %s", strings.Join(choices[:len(choices)-1], ", "), choices[len(choices)-1])
  97. }
  98. func quotedChoicesString(choices []string) string {
  99. quoted := make([]string, len(choices))
  100. for i, c := range choices {
  101. quoted[i] = fmt.Sprintf("%q", c)
  102. }
  103. return ChoicesString(quoted)
  104. }