folding_test.go 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980
  1. // Copyright (C) 2017 The Syncthing Authors.
  2. //
  3. // This Source Code Form is subject to the terms of the Mozilla Public
  4. // License, v. 2.0. If a copy of the MPL was not distributed with this file,
  5. // You can obtain one at https://mozilla.org/MPL/2.0/.
  6. package fs
  7. import (
  8. "testing"
  9. )
  10. var caseCases = [][2]string{
  11. {"", ""},
  12. {"hej", "hej"},
  13. {"HeJ!@#", "hej!@#"},
  14. // Western Europe diacritical stuff is trivial.
  15. {"ÜBERRÄKSMÖRGÅS", "überräksmörgås"},
  16. // As are ligatures.
  17. {"Æglefinus", "æglefinus"},
  18. {"IJssel", "ijssel"},
  19. // Cyrillic seems regular as well.
  20. {"Привет", "привет"},
  21. // Greek has multiple lower case characters for things depending on
  22. // context; we should always choose the same one.
  23. {"Ὀδυσσεύς", "ὀδυσσεύσ"},
  24. {"ὈΔΥΣΣΕΎΣ", "ὀδυσσεύσ"},
  25. // German ß doesn't really have an upper case variant, and we
  26. // shouldn't mess things up when lower casing it either. We don't
  27. // attempt to make ß equivalent to "ss".
  28. {"Reichwaldstraße", "reichwaldstraße"},
  29. // The Turks do their thing with the Is.... Like the Greek example
  30. // we pick just the one canonicalized "i" although you can argue
  31. // with this... From what I understand most operating systems don't
  32. // get this right anyway.
  33. {"İI", "ii"},
  34. // Arabic doesn't do case folding.
  35. {"العَرَبِيَّة", "العَرَبِيَّة"},
  36. // Neither does Hebrew.
  37. {"עברית", "עברית"},
  38. // Nor Chinese, in any variant.
  39. {"汉语/漢語 or 中文", "汉语/漢語 or 中文"},
  40. // Nor katakana, as far as I can tell.
  41. {"チャーハン", "チャーハン"},
  42. // Some special Unicode characters, however, are folded by OSes.
  43. {"\u212A", "k"},
  44. // Folding renormalizes to NFC
  45. {"A\xCC\x88", "\xC3\xA4"}, // ä
  46. {"a\xCC\x88", "\xC3\xA4"}, // ä
  47. }
  48. func TestUnicodeLowercaseNormalized(t *testing.T) {
  49. for _, tc := range caseCases {
  50. res := UnicodeLowercaseNormalized(tc[0])
  51. if res != tc[1] {
  52. t.Errorf("UnicodeLowercaseNormalized(%q) => %q, expected %q", tc[0], res, tc[1])
  53. }
  54. }
  55. }
  56. func BenchmarkUnicodeLowercaseMaybeChange(b *testing.B) {
  57. b.ReportAllocs()
  58. for i := 0; i < b.N; i++ {
  59. for _, s := range caseCases {
  60. UnicodeLowercaseNormalized(s[0])
  61. }
  62. }
  63. }
  64. func BenchmarkUnicodeLowercaseNoChange(b *testing.B) {
  65. b.ReportAllocs()
  66. for i := 0; i < b.N; i++ {
  67. for _, s := range caseCases {
  68. UnicodeLowercaseNormalized(s[1])
  69. }
  70. }
  71. }