natural_sorting.sf 3.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. #!/usr/bin/ruby
  2. #
  3. ## https://rosettacode.org/wiki/Natural_sorting#Sidef
  4. #
  # Extensions to the built-in String class. Each method returns a new string
  # meant to be used as a sort key (or as one step in building a sort key);
  # the receiver itself is never modified.
  class String {
      # Sort groups of digits in number order. Sort by order of magnitude then lexically.
      #
      # The receiver is lower-cased and every run of digits is rewritten as
      #   "0" + <character whose code is the digit count> + <digits>
      # so that a plain string comparison orders shorter (smaller) numbers
      # before longer (larger) ones, and equal-length numbers digit by digit.
      # The original string is appended after a NUL separator so that keys
      # which are otherwise equal still compare deterministically.
      -> naturally {
          var key = self.lc.gsub(/(\d+)/, {|s1|
              # Leading zeros do not change a number's magnitude, so they must
              # not count toward its length: without this, "007" would sort
              # after "10". The lookahead keeps one digit when the run is all
              # zeros, so "000" becomes "0" rather than an empty string.
              var digits = s1.sub(/^0+(?=\d)/, '');
              "0" + digits.len.chr + digits
          });
          key + "\x0" + self
      };

      # Collapse multiple ws characters to a single.
      #
      # Only runs of the *same* whitespace character are collapsed (the regex
      # uses a back-reference), so a space followed by a tab is left alone.
      # Call `normalize` first when mixed whitespace should be merged too.
      -> collapse { self.gsub(/(\s)\1+/, {|s1| s1 }) };

      # Convert all ws characters to a space.
      -> normalize { self.gsub(/(\s)/, ' ') };

      # Ignore common leading articles for title sorts.
      #
      # Removes one leading "a", "an" or "the" (any letter case, whole word
      # only) together with the whitespace after it. The pattern is anchored
      # at the start of the string, so leading whitespace must be trimmed
      # beforehand for the article to be recognised.
      -> title { self.sub(/^(?:a|an|the)\b\s*/i, '') };

      # Decompose ISO-Latin1 glyphs to their base character.
      #
      # Every character listed in the table below is replaced by the one- or
      # two-letter ASCII string that follows it; all other characters are
      # left unchanged.
      -> latin1_decompose {
          # Flat list of "glyph replacement" pairs, turned into a lookup hash.
          # Declared static so the table is built only once.
          static tr = Hash.new(%w(
              Æ AE æ ae Þ TH þ th Ð TH ð th ß ss À A Á A Â A Ã A Ä A Å A à a á a
              â a ã a ä a å a Ç C ç c È E É E Ê E Ë E è e é e ê e ë e Ì I Í I Î
              I Ï I ì i í i î i ï i Ò O Ó O Ô O Õ O Ö O Ø O ò o ó o ô o õ o ö o
              ø o Ñ N ñ n Ù U Ú U Û U Ü U ù u ú u û u ü u Ý Y ÿ y ý y
          )...);
          # One alternation that matches any glyph present in the table.
          var re = Regex.new('(' + tr.keys.join('|') + ')');
          self.gsub(re, {|s1| tr{s1} });
      }
  }
  # Table of demonstrations. Every entry is a three-element array:
  #   [0] description printed before the results,
  #   [1] the strings to be sorted,
  #   [2] a block that maps one string to the key it is sorted by.
  #
  # The whitespace inside the sample strings is significant: several tasks
  # are only meaningful when the inputs really contain leading, repeated or
  # non-space whitespace, so do not reformat these literals.
  var tests = [
      [
          "Task 1a\nSort while ignoring leading spaces.",
          [
              'ignore leading spaces: 1', '   ignore leading spaces: 4',
              '  ignore leading spaces: 3', ' ignore leading spaces: 2'
          ],
          { .trim } # builtin method.
      ],
      [
          "Task 1b\nSort while ignoring multiple adjacent spaces.",
          [
              # Different run lengths between "m.a.s" and "spaces" so that
              # the plain sort and the collapsed sort actually disagree.
              'ignore m.a.s spaces: 3', 'ignore m.a.s  spaces: 1',
              'ignore m.a.s    spaces: 4', 'ignore m.a.s   spaces: 2'
          ],
          { .collapse }
      ],
      [
          "Task 2\nSort with all white space normalized to regular spaces.",
          [
              "Normalized\tspaces: 4", "Normalized\xa0spaces: 1",
              "Normalized\x20spaces: 2", "Normalized\nspaces: 3"
          ],
          { .normalize }
      ],
      [
          "Task 3\nSort case independently.",
          [
              'caSE INDEPENDENT: 3', 'casE INDEPENDENT: 2',
              'cASE INDEPENDENT: 4', 'case INDEPENDENT: 1'
          ],
          { .lc } # builtin method
      ],
      [
          "Task 4\nSort groups of digits in natural number order.",
          %w(Foo100bar99baz0.txt foo100bar10baz0.txt foo1000bar99baz10.txt
             foo1000bar99baz9.txt 201st 32nd 3rd 144th 17th 2 95),
          { .naturally }
      ],
      [
          "Task 5 ( mixed with 1, 2, 3 & 4 )\n"
          + "Sort titles, normalize white space, collapse multiple spaces to\n"
          + "single, trim leading white space, ignore common leading articles\n"
          + 'and sort digit groups in natural order.',
          [
              # Includes a tab, repeated spaces and leading spaces so that
              # every step of the key pipeline below has something to do.
              "The Wind\tin the Willows 8", '  The 39 Steps 3',
              'The    7th Seal 1', 'Wanda 6',
              'A Fish Called Wanda 5', ' The Wind and the Lion 7',
              'Any Which Way But Loose 4', '12 Monkeys 2'
          ],
          { .normalize.collapse.trim.title.naturally }
      ],
      [
          "Task 6, 7, 8\nMap letters in Latin1 that have accents or decompose to two\n"
          + 'characters to their base characters for sorting.',
          %w(apple Ball bald car Card above Æon æon aether
             niño nina e-mail Évian evoke außen autumn),
          { .latin1_decompose.naturally }
      ]
  ];
  # Run every demonstration: print its description, then the strings in the
  # default sort order, then the same strings ordered by the demonstration's
  # key block, followed by a separator line.
  tests.each { |entry|
      # Read the three slots by position instead of popping them off, so the
      # shared `tests` table is left intact and can be iterated again.
      var title = entry[0];
      var array = entry[1];
      var code  = entry[2];

      say title+"\n";

      say "Standard Sort:\n";
      array.sort.each { .say };

      say "\nNatural Sort:\n";
      array.sort_by(code).each { .say };

      say "\n#{'*' * 40}\n";
  }