SearchKey.py 5.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # COPYRIGHT: Openmoko Inc. 2010
  4. # LICENSE: GPL Version 3 or later
  5. # DESCRIPTION: Convert string to search key
  6. # AUTHORS: Sean Moss-Pultz <sean@openmoko.com>
  7. # Christopher Hall <hsw@openmoko.com>
  8. import os, sys
  9. import re
  10. import unicodedata
  11. # this _must_ be in ascending ASCII sequence
  12. KEYPAD_KEYS = """ !#$%&'()*+,-.0123456789=?@abcdefghijklmnopqrstuvwxyz"""
  13. # underscore and space
  14. whitespaces = re.compile(r'([\s_]+)', re.IGNORECASE)
  15. def make_key(text):
  16. """filter out only the chacters available on the keypad"""
  17. global whitespaces
  18. result = ''.join(c for c in strip_accents(text).strip().lower() if c in KEYPAD_KEYS)
  19. return compact_spaces(result)
  20. def all_characters():
  21. """string of all allowed characters in a search key"""
  22. return KEYPAD_KEYS
  23. def is_valid_character(c):
  24. """test if a single character is a valid search key character"""
  25. return c.lower() in KEYPAD_KEYS
  26. def compact_spaces(text):
  27. """condense runs of spaces"""
  28. global whitespaces
  29. return whitespaces.sub(' ', text).strip()
  30. def strip_accents(text):
  31. """convert all accented [a-zA-Z] to their unaccented form"""
  32. if type(text) == str:
  33. text = unicode(text, 'utf-8')
  34. return ''.join((c for c in unicodedata.normalize('NFD', text) if unicodedata.category(c) != 'Mn'))
  35. def test_keypad_keys():
  36. """check that data stucture is correct"""
  37. global KEYPAD_KEYS
  38. error_count = 0
  39. previous_character = 'NONE'
  40. previous_ord = 0
  41. # to check if in order: uncomment and look at result
  42. for c in KEYPAD_KEYS:
  43. value = ord(c)
  44. if value <= previous_ord:
  45. print('error "{0!r:s}" = {1:d} <= "{2!r:s}" = {3:d}'.format(c, value, previous_character, previous_ord))
  46. error_count += 1
  47. previous_ord = value
  48. print('total error count = {0:d}'.format(error_count))
  49. def test_strip_accents():
  50. """test strip_accents function"""
  51. source = u"""
  52. 0 1 2 3 4 5 6 7 8 9 A B C D E F
  53. 2x ! " # $ % & ' ( ) * + , - . /
  54. 3x 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  55. 4x @ A B C D E F G H I J K L M N O
  56. 5x P Q R S T U V W X Y Z [ \ ] ^ _
  57. 6x ` a b c d e f g h i j k l m n o
  58. 7x p q r s t u v w x y z { | } ~
  59. Ax   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
  60. Bx ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
  61. Cx À Á Â Ã Ä Å Æ Ç È É Ê Ë Ì Í Î Ï
  62. Dx Ð Ñ Ò Ó Ô Õ Ö × Ø Ù Ú Û Ü Ý Þ ß
  63. Ex à á â ã ä å æ ç è é ê ë ì í î ï
  64. Fx ð ñ ò ó ô õ ö ÷ ø ù ú û ü ý þ ÿ
  65. Ax   Ą ĸ Ŗ ¤ Ĩ Ļ § ¨ Š Ē Ģ Ŧ ­ Ž ¯
  66. Bx ° ą ˛ ŗ ´ ĩ ļ ˇ ¸ š ē ģ ŧ Ŋ ž ŋ
  67. Cx Ā Á Â Ã Ä Å Æ Į Č É Ę Ë Ė Í Î Ī
  68. Dx Đ Ņ Ō Ķ Ô Õ Ö × Ø Ų Ú Û Ü Ũ Ū ß
  69. Ex ā á â ã ä å æ į č é ę ë ė í î ī
  70. Fx đ ņ ō ķ ô õ ö ÷ ø ų ú û ü ũ ū ˙
  71. """
  72. correct = u"""
  73. 0 1 2 3 4 5 6 7 8 9 A B C D E F
  74. 2x ! " # $ % & ' ( ) * + , - . /
  75. 3x 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  76. 4x @ A B C D E F G H I J K L M N O
  77. 5x P Q R S T U V W X Y Z [ \ ] ^ _
  78. 6x ` a b c d e f g h i j k l m n o
  79. 7x p q r s t u v w x y z { | } ~
  80. Ax   ¡ ¢ £ ¤ ¥ ¦ § ¨ © ª « ¬ ­ ® ¯
  81. Bx ° ± ² ³ ´ µ ¶ · ¸ ¹ º » ¼ ½ ¾ ¿
  82. Cx A A A A A A Æ C E E E E I I I I
  83. Dx Ð N O O O O O × Ø U U U U Y Þ ß
  84. Ex a a a a a a æ c e e e e i i i i
  85. Fx ð n o o o o o ÷ ø u u u u y þ y
  86. Ax   A ĸ R ¤ I L § ¨ S E G Ŧ ­ Z ¯
  87. Bx ° a ˛ r ´ i l ˇ ¸ s e g ŧ Ŋ z ŋ
  88. Cx A A A A A A Æ I C E E E E I I I
  89. Dx Đ N O K O O O × Ø U U U U U U ß
  90. Ex a a a a a a æ i c e e e e i i i
  91. Fx đ n o k o o o ÷ ø u u u u u u ˙
  92. """
  93. converted = strip_accents(source)
  94. if correct == converted:
  95. print('Accents stripped sucessfully')
  96. else:
  97. print('Differences encountered')
  98. print('Source:')
  99. print(source)
  100. print('Converted:')
  101. print(converted)
  102. def test_make_key():
  103. """test make_key function"""
  104. source = """! " # $ % & ' ( ) * + , - . /
  105. 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
  106. @ A B C D E F G H I J K L M N O
  107. P Q R S T U V W X Y Z [ \ ] ^ _
  108. ` a b c d e f g h i j k l m n o
  109. p q r s t u v w x y z { | } ~
  110. """
  111. correct = """! # $ % & ' ( ) * + , - . 0 1 2 3 4 5 6 7 8 9 = ? @ a b c d e f g h i j k l m n o p q r s t u v w x y z a b c d e f g h i j k l m n o p q r s t u v w x y z"""
  112. converted = make_key(source)
  113. if correct == converted:
  114. print('Filtered sucessfully')
  115. else:
  116. print('Differences encountered')
  117. print('Source:')
  118. print(source)
  119. print('Converted:')
  120. print(converted)
  121. def main():
  122. """perform tests"""
  123. test_keypad_keys()
  124. test_strip_accents()
  125. test_make_key()
  126. # run the program
  127. if __name__ == "__main__":
  128. main()