query.py

#!/usr/bin/env python
#
# THE QUERIER
#
# The querier acts as an intermediary between the user interface and the
# Crawler's database. Although it can act as a user interface itself, that is
# not the goal of the querier. Rather, it focuses on presenting the data
# gathered by the crawler in the most diverse ways possible.
#
# VERSIONING:
# - A text-based user interface allowing for multiple queries per session, and
#   an option to recrawl the filesystem.
# - A text-based user interface allowing for index searching within plain-text
#   files.
# - A module that allows for the querying and preparation of results to be
#   displayed by another interface module.
#
import os
import sys
import sqlite3 as sql
import crawl

# Session variables:
query = "nothing"
results = "nothing still"
total = 0
# For the sake of modularity, the search methods are defined here so that
# other interface modules can call them directly (see the usage sketch after
# these definitions).

# Searching by file name only:
def simple_search(token):
    if token == "":
        return []
    result_set = crawl.cursor.execute(
        "SELECT path FROM fileindex WHERE name LIKE ?",
        ('%{}%'.format(token),))
    return result_set

# Searching within file contents:
def index_search(token):
    if token == "":
        return []
    result_set = crawl.cursor.execute(
        "SELECT path, contents FROM fileindex WHERE contents LIKE ?",
        ('%{}%'.format(token),))
    return result_set

# Recrawl the whole filesystem from the root directory:
def refresh():
    google = crawl.Crawler()
    google.crawl('/')
    return True
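
# A minimal sketch of how another interface module might call into this one
# (illustrative only; it assumes the crawler has already populated the
# fileindex table, and the query strings below are made up):
#
#     import query
#     for row in query.simple_search("notes"):
#         print row[0]                    # path of each matching file
#     for row in query.index_search("todo"):
#         print row[0], row[1][:80]       # path plus the start of its contents
#     query.refresh()                     # recrawl the filesystem from '/'
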
if __name__ == "__main__":
    # Start of program:
    print "========="
    print "Linux and Unix file locating utility"
    print "========="
    # Querying session:
    while True:
        total = 0
        query = raw_input("\nType the name of the file you're looking for, or '.com' for a list of commands:\n>_ ")
        if query == ".com":
            print """.com - shows a list of commands
.quit - exits the program
.index - searches within files
.refresh - refreshes the database by crawling again"""
            continue
        if query == ".quit":
            break
        if query == ".index":
            # Perform index searching and show a snippet of the results:
            query = raw_input("Type a query to search for: ")
            results = index_search(query)
            for result in results:
                print "=========\nIn %s, we found: " % result[0]
                # Creating 10-word-long snippets (up to 5 words on each side
                # of the match):
                try:
                    before = result[1].split(query)[0].split(" ")
                    before.reverse()  # required to iterate outwards from the match
                except IndexError:
                    before = ['']
                try:
                    after = result[1].split(query)[1].split(" ")
                except IndexError:
                    after = ['']
                snippet_before = []
                snippet_after = []
                word_count = 0
                snippet_size = 5  # word limit on each side
                while word_count < snippet_size:
                    try:
                        snippet_before.append(before[word_count])
                    except IndexError:
                        # If we are on either "margin" of the text content, we
                        # may not be able to extract all the required words:
                        pass
                    try:
                        snippet_after.append(after[word_count])
                    except IndexError:
                        pass
                    word_count += 1
                # Reverse the 'before' part once again to restore reading order.
                snippet_before.reverse()
                snippet = " ".join(snippet_before) + query + \
                    " ".join(snippet_after)
                print snippet + "\n"
                total += 1
            print "======\nFound %d files containing '%s'" % (total, query)
            continue
        if query == ".refresh":
            print "Refreshing database... this may take a while."
            refresh()  # recrawl the filesystem from the root directory
            print "=======\nRefreshing complete. Please try again."
            continue
        if query == "":
            continue  # this avoids the huge overhead of listing everything!
        # Standard file search:
        results = simple_search(query)
        for result in results:
            print result[0]
            total += 1
        crawl.conn.commit()
        print "======\n%d files found, named '%s'" % (total, query)
    # Finish everything beautifully:
    crawl.conn.close()
    print "Bye!"