neuro.py

import difflib

# Session history of questions asked and the answers that were returned.
HQues = []
HAns = []


def get_data():
    """Read token-encoded question:answer pairs from data/data.txt."""
    with open('data/data.txt', 'r', encoding='utf-8') as file:
        Q = []
        A = []
        for line in file:
            # Split on the first colon only, so answers may contain colons.
            question, answer = line.strip().split(':', 1)
            Q.append(question.strip())
            A.append(answer.strip())
        return Q, A

def get_tokens():
    """Read word:token pairs from data/tokens.txt."""
    with open('data/tokens.txt', 'r', encoding='utf-8') as file:
        words = []
        tokens = []
        for line in file:
            # Split on the last colon, so a word containing ':' still parses.
            word, token = line.strip().rsplit(':', 1)
            words.append(word.strip())
            tokens.append(token.strip())
        return words, tokens
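
# Assumed on-disk formats (an inference from the readers above, not stated in
# the original file): data/data.txt stores one token-encoded pair per line,
# e.g. "1 2:3 4", and data/tokens.txt stores one vocabulary entry per line,
# e.g. "hello:1".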

def word_to_token(word, words_list, tokens_list):
    """Return the token for a known word, or None for an unseen word."""
    try:
        index = words_list.index(word)
        return tokens_list[index]
    except ValueError:
        return None


def token_to_word(token, words_list, tokens_list):
    """Return the word for a known token, or None for an unseen token."""
    try:
        index = tokens_list.index(token)
        return words_list[index]
    except ValueError:
        return None

def sentence_to_tokens(sentence, words_list, tokens_list):
    """Encode a sentence as a list of tokens, learning unseen words."""
    words = sentence.split()
    new_tokens = []
    for word in words:
        token = word_to_token(word, words_list, tokens_list)
        if token is None:
            # Unseen word: assign the next sequential token and persist it.
            new_token = str(len(tokens_list) + 1)
            new_tokens.append(new_token)
            with open('data/tokens.txt', 'a', encoding='utf-8') as file:
                file.write(f"{word}:{new_token}\n")
            words_list.append(word)
            tokens_list.append(new_token)
        else:
            new_tokens.append(token)
    # new_tokens already holds the final tokens; the original re-lookup
    # (tokens_list[int(token) - 1]) crashed on any non-numeric token.
    return new_tokens
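
# Example (hypothetical state): with words_list == ['hello'] and
# tokens_list == ['1'], sentence_to_tokens('hello world', words_list,
# tokens_list) returns ['1', '2'] and appends "world:2" to data/tokens.txt.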

def tokens_to_sentence(tokens, words_list, tokens_list):
    """Decode a list of tokens into a sentence, skipping unknown tokens."""
    words = [token_to_word(token, words_list, tokens_list) for token in tokens]
    words = [word for word in words if word is not None]
    if words:
        words[0] = words[0].capitalize()
    sentence = ' '.join(words)
    return sentence

def compare_strings(string1, string2):
    matcher = difflib.SequenceMatcher(None, string1, string2)
    return matcher.ratio() * 100
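
# Example: compare_strings('hello', 'hallo') == 80.0 -- SequenceMatcher
# matches 4 of the 10 total characters, and 2 * 4 / 10 = 0.8.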

def neuroKGB(question):
    words, tokens = get_tokens()
    Q, A = get_data()
    # Encode the question the same way stored questions are encoded:
    # lowercase, then a space-separated token string, so that it is
    # directly comparable with the entries read from data/data.txt.
    question = question.lower()
    question = ' '.join(sentence_to_tokens(question, words, tokens))
    maxod = 0
    imaxod = -1
    for i, stored_question in enumerate(Q):
        similarity = compare_strings(question, stored_question)
        if similarity > maxod:
            maxod = similarity
            imaxod = i
    if imaxod == -1:
        return "I don't know how to answer that."
    HQues.append(question)
    HAns.append(A[imaxod])
    # A[imaxod] is a token string such as "3 4"; split it before decoding.
    answer = tokens_to_sentence(A[imaxod].split(), words, tokens)
    if maxod < 60:
        # The best match is weak (below 60% similarity): flag the answer.
        return "⚠ " + answer
    return answer
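
# Usage note (assuming the data files exist): neuroKGB('hello') returns the
# decoded answer of the most similar stored question, prefixed with "⚠"
# whenever the best similarity score is below 60%.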

def training(text):
    words, tokens = get_tokens()
    try:
        # Split on the first colon only, so the answer may contain colons.
        question, answer = text.split(':', 1)
    except ValueError:
        return "The training text is written incorrectly."
    # Lowercase the question so it matches the lookups done in neuroKGB().
    question = question.strip().lower()
    answer = answer.strip()
    if question == '':
        return "You did not write a question."
    if answer == '':
        return "You did not write an answer."
    question_in_tokens_string = ' '.join(sentence_to_tokens(question, words, tokens))
    answer_in_tokens_string = ' '.join(sentence_to_tokens(answer, words, tokens))
    new_data = question_in_tokens_string + ':' + answer_in_tokens_string
    with open('data/data.txt', 'a', encoding='utf-8') as file:
        file.write(f'{new_data}\n')
    return 'Success!'
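

# A minimal interactive loop (an illustrative addition, not in the original
# file), assuming data/data.txt and data/tokens.txt already exist: lines
# containing a colon are treated as "question:answer" training pairs, and
# everything else is answered by neuroKGB.
if __name__ == '__main__':
    while True:
        user_text = input('> ')
        if ':' in user_text:
            print(training(user_text))
        else:
            print(neuroKGB(user_text))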