client.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849
  1. import json
  2. import requests
# NOTE: Ollama must be running for this to work; start the Ollama app or run `ollama serve`.
# Model name sent as the "model" field of every chat request made by chat().
model = "llama3" # TODO: update this for whatever model you wish to use
  5. def chat(messages):
  6. r = requests.post(
  7. "http://0.0.0.0:11434/api/chat",
  8. json={"model": model, "messages": messages, "stream": True},
  9. stream=True
  10. )
  11. r.raise_for_status()
  12. output = ""
  13. for line in r.iter_lines():
  14. body = json.loads(line)
  15. if "error" in body:
  16. raise Exception(body["error"])
  17. if body.get("done") is False:
  18. message = body.get("message", "")
  19. content = message.get("content", "")
  20. output += content
  21. # the response streams one token at a time, print that as we receive it
  22. print(content, end="", flush=True)
  23. if body.get("done", False):
  24. message["content"] = output
  25. return message
  26. def main():
  27. messages = []
  28. while True:
  29. user_input = input("Enter a prompt: ")
  30. if not user_input:
  31. exit()
  32. print()
  33. messages.append({"role": "user", "content": user_input})
  34. message = chat(messages)
  35. messages.append(message)
  36. print("\n\n")
# Start the interactive chat loop only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()