12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
- import json
- import requests
- # NOTE: ollama must be running for this to work, start the ollama app or run `ollama serve`
- model = "llama3" # TODO: update this for whatever model you wish to use
- def chat(messages):
- r = requests.post(
- "http://0.0.0.0:11434/api/chat",
- json={"model": model, "messages": messages, "stream": True},
- stream=True
- )
- r.raise_for_status()
- output = ""
- for line in r.iter_lines():
- body = json.loads(line)
- if "error" in body:
- raise Exception(body["error"])
- if body.get("done") is False:
- message = body.get("message", "")
- content = message.get("content", "")
- output += content
- # the response streams one token at a time, print that as we receive it
- print(content, end="", flush=True)
- if body.get("done", False):
- message["content"] = output
- return message
- def main():
- messages = []
- while True:
- user_input = input("Enter a prompt: ")
- if not user_input:
- exit()
- print()
- messages.append({"role": "user", "content": user_input})
- message = chat(messages)
- messages.append(message)
- print("\n\n")
- if __name__ == "__main__":
- main()
|