Streamlit chat UI - Tex | Memory API for agents

This recipe builds a small browser demo. Streamlit reruns the whole script on every interaction, so Tex holds the long-term memory while st.session_state stores only the UI state for the current browser session.

Install packages

pip install tex-sdk streamlit openai

Bootstrap clients once

Wrap Tex and OpenAI in @st.cache_resource so Streamlit does not recreate clients on every interaction:

import os
import streamlit as st
from openai import OpenAI
from tex import Tex

st.set_page_config(page_icon="🧠", page_title="Tex chat")


@st.cache_resource
def get_clients():
    """Build the Tex and OpenAI clients used by the app.

    Decorated with ``st.cache_resource`` so Streamlit reuses the returned
    pair instead of recreating the clients on every interaction.

    Returns:
        A ``(tex, gpt)`` tuple: the Tex client and the OpenAI client.

    Raises:
        KeyError: If ``TEX_API_KEY`` is not set in the environment.
    """
    api_key = os.environ["TEX_API_KEY"]
    # TEX_BASE_URL is optional; the hosted endpoint is the fallback.
    base_url = os.environ.get("TEX_BASE_URL", "https://api.getmetacognition.com")
    return Tex(api_key=api_key, base_url=base_url), OpenAI()


tex, gpt = get_clients()

Pin a session id per browser tab

Store a stable sid in session state so it survives reruns. The sid is built from the ?uid= query parameter, so different uid values get separate session ids; visits without ?uid= all share web-anon-default:

# Build the session id once per Streamlit session; later reruns reuse the stored value.
if "sid" not in st.session_state:
    uid = st.query_params.get("uid", "anon")  # no ?uid= in the URL -> "anon"
    st.session_state["sid"] = f"web-{uid}-default"
sid = st.session_state["sid"]

# UI-only transcript; starts empty when the key is not in session state yet.
st.session_state.setdefault("messages", [])

Show usage in the sidebar

Call tex.usage.today() to show current quota usage during demos:

# Sidebar: today's token usage as two progress bars, plus the active session id.
sidebar = st.sidebar
sidebar.write("## Usage today")

usage = tex.usage.today()
in_used, in_limit = usage.tokens_in_used, usage.tokens_in_limit
out_used, out_limit = usage.tokens_out_used, usage.tokens_out_limit

# max(1, limit) avoids dividing by zero; min(1.0, ...) caps the bar value at 1.0.
sidebar.progress(
    min(1.0, in_used / max(1, in_limit)),
    text=f"in:  {in_used:,} / {in_limit:,}",
)
sidebar.progress(
    min(1.0, out_used / max(1, out_limit)),
    text=f"out: {out_used:,} / {out_limit:,}",
)
sidebar.caption(f"session: `{sid}`")

Render chat + handle input

Render messages from st.session_state.messages, then handle st.chat_input:

# Replay the transcript kept in session state so earlier turns are drawn again on each rerun.
for msg in st.session_state["messages"]:
    st.chat_message(msg["role"]).write(msg["text"])

import datetime


def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string ending in "Z"."""
    return datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")


# Handle one chat turn: recall memory, stream the model's answer, then store both turns.
if prompt := st.chat_input("Talk to me…"):
    asked_at = _utc_now_iso()

    with st.chat_message("user"):
        st.write(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Recalling…"):
            hits = tex.recall(q=prompt, session_id=sid, top_k=5)

        # Show the recalled turns so the reader can see what is passed to the model.
        if hits.hits.turns:
            st.caption(f"confidence {hits.confidence:.2f}")
            with st.expander("Memory used"):
                for h in hits.hits.turns:
                    st.write(f"`{h.score:.2f}` — {h.text}")

        memory_block = "\n".join(f"- {h.text}" for h in hits.hits.turns)
        sys_prompt = f"You are a thoughtful assistant. Memory:\n{memory_block}"

        chat = gpt.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user",   "content": prompt},
            ],
            stream=True,
        )
        # A streamed chunk can arrive with an empty `choices` list (for example a
        # usage-only chunk); skip those instead of indexing into an empty list.
        answer = st.write_stream(
            chunk.choices[0].delta.content or ""
            for chunk in chat
            if chunk.choices
        )

    # Stamp the assistant turn after streaming finishes so the two turns do not
    # share one timestamp.
    answered_at = _utc_now_iso()

    # Update the on-screen transcript before the network call, so the exchange
    # stays visible on the next rerun even if storing it in Tex fails.
    st.session_state.messages += [
        {"role": "user", "text": prompt},
        {"role": "assistant", "text": answer},
    ]
    tex.conversations.remember(
        session_id=sid,
        turns=[
            {"role": "user", "text": prompt, "timestamp": asked_at},
            {"role": "assistant", "text": answer, "timestamp": answered_at},
        ],
    )

Run the app

export TEX_API_KEY=tex_live_...
export OPENAI_API_KEY=sk-...
streamlit run app.py

Open http://localhost:8501/?uid=alice and http://localhost:8501/?uid=bob in two tabs to test isolation.

One file

If you want one block to paste, use this full app.py:

app.py

import os
import datetime
import streamlit as st
from openai import OpenAI
from tex import Tex

st.set_page_config(page_icon="🧠", page_title="Tex chat")


@st.cache_resource
def get_clients():
    """Build the Tex and OpenAI clients used by the app.

    Decorated with ``st.cache_resource`` so Streamlit reuses the returned
    pair instead of recreating the clients on every interaction.

    Returns:
        A ``(tex, gpt)`` tuple: the Tex client and the OpenAI client.

    Raises:
        KeyError: If ``TEX_API_KEY`` is not set in the environment.
    """
    api_key = os.environ["TEX_API_KEY"]
    # TEX_BASE_URL is optional; the hosted endpoint is the fallback.
    base_url = os.environ.get("TEX_BASE_URL", "https://api.getmetacognition.com")
    return Tex(api_key=api_key, base_url=base_url), OpenAI()


tex, gpt = get_clients()

# Build the session id once per Streamlit session; later reruns reuse the stored value.
if "sid" not in st.session_state:
    uid = st.query_params.get("uid", "anon")  # no ?uid= in the URL -> "anon"
    st.session_state["sid"] = f"web-{uid}-default"
sid = st.session_state["sid"]

# UI-only transcript; starts empty when the key is not in session state yet.
st.session_state.setdefault("messages", [])

# Sidebar: today's token usage as two progress bars, plus the active session id.
sidebar = st.sidebar
sidebar.write("## Usage today")

usage = tex.usage.today()
in_used, in_limit = usage.tokens_in_used, usage.tokens_in_limit
out_used, out_limit = usage.tokens_out_used, usage.tokens_out_limit

# max(1, limit) avoids dividing by zero; min(1.0, ...) caps the bar value at 1.0.
sidebar.progress(
    min(1.0, in_used / max(1, in_limit)),
    text=f"in:  {in_used:,} / {in_limit:,}",
)
sidebar.progress(
    min(1.0, out_used / max(1, out_limit)),
    text=f"out: {out_used:,} / {out_limit:,}",
)
sidebar.caption(f"session: `{sid}`")

# Replay the transcript kept in session state so earlier turns are drawn again on each rerun.
for msg in st.session_state["messages"]:
    st.chat_message(msg["role"]).write(msg["text"])

def _utc_now_iso() -> str:
    """Return the current UTC time as an ISO 8601 string ending in "Z"."""
    return datetime.datetime.now(datetime.timezone.utc).isoformat().replace("+00:00", "Z")


# Handle one chat turn: recall memory, stream the model's answer, then store both turns.
if prompt := st.chat_input("Talk to me…"):
    asked_at = _utc_now_iso()

    with st.chat_message("user"):
        st.write(prompt)

    with st.chat_message("assistant"):
        with st.spinner("Recalling…"):
            hits = tex.recall(q=prompt, session_id=sid, top_k=5)

        # Show the recalled turns so the reader can see what is passed to the model.
        if hits.hits.turns:
            st.caption(f"confidence {hits.confidence:.2f}")
            with st.expander("Memory used"):
                for h in hits.hits.turns:
                    st.write(f"`{h.score:.2f}` — {h.text}")

        memory_block = "\n".join(f"- {h.text}" for h in hits.hits.turns)
        sys_prompt = f"You are a thoughtful assistant. Memory:\n{memory_block}"

        chat = gpt.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": sys_prompt},
                {"role": "user",   "content": prompt},
            ],
            stream=True,
        )
        # A streamed chunk can arrive with an empty `choices` list (for example a
        # usage-only chunk); skip those instead of indexing into an empty list.
        answer = st.write_stream(
            chunk.choices[0].delta.content or ""
            for chunk in chat
            if chunk.choices
        )

    # Stamp the assistant turn after streaming finishes so the two turns do not
    # share one timestamp.
    answered_at = _utc_now_iso()

    # Update the on-screen transcript before the network call, so the exchange
    # stays visible on the next rerun even if storing it in Tex fails.
    st.session_state.messages += [
        {"role": "user", "text": prompt},
        {"role": "assistant", "text": answer},
    ]
    tex.conversations.remember(
        session_id=sid,
        turns=[
            {"role": "user", "text": prompt, "timestamp": asked_at},
            {"role": "assistant", "text": answer, "timestamp": answered_at},
        ],
    )

Why Streamlit + Tex

This pattern pairs a disposable browser UI with durable memory in Tex.

Refreshes keep memory: a browser refresh resets Streamlit session state, so the on-screen transcript clears, but Tex memory stays.
No Redis or Postgres for demos: Tex stores the long-term memory.
Visible recall: the expander shows exactly what the model saw.

​One file

​Why Streamlit + Tex

One file

Why Streamlit + Tex