Skip to content

Commit 6f7d86d

Browse files
Reset: Initial commit
0 parents  commit 6f7d86d

16 files changed

+5635
-0
lines changed

.env-template

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
AA_TOKEN=""
2+
AA_NAMESPACE=""

.gitignore

Lines changed: 168 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,168 @@
1+
2+
.vscode/*
3+
4+
# Byte-compiled / optimized / DLL files
5+
__pycache__/
6+
*.py[cod]
7+
*$py.class
8+
9+
# C extensions
10+
*.so
11+
12+
# Distribution / packaging
13+
.Python
14+
build/
15+
develop-eggs/
16+
dist/
17+
downloads/
18+
eggs/
19+
.eggs/
20+
lib/
21+
lib64/
22+
parts/
23+
sdist/
24+
var/
25+
wheels/
26+
share/python-wheels/
27+
*.egg-info/
28+
.installed.cfg
29+
*.egg
30+
MANIFEST
31+
32+
# PyInstaller
33+
# Usually these files are written by a python script from a template
34+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
35+
*.manifest
36+
*.spec
37+
38+
# Installer logs
39+
pip-log.txt
40+
pip-delete-this-directory.txt
41+
42+
# Unit test / coverage reports
43+
htmlcov/
44+
.tox/
45+
.nox/
46+
.coverage
47+
.coverage.*
48+
.cache
49+
nosetests.xml
50+
coverage.xml
51+
*.cover
52+
*.py,cover
53+
.hypothesis/
54+
.pytest_cache/
55+
cover/
56+
57+
# Translations
58+
*.mo
59+
*.pot
60+
61+
# Django stuff:
62+
*.log
63+
local_settings.py
64+
db.sqlite3
65+
db.sqlite3-journal
66+
67+
# Flask stuff:
68+
instance/
69+
.webassets-cache
70+
71+
# Scrapy stuff:
72+
.scrapy
73+
74+
# Sphinx documentation
75+
docs/_build/
76+
77+
# PyBuilder
78+
.pybuilder/
79+
target/
80+
81+
# Jupyter Notebook
82+
.ipynb_checkpoints
83+
84+
# IPython
85+
profile_default/
86+
ipython_config.py
87+
88+
# pyenv
89+
# For a library or package, you might want to ignore these files since the code is
90+
# intended to run in multiple environments; otherwise, check them in:
91+
# .python-version
92+
93+
# pipenv
94+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
95+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
96+
# having no cross-platform support, pipenv may install dependencies that don't work, or not
97+
# install all needed dependencies.
98+
#Pipfile.lock
99+
100+
# poetry
101+
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
102+
# This is especially recommended for binary packages to ensure reproducibility, and is more
103+
# commonly ignored for libraries.
104+
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
105+
#poetry.lock
106+
107+
# pdm
108+
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
109+
#pdm.lock
110+
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
111+
# in version control.
112+
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
113+
.pdm.toml
114+
.pdm-python
115+
.pdm-build/
116+
117+
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
118+
__pypackages__/
119+
120+
# Celery stuff
121+
celerybeat-schedule
122+
celerybeat.pid
123+
124+
# SageMath parsed files
125+
*.sage.py
126+
127+
# Environments
128+
.env
129+
.venv
130+
env/
131+
venv/
132+
ENV/
133+
env.bak/
134+
venv.bak/
135+
136+
# Spyder project settings
137+
.spyderproject
138+
.spyproject
139+
140+
# Rope project settings
141+
.ropeproject
142+
143+
# mkdocs documentation
144+
/site
145+
146+
# mypy
147+
.mypy_cache/
148+
.dmypy.json
149+
dmypy.json
150+
151+
# Pyre type checker
152+
.pyre/
153+
154+
# pytype static type analyzer
155+
.pytype/
156+
157+
# Cython debug symbols
158+
cython_debug/
159+
160+
# PyCharm
161+
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
162+
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
163+
# and can be added to the global gitignore or merged into this file. For a more nuclear
164+
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
165+
#.idea/
166+
167+
qdrant_storage
168+
.python-version

README.md

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Setup
2+
3+
1. Get Aleph Alpha API Token.
4+
1. Go to your personal [profile page](https://app.aleph-alpha.com/profile).
5+
2. Click on "Create Token".
6+
3. Save the generated token; the app reads it as `AA_TOKEN` from a `.env` file (see `.env-template`).
7+
2. Install and start Trace Viewer
8+
1. Artifactory Token
9+
1. Go to [Artifactory](https://alephalpha.jfrog.io/ui/login/) and use the "Forgot Password?" function to set a password.
10+
2. Log in.
11+
3. Click on your profile icon in the top-right corner and click on "Set Me Up".
12+
4. Click on "Generic".
13+
5. Enter your password and click on "Generate Token & Create Instructions".
14+
6. Save the generated token.
15+
2. Run `docker login https://alephalpha.jfrog.io --username YOUR_EMAIL --password YOUR_TOKEN` (Fill in your email and token!)
16+
3. Run `docker run -p 3000:3000 alephalpha.jfrog.io/container-images/trace-viewer:latest`
17+
3. Set up the Python environment
18+
1. Run `poetry install`
19+
4. Start the Streamlit app
20+
1. Run `poetry run streamlit run ./day_1/app.py`

day_1/app.py

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
import os
2+
from typing import Iterator
3+
from intelligence_layer.core.model import LuminousControlModel
4+
from intelligence_layer.core.text_highlight import ScoredTextHighlight
5+
from intelligence_layer.core.tracer.tracer import NoOpTracer
6+
from intelligence_layer.examples import (
7+
MultipleChunkRetrieverQa,
8+
MultipleChunkRetrieverQaOutput,
9+
RetrieverBasedQaInput,
10+
)
11+
import streamlit as st
12+
13+
from intelligence_layer.connectors import (
14+
CollectionPath,
15+
DocumentIndexClient,
16+
DocumentIndexRetriever,
17+
)
18+
from dotenv import load_dotenv, find_dotenv
19+
20+
21+
# Load settings from a .env file located via find_dotenv(); override=True is
# passed so that values from the file replace ones already set in the process
# environment (see the python-dotenv documentation).
load_dotenv(find_dotenv(), override=True)

AA_TOKEN = os.getenv("AA_TOKEN")
NAMESPACE = os.getenv("AA_NAMESPACE")

# Treat empty strings like missing values: .env-template defines both keys as
# "", so an unedited copy must not pass this check.
if not AA_TOKEN or not NAMESPACE:
    raise RuntimeError("No AA_TOKEN or AA_NAMESPACE provided.")

# Reuse the validated token instead of reading the environment a second time.
di_client = DocumentIndexClient(token=AA_TOKEN)
30+
31+
32+
def run_task(
    collection_name: str, index_name: str, user_prompt: str
) -> MultipleChunkRetrieverQaOutput:
    """Produce the question-answering output for a user prompt.

    This is a placeholder that has not been implemented yet. ``main`` calls it
    with the collection and index chosen in the sidebar and the text entered
    by the user, and hands the result to ``display_response``.

    Args:
        collection_name: Name of the selected collection.
        index_name: Name of the selected index.
        user_prompt: The question entered by the user.

    Raises:
        NotImplementedError: Always, until the task is implemented.
    """
    # NotImplementedError is a subclass of Exception, so callers that catch
    # the previous bare Exception keep working.
    raise NotImplementedError("Not implemented")
36+
37+
38+
HTML_HIGHLIGHT_START = '<span style="background: yellow;">'
HTML_HIGHLIGHT_END = "</span>"


def text_ranges(
    source_text: str, highlights: list[ScoredTextHighlight]
) -> Iterator[str]:
    """Split ``source_text`` into plain and highlighted fragments.

    Fragments are yielded in text order. Fragments covered by a highlight are
    wrapped in ``HTML_HIGHLIGHT_START`` / ``HTML_HIGHLIGHT_END``; everything
    else, including the text after the last highlight, is yielded unchanged.
    Joining the fragments gives the text with highlight markup inserted.

    Args:
        source_text: The text the highlights refer to.
        highlights: Objects with ``start`` and ``end`` character offsets into
            ``source_text``. They may be in any order; the list is not
            modified.

    Yields:
        Consecutive fragments of ``source_text``, highlighted ones wrapped in
        the HTML span.

    Raises:
        ValueError: If a highlight starts before the previous one has ended.
    """
    if not highlights:
        yield source_text
        return

    current_pos = 0
    # sorted() works on a copy so the caller's list keeps its order.
    for highlight in sorted(highlights, key=lambda h: h.start):
        start = max(0, highlight.start)
        end = highlight.end
        if start < current_pos:
            raise ValueError("Overlapping Highlights detected")
        if current_pos < start:
            # Plain text between the previous highlight and this one.
            yield source_text[current_pos:start]
        if start < end:
            yield HTML_HIGHLIGHT_START + source_text[start:end] + HTML_HIGHLIGHT_END
        # Never move backwards, even for an empty or inverted highlight.
        current_pos = max(start, end)

    if current_pos < len(source_text):
        # Trailing text after the last highlight is not highlighted.
        yield source_text[current_pos:]
63+
64+
65+
def display_response(task_output: MultipleChunkRetrieverQaOutput):
    """Write the answer and then every source chunk with its highlights.

    The answer is written first, followed by a divider. Each source is then
    written as its chunk text with highlight markup inserted by
    ``text_ranges``, and is followed by its own divider.
    """
    st.write(task_output.answer)
    st.divider()

    for src in task_output.sources:
        chunk_text = src.chunk.chunk
        # Pass a sorted copy so the highlights stored on the source keep their order.
        ordered = sorted(src.highlights, key=lambda h: h.start)
        fragments = text_ranges(chunk_text, ordered)

        # NOTE(review): the chunk text is inserted unescaped and written with
        # unsafe_allow_html=True -- confirm chunk contents are trusted.
        st.write("".join(fragments), unsafe_allow_html=True)
        st.divider()
83+
84+
85+
def main():
    """Build the page: choose a collection and index, ask a question, show the answer.

    The sidebar lists the collections of ``NAMESPACE`` and, for the chosen
    collection, its assigned index names. The main area holds the question
    input and a button that runs ``run_task`` and passes its output to
    ``display_response``.

    Raises:
        Exception: If no collection or no index is selected.
    """
    st.title("Frage & Antwort")

    with st.sidebar:
        st.write("### Collection")
        collections = di_client.list_collections(NAMESPACE)  # type: ignore

        collection_name = st.selectbox(
            label="Collection auswählen",
            options=[collection.collection for collection in collections],
        )

        # Nothing was selected -- presumably because there are no options.
        if collection_name is None:
            raise Exception("No collection selected.")

        st.write("### Index")
        collection_path = CollectionPath(
            namespace=NAMESPACE,  # type: ignore
            collection=collection_name,
        )
        indexes = di_client.list_assigned_index_names(collection_path)
        index_name = st.selectbox(label="Index auswählen", options=indexes)

        if index_name is None:
            raise Exception("No index selected.")

    user_prompt = st.text_input(
        "Frage", placeholder="Warum ist der Himmel blau?", label_visibility="hidden"
    )

    if st.button("Antwort finden", use_container_width=True):
        with st.spinner("Lädt..."):
            task_output = run_task(collection_name, index_name, user_prompt)
            display_response(task_output)


if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)