Aleph-Alpha
diff --git a/‎.env-template‎
Lines changed: 2 additions & 0 deletions b/‎.env-template‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 168 additions & 0 deletions b/‎.gitignore‎
Lines changed: 168 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 20 additions & 0 deletions b/‎README.md‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎day_1/app.py‎
Lines changed: 125 additions & 0 deletions b/‎day_1/app.py‎
Lines changed: 125 additions & 0 deletions
@@ -0,0 +1,2 @@
+AA_TOKEN=""
+AA_NAMESPACE=""
@@ -0,0 +1,168 @@
+
+.vscode/*
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+
+qdrant_storage
+.python-version
@@ -0,0 +1,20 @@
+# Setup
+
+1. Get an Aleph Alpha API token.
+    1. Go to your personal [profile page](https://app.aleph-alpha.com/profile).
+    2. Click on "Create Token".
+    3. Save the generated token.
+2. Install and start Trace Viewer
+    1. Artifactory Token
+        1. Go to [Artifactory](https://alephalpha.jfrog.io/ui/login/) and use the "Forgot Password?" function to set a password.
+        2. Log in. 
+        3. Click on your profile icon in the top-right corner and click on "Set Me Up".
+        4. Click on "Generic".
+        5. Enter your password and click on "Generate Token & Create Instructions".
+        6. Save the generated token.
+    2. Run `docker login https://alephalpha.jfrog.io --username YOUR_EMAIL --password YOUR_TOKEN` (Fill in your email and token!)
+    3. Run `docker run -p 3000:3000 alephalpha.jfrog.io/container-images/trace-viewer:latest`
+3. Set up the Python environment
+    1. Run `poetry install`
+    2. Copy `.env-template` to `.env` and fill in `AA_TOKEN` and `AA_NAMESPACE`.
+4. Start the Streamlit app
+    1. Run `poetry run streamlit run ./day_1/app.py`
@@ -0,0 +1,125 @@
+import os
+from typing import Iterator
+from intelligence_layer.core.model import LuminousControlModel
+from intelligence_layer.core.text_highlight import ScoredTextHighlight
+from intelligence_layer.core.tracer.tracer import NoOpTracer
+from intelligence_layer.examples import (
+    MultipleChunkRetrieverQa,
+    MultipleChunkRetrieverQaOutput,
+    RetrieverBasedQaInput,
+)
+import streamlit as st
+
+from intelligence_layer.connectors import (
+    CollectionPath,
+    DocumentIndexClient,
+    DocumentIndexRetriever,
+)
+from dotenv import load_dotenv, find_dotenv
+
+
+load_dotenv(find_dotenv(), override=True)
+
+AA_TOKEN = os.getenv("AA_TOKEN")
+NAMESPACE = os.getenv("AA_NAMESPACE")
+
+if AA_TOKEN is None or NAMESPACE is None:
+    raise Exception("No AA_TOKEN or NAMESPACE provided.")
+
+di_client = DocumentIndexClient(token=os.getenv("AA_TOKEN"))
+
+
+def run_task(
+    collection_name: str, index_name: str, user_prompt: str
+) -> MultipleChunkRetrieverQaOutput:
+    raise Exception("Not implemented")
+
+
+# Opening and closing HTML markup that text_ranges() places around each
+# highlighted passage.
+HTML_HIGHLIGHT_START = '<span style="background: yellow;">'
+HTML_HIGHLIGHT_END = "</span>"
+
+
+def text_ranges(
+    source_text: str, highlights: list[ScoredTextHighlight]
+) -> Iterator[str]:
+    def wrap_highlights(text: str) -> str:
+        return HTML_HIGHLIGHT_START + text + HTML_HIGHLIGHT_END
+
+    if not highlights:
+        yield source_text
+
+    current_pos = 0
+    highlights.sort(key=lambda x: x.start)
+    for highlight in highlights:
+        if current_pos < highlight.start:
+            yield source_text[current_pos : highlight.start]
+        current_pos = highlight.end
+        if highlight.start >= current_pos:
+            raise ValueError("Overlapping Highlights detected")
+        yield wrap_highlights(source_text[max(0, highlight.start) : current_pos])
+    last_highlight = highlights[-1] if highlights else None
+    if last_highlight and last_highlight.end < len(source_text):
+        yield wrap_highlights(source_text[last_highlight.end :])
+
+
+def display_response(task_output: MultipleChunkRetrieverQaOutput):
+    answer = task_output.answer
+
+    st.write(answer)
+    st.divider()
+
+    for source in task_output.sources:
+        text = source.chunk.chunk
+
+        highlights = sorted(source.highlights, key=lambda highlight: highlight.start)
+
+        highlighted_text = "".join(text_ranges(text, highlights))
+
+        st.write(
+            highlighted_text,
+            unsafe_allow_html=True,
+        )
+        st.divider()
+
+
+def main():
+    st.title("Frage & Antwort")
+
+    with st.sidebar:
+        st.write("### Collection")
+        collections = []
+
+        collections = di_client.list_collections(NAMESPACE)  # type: ignore
+
+        collection_name = st.selectbox(
+            label="Collection auswählen",
+            options=[collection.collection for collection in collections],
+        )
+
+        if collection_name is None:
+            raise Exception("No collection selected.")
+
+        st.write("### Index")
+        indexes = []
+        collection_path = CollectionPath(
+            namespace=NAMESPACE,  # type: ignore
+            collection=collection_name,
+        )
+        indexes = di_client.list_assigned_index_names(collection_path)
+        index_name = st.selectbox(label="Index auswählen", options=indexes)
+
+        if index_name is None:
+            raise Exception("No index selected.")
+
+    user_prompt = st.text_input(
+        "Frage", placeholder="Warum ist der Himmel blau?", label_visibility="hidden"
+    )
+
+    if st.button("Antwort finden", use_container_width=True):
+        with st.spinner("Lädt..."):
+            task_output = run_task(collection_name, index_name, user_prompt)
+        display_response(task_output)
+
+
+# Build the page only when this file is executed as the entry script.
+if __name__ == "__main__":
+    main()