59 changes: 59 additions & 0 deletions README.md
@@ -93,6 +93,65 @@ CUSTOM_LLM_CLIENT_SECRET = "your-client-secret"
Your custom service must implement the expected OAuth2 client‑credentials flow and provide JSON endpoints
for listing models, obtaining completions, and fetching tokens as used by `CustomLLMService`.

#### Optional provider threads (conversation IDs)

For deployments using a custom LLM service, you can enable provider‑side threads to cache context between turns. This is optional and disabled by default. When enabled, the LMS/XBlock remains the canonical chat history, which preserves vendor flexibility and continuity; provider threads are treated purely as a cache.

- Site configuration (under `ai_eval`):
- `PROVIDER_SUPPORTS_THREADS`: boolean, default `false`. When `true`, `CustomLLMService` attempts to reuse a provider conversation ID.
- XBlock user state (managed automatically):
- `thread_map`: a dictionary mapping `tag -> conversation_id`, where `tag = provider:model:prompt_hash`. This allows multiple concurrent provider threads per learner per XBlock, one per distinct prompt/model context.

Reset clears `thread_map`. If a provider ignores threads, behavior remains stateless.
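
As an illustration, a tag of the form `provider:model:prompt_hash` could be derived as below. The exact hashing scheme is internal to `CustomLLMService`; `make_thread_tag` and the 12‑character digest are hypothetical:

```python
import hashlib


def make_thread_tag(provider: str, model: str, prompt: str) -> str:
    """Derive a stable tag: one provider thread per distinct prompt/model context."""
    prompt_hash = hashlib.sha256(prompt.encode("utf-8")).hexdigest()[:12]
    return f"{provider}:{model}:{prompt_hash}"


thread_map = {}  # persisted in XBlock user state
tag = make_thread_tag("custom", "gpt-4o", "You are a grading assistant.")
conversation_id = thread_map.get(tag)  # None on the first turn, so full context is sent
```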

Compatibility and fallback
- Not all vendors/models support `conversation_id`. The default service path (via LiteLLM chat completions) does not use provider threads; calls remain stateless.
- If a provider ignores or does not support threads, the integration still works and simply behaves statelessly.
- With a custom provider that does support threads, the first turn sends the full context; subsequent turns send only the latest user input together with the cached `conversation_id`.

### Custom Code Execution Service (advanced)

The Coding XBlock can route code execution to a third‑party service instead of Judge0. The service is expected to be asynchronous: it exposes a submit endpoint that returns a submission identifier, and a results endpoint that returns the execution result once it is available. Configure this via Django settings:

```python
# e.g., in Tutor's extra settings
AI_EVAL_CODE_EXECUTION_BACKEND = {
    'backend': 'custom',
    'custom_config': {
        'submit_endpoint': 'https://code-exec.example.com/api/submit',
        'results_endpoint': 'https://code-exec.example.com/api/results/{submission_id}',
        'languages_endpoint': 'https://code-exec.example.com/api/languages',
        'api_key': 'example-key',
        # For Bearer tokens (default): Authorization: Bearer <token>
        'auth_header_name': 'Authorization',
        'auth_scheme': 'Bearer',
        # Networking
        'timeout': 30,
    },
}
```

Header examples
- Bearer (default): `Authorization: Bearer <API_KEY>` (use `auth_header_name='Authorization'`, `auth_scheme='Bearer'`)
- Vendor header without scheme: `X-API-Key: <API_KEY>` (use `auth_header_name='X-API-Key'`, `auth_scheme=''`)
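
A minimal sketch of how these two settings combine into an HTTP header. This mirrors the backend's header‑building logic, but `build_auth_header` itself is an illustrative helper, not part of the API:

```python
def build_auth_header(api_key: str, name: str = "Authorization", scheme: str = "Bearer") -> dict:
    """Build the auth header the way the custom backend does: '<scheme> <key>' or bare key."""
    if not api_key:
        return {}
    return {name: f"{scheme} {api_key}" if scheme else api_key}


build_auth_header("example-key")
# {'Authorization': 'Bearer example-key'}
build_auth_header("example-key", name="X-API-Key", scheme="")
# {'X-API-Key': 'example-key'}
```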

Notes
- Asynchronous model: `submit_endpoint` should return an identifier (e.g., `submission_id` or `id`) that is later used to poll `results_endpoint`.
- `results_endpoint` must include `{submission_id}` and return execution status and outputs when ready.
- `languages_endpoint` is called during initialization to verify supported languages.
- To use Judge0, remove the custom backend settings or set `backend='judge0'`. Provide the Judge0 API key in the XBlock configuration. Optionally set `judge0_config.base_url`; otherwise the default RapidAPI endpoint is used.
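
The submit-then-poll flow described in the notes can be sketched from a client's point of view. The endpoint URLs and key are the illustrative values from the settings example above, and the completion check (outputs present) is an assumption about the service's response shape, not the XBlock's actual implementation:

```python
import time

import requests

SUBMIT = "https://code-exec.example.com/api/submit"
RESULTS = "https://code-exec.example.com/api/results/{submission_id}"
HEADERS = {"Authorization": "Bearer example-key", "Content-Type": "application/json"}


def extract_submission_id(payload: dict) -> str:
    """The service may return the identifier as 'submission_id' or 'id'."""
    if "submission_id" in payload:
        return str(payload["submission_id"])
    if "id" in payload:
        return str(payload["id"])
    raise ValueError("response missing submission ID")


def run_code(code: str, language: str, poll_interval: float = 1.0, timeout: int = 30) -> dict:
    """Submit code, then poll the results endpoint until execution finishes."""
    resp = requests.post(SUBMIT, json={"code": code, "language": language},
                         headers=HEADERS, timeout=timeout)
    resp.raise_for_status()
    submission_id = extract_submission_id(resp.json())

    deadline = time.time() + timeout
    while time.time() < deadline:
        result = requests.get(RESULTS.format(submission_id=submission_id),
                              headers=HEADERS, timeout=timeout)
        result.raise_for_status()
        body = result.json()
        # Assume the service reports outputs only once execution has finished
        if body.get("stdout") is not None or body.get("stderr") is not None:
            return body
        time.sleep(poll_interval)
    raise TimeoutError("code execution did not complete in time")
```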

Example Judge0 configuration
```python
# Optional override for Judge0 base URL; API key is set per XBlock instance
AI_EVAL_CODE_EXECUTION_BACKEND = {
    'backend': 'judge0',
    'judge0_config': {
        'base_url': 'https://judge0-ce.p.rapidapi.com',
    },
}
```

## Dependencies
- [Judge0 API](https://judge0.com/)
- [Monaco editor](https://github.com/microsoft/monaco-editor)
1 change: 1 addition & 0 deletions ai_eval/__init__.py
@@ -5,3 +5,4 @@
from .shortanswer import ShortAnswerAIEvalXBlock
from .coding_ai_eval import CodingAIEvalXBlock
from .multiagent import MultiAgentAIEvalXBlock
from .export import DataExportXBlock
Empty file added ai_eval/backends/__init__.py
39 changes: 39 additions & 0 deletions ai_eval/backends/base.py
@@ -0,0 +1,39 @@
"""Abstract interfaces for code execution backends."""

from abc import ABC, abstractmethod
from typing import Dict, Any


class CodeExecutionBackend(ABC):
    """
    Abstract base class for code execution backends.
    """
    @abstractmethod
    def submit_code(self, code: str, language_label: str) -> str:
        """
        Submit code for execution.

        Args:
            code: The source code to execute
            language_label: Human-readable language label (e.g., "Python (3.8.1)").
                Implementations map this to their own representation.

        Returns:
            str: Submission ID for retrieving results
        """

    @abstractmethod
    def get_result(self, submission_id: str) -> Dict[str, Any]:
        """
        Get execution result for a submission.

        Args:
            submission_id: The submission ID from submit_code()

        Returns:
            dict: Execution result containing:
                - status: dict with 'id' and 'description'
                - stdout: str or None
                - stderr: str or None
                - compile_output: str or None
        """
151 changes: 151 additions & 0 deletions ai_eval/backends/custom.py
@@ -0,0 +1,151 @@
"""Custom service code execution backend."""

from typing import Dict, Any, Optional

import requests

from ai_eval.utils import SUPPORTED_LANGUAGE_MAP, LanguageLabels, DEFAULT_HTTP_TIMEOUT
from .base import CodeExecutionBackend


class CustomServiceBackend(CodeExecutionBackend):
    """
    Generic custom code execution backend.
    """
    def __init__(  # pylint: disable=too-many-positional-arguments
        self,
        submit_endpoint: str,
        results_endpoint: str,
        languages_endpoint: str,
        api_key: str = "",
        timeout: int = DEFAULT_HTTP_TIMEOUT,
        auth_header_name: str = "Authorization",
        auth_scheme: Optional[str] = "Bearer",
    ):
        self.submit_endpoint = submit_endpoint
        self.results_endpoint = results_endpoint
        self.languages_endpoint = languages_endpoint
        self.api_key = api_key
        self.timeout = timeout
        self.auth_header_name = auth_header_name
        self.auth_scheme = auth_scheme
        self._languages_validated = False

    def _get_headers(self) -> Dict[str, str]:
        """
        Get headers for API requests.
        """
        headers = {"Content-Type": "application/json"}
        if self.api_key:
            if self.auth_scheme:
                headers[self.auth_header_name] = f"{self.auth_scheme} {self.api_key}"
            else:
                headers[self.auth_header_name] = self.api_key
        return headers

    def _validate_languages(self):
        """
        Validate that the statically supported languages are supported by the custom service.
        """
        try:
            response = requests.get(
                self.languages_endpoint,
                headers=self._get_headers(),
                timeout=self.timeout
            )
            response.raise_for_status()

            service_languages = response.json()
            # Expected format: [{"id": "92", "name": "Python"}, ...] or [{"id": "python", "name": "Python"}, ...]
            service_language_names = {lang['name'].lower() for lang in service_languages}

            static_language_names = {
                name.lower() for name in SUPPORTED_LANGUAGE_MAP
                if name != LanguageLabels.HTML_CSS
            }

            unsupported = static_language_names - service_language_names
            if unsupported:
                raise ValueError(
                    f"Custom service does not support languages: {', '.join(unsupported)}."
                )

        except (requests.RequestException, KeyError, ValueError) as e:
            raise ValueError(f"Failed to validate supported languages: {e}") from e

    def _ensure_languages_validated(self):
        """
        Validate supported languages lazily, once, if an endpoint is configured.
        """
        if self._languages_validated:
            return
        if not self.languages_endpoint:
            self._languages_validated = True
            return
        self._validate_languages()
        self._languages_validated = True

    def submit_code(self, code: str, language_label: str) -> str:
        """
        Submit code to the custom service for execution.
        """
        self._ensure_languages_validated()
        # By default, send the language label; services will need to map it as needed
        payload = {
            'code': code,
            'language': language_label
        }

        try:
            response = requests.post(
                self.submit_endpoint,
                json=payload,
                headers=self._get_headers(),
                timeout=self.timeout
            )
            response.raise_for_status()

            # Handle different response formats
            result = response.json()
            if 'submission_id' in result:
                return result['submission_id']
            if 'id' in result:
                return str(result['id'])
            raise ValueError("Custom service response missing submission ID")

        except requests.RequestException as e:
            raise ValueError(f"Failed to submit code for execution: {e}") from e
        except (KeyError, ValueError) as e:
            raise ValueError(f"Invalid response from custom service: {e}") from e

    def get_result(self, submission_id: str) -> Dict[str, Any]:
        """
        Get the execution result from the custom service.
        """
        self._ensure_languages_validated()
        url = self.results_endpoint.format(submission_id=submission_id)

        try:
            response = requests.get(
                url,
                headers=self._get_headers(),
                timeout=self.timeout
            )
            response.raise_for_status()

            result = response.json()

            # Map the custom service response to the standard (Judge0-like) format;
            # status id 3 corresponds to "Accepted"/completed
            return {
                'status': {
                    'id': result.get('status_code', 3),
                    'description': result.get('status', 'Completed')
                },
                'stdout': result.get('stdout'),
                'stderr': result.get('stderr'),
                'compile_output': result.get('compile_error')
            }

        except requests.RequestException as e:
            raise ValueError(f"Failed to get submission result: {e}") from e
        except (KeyError, ValueError) as e:
            raise ValueError(f"Invalid response from custom service: {e}") from e
44 changes: 44 additions & 0 deletions ai_eval/backends/factory.py
@@ -0,0 +1,44 @@
"""Backend selection factory."""

from django.conf import settings

from .judge0 import Judge0Backend
from .custom import CustomServiceBackend


class BackendFactory:
    """
    Factory for creating code execution backends.
    """
    @classmethod
    def get_backend(cls, api_key: str = ""):
        """
        Get the appropriate backend based on Django settings.

        Args:
            api_key: Judge0 API key (only used for the judge0 backend)

        Returns:
            CodeExecutionBackend: Configured backend instance
        """
        backend_config = getattr(
            settings, 'AI_EVAL_CODE_EXECUTION_BACKEND', {}
        )

        if backend_config.get('backend') == 'custom':
            config = backend_config.get('custom_config', {})
            return CustomServiceBackend(
                submit_endpoint=config.get('submit_endpoint', ''),
                results_endpoint=config.get('results_endpoint', ''),
                languages_endpoint=config.get('languages_endpoint', ''),
                api_key=config.get('api_key', ''),
                timeout=config.get('timeout', 30),
                auth_header_name=config.get('auth_header_name', 'Authorization'),
                auth_scheme=config.get('auth_scheme', 'Bearer'),
            )

        # Default to the judge0 backend
        judge0_config = backend_config.get('judge0_config', {})
        return Judge0Backend(
            api_key=api_key,
            base_url=judge0_config.get('base_url')
        )