Pass flags=re.DOTALL to re.match in the format reward functions of the six
GRPO notebooks touched below.

Each added line keeps the `matches = ` binding: the `return` line that
follows it in every hunk iterates over `matches`, so dropping the
assignment would raise NameError when the reward function is called.

diff --git a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
index 4e40ff8a..b5875606 100644
--- a/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
+++ b/nb/Kaggle-Llama3.1_(8B)-GRPO.ipynb
@@ -336,14 +336,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",
diff --git a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
index 19fb6e09..a76441eb 100644
--- a/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
+++ b/nb/Kaggle-Phi_4_(14B)-GRPO.ipynb
@@ -339,14 +339,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",
diff --git a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
index 2d2caf2d..e2a1e159 100644
--- a/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
+++ b/nb/Kaggle-Qwen2.5_(3B)-GRPO.ipynb
@@ -857,14 +857,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",
diff --git a/nb/Llama3.1_(8B)-GRPO.ipynb b/nb/Llama3.1_(8B)-GRPO.ipynb
index cbdd87eb..55bb3f6a 100644
--- a/nb/Llama3.1_(8B)-GRPO.ipynb
+++ b/nb/Llama3.1_(8B)-GRPO.ipynb
@@ -336,14 +336,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",
diff --git a/nb/Phi_4_(14B)-GRPO.ipynb b/nb/Phi_4_(14B)-GRPO.ipynb
index 006100ec..87e55523 100644
--- a/nb/Phi_4_(14B)-GRPO.ipynb
+++ b/nb/Phi_4_(14B)-GRPO.ipynb
@@ -339,14 +339,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",
diff --git a/nb/Qwen2.5_(3B)-GRPO.ipynb b/nb/Qwen2.5_(3B)-GRPO.ipynb
index aff93ca1..d78c58ff 100644
--- a/nb/Qwen2.5_(3B)-GRPO.ipynb
+++ b/nb/Qwen2.5_(3B)-GRPO.ipynb
@@ -857,14 +857,14 @@
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\"^\\n.*?\\n\\n\\n.*?\\n\\n$\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def soft_format_reward_func(completions, **kwargs) -> list[float]:\n",
  " \"\"\"Reward function that checks if the completion has a specific format.\"\"\"\n",
  " pattern = r\".*?\\s*.*?\"\n",
  " responses = [completion[0][\"content\"] for completion in completions]\n",
- " matches = [re.match(pattern, r) for r in responses]\n",
+ " matches = [re.match(pattern, r, flags=re.DOTALL) for r in responses]\n",
  " return [0.5 if match else 0.0 for match in matches]\n",
  "\n",
  "def count_xml(text) -> float:\n",