@@ -19,23 +19,35 @@ def setUp(self):
1919 self .reward_en = LanguageReward (target_language = "en" )
2020 self .reward_ja = LanguageReward (target_language = "ja" )
2121 self .custom_reward = LanguageReward (
22- target_language = "ja" , full_reward = 0.9 , no_match_reward = 0.1
22+ target_language = "ja" ,
23+ full_reward = 0.9 ,
24+ partial_reward = 0.6 ,
25+ fallback_reward = 0.3 ,
26+ no_match_reward = 0.1 ,
2327 )
2428
2529 def test_init_default_values (self ):
2630 """Test LanguageReward initialization with default values."""
2731 reward = self .LanguageReward ()
2832 self .assertEqual (reward .target_language , "en" )
2933 self .assertEqual (reward .full_reward , 1.0 )
34+ self .assertEqual (reward .partial_reward , 0.5 )
35+ self .assertEqual (reward .fallback_reward , 0.2 )
3036 self .assertEqual (reward .no_match_reward , 0.0 )
3137
3238 def test_init_custom_values (self ):
3339 """Test LanguageReward initialization with custom values."""
3440 reward = self .LanguageReward (
35- target_language = "ja" , full_reward = 0.9 , no_match_reward = 0.1
41+ target_language = "ja" ,
42+ full_reward = 0.9 ,
43+ partial_reward = 0.6 ,
44+ fallback_reward = 0.3 ,
45+ no_match_reward = 0.1 ,
3646 )
3747 self .assertEqual (reward .target_language , "ja" )
3848 self .assertEqual (reward .full_reward , 0.9 )
49+ self .assertEqual (reward .partial_reward , 0.6 )
50+ self .assertEqual (reward .fallback_reward , 0.3 )
3951 self .assertEqual (reward .no_match_reward , 0.1 )
4052
4153 def test_init_missing_langid (self ):
@@ -112,10 +124,17 @@ def test_call_language_mismatch(self):
112124 self .assertEqual (result , 0.0 )
113125
114126 def test_call_with_no_thinking_tags (self ):
115- """Test __call__ with response containing no thinking tags."""
127+ """Test __call__ with response containing no thinking tags but correct language."""
116128 result = self .reward_en (
117129 "prompt" , "This is just a regular response without any thinking tags."
118130 )
131+ # No thinking blocks but response is in English, should get fallback reward
132+ self .assertEqual (result , 0.2 )
133+
134+ def test_call_with_no_thinking_tags_wrong_language (self ):
135+ """Test __call__ with response containing no thinking tags and wrong language."""
136+ result = self .reward_en ("prompt" , "これは日本語の応答です。タグはありません。" )
137+ # No thinking blocks and wrong language, should get no_match_reward
119138 self .assertEqual (result , 0.0 )
120139
121140 def test_call_with_empty_thinking_block (self ):
@@ -145,15 +164,15 @@ def test_call_with_whitespace_in_tags(self):
145164 self .assertEqual (result , 1.0 )
146165
147166 def test_call_multiple_thinking_blocks (self ):
148- """Test __call__ with multiple thinking blocks (wrong format)."""
167+ """Test __call__ with multiple thinking blocks (wrong format but correct language)."""
149168 response = """
150169 <think>First thought in English.</think>
151170 Some text in between.
152171 <think>Second thought also in English.</think>
153172 """
154173 result = self .reward_en ("prompt" , response )
155- # Multiple blocks = wrong format, should return 0
156- self .assertEqual (result , 0.0 )
174+ # Multiple blocks = wrong format, but language is correct, should return partial_reward
175+ self .assertEqual (result , 0.5 )
157176
158177 def test_call_multiple_thinking_blocks_mixed_languages (self ):
159178 """Test __call__ with multiple thinking blocks in different languages (wrong format)."""
@@ -162,8 +181,9 @@ def test_call_multiple_thinking_blocks_mixed_languages(self):
162181 <think>これは短い日本語。</think>
163182 """
164183 result = self .reward_en ("prompt" , response )
165- # Multiple blocks = wrong format, should return 0
166- self .assertEqual (result , 0.0 )
184+ # Multiple blocks with mixed languages - langid will detect dominant language
185+ # Should return either partial_reward (if it detects English) or no_match_reward (if it detects Japanese)
186+ self .assertIn (result , [0.0 , 0.5 ])
167187
168188 def test_call_multiline_thinking_block (self ):
169189 """Test __call__ with multiline thinking blocks."""
@@ -192,20 +212,31 @@ def test_call_with_target_parameter(self):
192212 result = self .reward_en ("prompt" , response , target = "some target" )
193213 self .assertEqual (result , 1.0 )
194214
195- result = self .reward_en ("prompt" , "no tags" , target = "some target" )
196- self .assertEqual (result , 0.0 )
215+ # Longer English text without tags should get fallback reward
216+ result = self .reward_en (
217+ "prompt" ,
218+ "This is a response without thinking tags but in English language." ,
219+ target = "some target" ,
220+ )
221+ self .assertEqual (result , 0.2 )
197222
198223 def test_call_custom_reward_values (self ):
199224 """Test __call__ with custom reward values."""
200- response_ja = "<think>これは日本語です。</think>"
225+ response_ja_single = "<think>これは日本語です。</think>"
226+ response_ja_multiple = "<think>最初の考え。</think><think>次の考え。</think>"
227+ response_ja_no_tags = "これはタグなしの日本語です。"
201228 response_en = "<think>This is English.</think>"
202- response_none = "no thinking tags"
203-
204- # Test custom full reward
205- self .assertEqual (self .custom_reward ("prompt" , response_ja ), 0.9 )
206- # Test custom no_match reward
229+ response_none = ""
230+
231+ # Test custom full reward (single block, correct language)
232+ self .assertEqual (self .custom_reward ("prompt" , response_ja_single ), 0.9 )
233+ # Test custom partial reward (multiple blocks, correct language)
234+ self .assertEqual (self .custom_reward ("prompt" , response_ja_multiple ), 0.6 )
235+ # Test custom fallback reward (no blocks, correct language)
236+ self .assertEqual (self .custom_reward ("prompt" , response_ja_no_tags ), 0.3 )
237+ # Test custom no_match reward (wrong language)
207238 self .assertEqual (self .custom_reward ("prompt" , response_en ), 0.1 )
208- # Test no tags
239+ # Test empty response
209240 self .assertEqual (self .custom_reward ("prompt" , response_none ), 0.1 )
210241
211242 def test_call_zero_custom_values (self ):
0 commit comments