
Conversation

aricohen93 (Collaborator)

Description

The LLMSpanClassifier component is an LLM-based attribute predictor: it qualifies entity spans by querying a large language model through an OpenAI-compatible API.

Usage: https://github.com/aphp/edsnlp/blob/llm-qualifier/docs/tutorials/qualifying-entities-with-llm.md
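A minimal usage sketch, not the component's definitive API: the registered pipe name and the configuration keys below are placeholders loosely based on the constructor arguments visible in the coverage report (model_name, api_key, ...); see the tutorial above for the real interface. It assumes an OpenAI-compatible endpoint such as a vLLM server.

    import edsnlp, edsnlp.pipes as eds

    nlp = edsnlp.blank("eds")
    nlp.add_pipe(eds.sentences())
    nlp.add_pipe(
        "eds.llm_span_classifier",  # hypothetical registered name
        config={
            "model_name": "my-served-model",             # model exposed by the endpoint
            "api_key": "EMPTY",                          # vLLM servers accept a dummy key
            "attributes": {"negation": [True, False]},   # attributes to predict per span
        },
    )
    doc = nlp("Le patient ne présente pas de fièvre.")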

Checklist

  • If this PR is a bug fix, the bug is documented in the test suite.
  • Changes were documented in the changelog (pending section).
  • If necessary, changes were made to the documentation (e.g. a new pipeline).

@aricohen93 requested a review from Thomzoy on August 20, 2025 at 19:11

Docs preview URL

https://edsnlp-llm-qualifier.vercel.app/


Coverage Report

Name | Stmts | Miss | ∆ Miss | Cover
edsnlp/pipes/qualifiers/llm/llm_utils.py

New missing coverage at lines 69-82 !

         # Set OpenAI's API key and API base to use vLLM's API server.
-         self.model_name = model_name
-         self.temperature = temperature
-         self.max_tokens = max_tokens
-         self.extra_body = extra_body
-         self.response_format = response_format
-         self.n_completions = n_completions
-         self.timeout = timeout
-         self.kwargs = kwargs
-         self.n_concurrent_tasks = n_concurrent_tasks
-         self.responses = []
-         self.lock = asyncio.Lock()
- 
-         self.client = AsyncOpenAI(
             api_key=api_key,
New missing coverage at lines 92-94 !
         """
-         for i, messages in enumerate(batch_messages):
-             yield (i, messages)
New missing coverage at lines 101-115 !
         """
-         if (response_format is not None) and (isinstance(response, ChatCompletion)):
-             prediction = [
-                 parse_json_response(
-                     choice.message.content.strip(), response_format=response_format
-                 )
-                 for choice in response.choices
-             ]
-             if self.n_completions == 1:
-                 prediction = prediction[0]
- 
-             return prediction
- 
-         else:
-             return response
New missing coverage at lines 137-156 !
-         raw_response = await asyncio.wait_for(
-             self.client.chat.completions.create(
-                 model=self.model_name,
-                 messages=messages,
-                 max_tokens=self.max_tokens,
-                 n=self.n_completions,
-                 temperature=self.temperature,
-                 stream=False,
-                 response_format=self.response_format,
-                 extra_body=self.extra_body,
-                 **self.kwargs,
-             ),
-             timeout=self.timeout,
-         )
- 
-         # Parse the response
-         parsed_response = self.parse_messages(raw_response, self.response_format)
- 
-         return id, parsed_response
New missing coverage at line 159 !
         """ """
-         self.responses.append((p_id, abbreviation_list))
New missing coverage at lines 168-187 !
-         async for (
-             idx,
-             message,
-         ) in id_messages_tuples:
-             logger.info(idx)
- 
-             try:
-                 idx, response = await self.call_llm(idx, message)
- 
-                 logger.info(f"Worker {name} has finished process {idx}")
-             except Exception as e:
-                 logger.error(f"[{name}] Exception raised on chunk {idx}\n{e}")
-                 if self.n_completions == 1:
-                     response = ""
-                 else:
-                     response = [""] * self.n_completions
- 
-             async with self.lock:
-                 self.store_responses(
                     idx,
New missing coverage at lines 192-205 !
     def sort_responses(self):
-         sorted_responses = []
-         for i, output in sorted(self.responses, key=lambda x: x[0]):
-             if isinstance(output, ChatCompletion):
-                 if self.n_completions == 1:
-                     sorted_responses.append(output.choices[0].message.content.strip())
-                 else:
-                     sorted_responses.append(
-                         [choice.message.content.strip() for choice in output.choices]
-                     )
-             else:
-                 sorted_responses.append(output)
- 
-         return sorted_responses
New missing coverage at lines 207-209 !
     def clean_storage(self):
-         del self.responses
-         self.responses = []
New missing coverage at lines 222-236 !
         # Shared prompt generator
-         id_messages_tuples = self.async_id_message_generator(batch_messages)
- 
-         # n concurrent tasks
-         tasks = {
-             asyncio.create_task(self.async_worker(f"Worker-{i}", id_messages_tuples))
-             for i in range(self.n_concurrent_tasks)
-         }
- 
-         await asyncio.gather(*tasks)
-         tasks.clear()
-         predictions = self.sort_responses()
-         self.clean_storage()
- 
-         return predictions
New missing coverage at lines 336-347 !
-     if (response_format is not None) and (response_format.get("type") == "json_schema"):
-         try:
-             return json.loads(response)
-         except json.JSONDecodeError:
-             if errors == "ignore":
-                 return {}
-             else:
-                 return response
-     else:
-         # If no response format is specified, return the raw response
-         return response

Stmts: 92 | Miss: 61 | ∆ Miss: 108 | Cover: 33.70%
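For reference, the uncovered JSON-parsing branch above (lines 336-347) boils down to the following standalone pattern; this is a sketch with an assumed signature, not the module's exact code:

    import json

    def parse_json_response(response: str, response_format=None, errors="ignore"):
        # Only attempt to parse when a JSON schema was requested.
        if response_format is not None and response_format.get("type") == "json_schema":
            try:
                return json.loads(response)
            except json.JSONDecodeError:
                return {} if errors == "ignore" else response
        # No response format specified: return the raw response.
        return response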
edsnlp/pipes/qualifiers/llm/llm_qualifier.py

New missing coverage at line 234 !

     def attributes(self) -> Attributes:
-         return {qlf: labels for qlf, labels, _ in self.bindings}
New missing coverage at lines 238-250 !
     def attributes(self, value: Attributes):
-         bindings = []
-         for qlf, labels in value.items():
-             groups = [group for group in self.bindings if group[0] == qlf]
-             if len(groups) > 1:
-                 raise ValueError(
-                     f"Attribute {qlf} has different label filters: "
-                     f"{[g[0] for g in groups]}. Please use the `update_bindings` "
-                     f"method to update the labels."
-                 )
-             if groups:
-                 bindings.append((qlf, labels, groups[0][2]))
-         self.bindings = bindings
New missing coverage at line 264 !
         if self.context_getter is None or not callable(self.context_getter):
-             contexts = list(get_spans(doc, self.context_getter))
         else:
New missing coverage at line 277 !
             else:
-                 final_user_prompt = context_text
             if self.suffix_prompt:
New missing coverage at line 360 !
             else:
-                 mapped_value = None
         return mapped_value
New missing coverage at line 378 !
                     if value is None:
-                         mapped_value = None
                     elif self.response_mapping is not None:
New missing coverage at line 383 !
                     else:
-                         mapped_value = value
                     BINDING_SETTERS[qlf](span, mapped_value)

Stmts: 113 | Miss: 13 | ∆ Miss: 18 | Cover: 88.50%
edsnlp/utils/asynchronous.py

New missing coverage at lines 31-35 !

     if loop and loop.is_running():
-         import nest_asyncio
- 
-         nest_asyncio.apply()
-         return asyncio.get_running_loop().run_until_complete(coro)
     else:

Stmts: 13 | Miss: 3 | ∆ Miss: 4 | Cover: 76.92%
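The uncovered branch in edsnlp/utils/asynchronous.py handles the classic case of running a coroutine from synchronous code even when an event loop is already running (e.g. in a notebook). A generic, self-contained sketch of that pattern, not the module's exact helper:

    import asyncio

    def run_sync(coro):
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            loop = None
        if loop and loop.is_running():
            # A loop is already running (Jupyter, IPython): make it re-entrant.
            import nest_asyncio
            nest_asyncio.apply()
            return asyncio.get_running_loop().run_until_complete(coro)
        else:
            # No running loop: let asyncio create and manage one.
            return asyncio.run(coro)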
TOTAL | Stmts: 11765 | Miss: 300 | ∆ Miss: 130 | Cover: 97.45%
Files without new missing coverage
Name | Stmts | Miss | ∆ Miss | Cover
edsnlp/utils/torch.py

Was already missing at line 102

 def load_pruned_obj(obj, _):
-     return obj
Was already missing at line 118
     def save_align_devices_hook(pickler, obj):
-         pickler.save_reduce(load_align_devices_hook, (obj.__dict__,), obj=obj)
Was already missing at lines 121-128
     def load_align_devices_hook(state):
-         state["execution_device"] = MAP_LOCATION
  ...
-     AlignDevicesHook = None
Was already missing at line 143
             if torch.Tensor in copyreg.dispatch_table:
-                 old_dispatch[torch.Tensor] = copyreg.dispatch_table[torch.Tensor]
             copyreg.pickle(torch.Tensor, reduce_empty)

Stmts: 83 | Miss: 9 | ∆ Miss: 0 | Cover: 89.16%
edsnlp/utils/span_getters.py

Was already missing at lines 78-80

     if span_getter is None:
-         yield doc[:], None
-         return
     if callable(span_getter):
Was already missing at lines 81-83
     if callable(span_getter):
-         yield from span_getter(doc)
-         return
     for key, span_filter in span_getter.items():
Was already missing at line 85
         if key == "*":
-             candidates = (
                 (span, group) for group in doc.spans.values() for span in group
Was already missing at lines 94-97
         else:
-             for span, group in candidates:
-                 if span.label_ in span_filter:
-                     yield span, group
Was already missing at line 101
     if callable(span_setter):
-         span_setter(doc, matches)
     else:
Was already missing at line 181
             elif isinstance(v, str):
-                 new_value[k] = [v]
             elif isinstance(v, list) and all(isinstance(i, str) for i in v):

Stmts: 231 | Miss: 10 | ∆ Miss: 0 | Cover: 95.67%
edsnlp/utils/resources.py

Was already missing at line 33

     if not verbs:
-         return conjugated_verbs

Stmts: 24 | Miss: 1 | ∆ Miss: 0 | Cover: 95.83%
edsnlp/utils/numbers.py

Was already missing at line 34

     else:
-         string = s
     string = string.lower().strip()
Was already missing at lines 38-41
         return int(string)
-     except ValueError:
-         parsed = DIGITS_MAPPINGS.get(string, None)
-         return parsed

Stmts: 16 | Miss: 4 | ∆ Miss: 0 | Cover: 75.00%
edsnlp/utils/filter.py

Was already missing at line 206

     if isinstance(label, int):
-         return [span for span in spans if span.label == label]
     else:

Stmts: 74 | Miss: 1 | ∆ Miss: 0 | Cover: 98.65%
edsnlp/tune.py

Was already missing at line 169

             )
-         except RuntimeError as e:
             if "zero total variance" in str(e):  # pragma: no cover
Was already missing at line 684
         else:
-             n_trials = compute_n_trials(
                 gpu_hours, compute_time_per_trial(study, ema=True)

Stmts: 289 | Miss: 2 | ∆ Miss: 0 | Cover: 99.31%
edsnlp/training/trainer.py

Was already missing at line 88

     if result is None:
-         result = {}
     if isinstance(x, dict):
Was already missing at lines 762-769
                                     accelerator.backward(loss)
-                         except torch.cuda.OutOfMemoryError:
  ...
-                             raise
                         del loss
Was already missing at lines 808-810
                         ) > grad_max_dev * math.sqrt(grad_var):
-                             spike = True
-                             cumulated_data["spikes"] += 1
                         else:
Was already missing at line 817
                         if spike and grad_dev_policy == "clip_mean":
-                             torch.nn.utils.clip_grad_norm_(
                                 grad_params, grad_mean, norm_type=2
Was already missing at line 821
                         elif spike and grad_dev_policy == "clip_threshold":
-                             torch.nn.utils.clip_grad_norm_(
                                 grad_params,

Stmts: 306 | Miss: 9 | ∆ Miss: 0 | Cover: 97.06%
edsnlp/reducers.py

Was already missing at line 115

     if not hasattr(module, "__file__"):
-         return True
     if module.__file__ is None:
Was already missing at line 117
     if module.__file__ is None:
-         return False
     # Hack to avoid copying the full module dict

Stmts: 67 | Miss: 2 | ∆ Miss: 0 | Cover: 97.01%
edsnlp/processing/spark.py

Was already missing at line 50

         getActiveSession = SparkSession.getActiveSession
-     except AttributeError:

Stmts: 47 | Miss: 1 | ∆ Miss: 0 | Cover: 97.87%
edsnlp/processing/multiprocessing.py

Was already missing at lines 393-398

                 self.on_stop()
-         except BaseException as e:
  ...
-             self.main_control_queue.put(e)
         finally:
Was already missing at lines 402-404
                     pass
-             except StopSignal:
-                 pass
             for name, queue in self.consumer_queues(stage):
Was already missing at line 542
                     while schedule[task_idx] is None:
-                         task_idx = (task_idx + 1) % len(schedule)
Was already missing at lines 606-608
             if isinstance(docs, StreamSentinel):
-                 self.active_batches[stage].append([None, None, None, docs])
-                 continue
             batch_id = str(hash(tuple(id(x) for x in docs)))[-8:] + "-" + self.uid
Was already missing at lines 1121-1127
                 if out[0].kind == requires_sentinel:
-                     missing_sentinels -= 1
  ...
-                         missing_sentinels = len(self.cpu_worker_names)
                 continue

Stmts: 626 | Miss: 14 | ∆ Miss: 0 | Cover: 97.76%
edsnlp/processing/deprecated_pipe.py

Was already missing at lines 207-209

         def converter(doc):
-             res = results_extractor(doc)
-             return (
                 [{"note_id": doc._.note_id, **row} for row in res]

Stmts: 57 | Miss: 2 | ∆ Miss: 0 | Cover: 96.49%
edsnlp/pipes/trainable/span_linker/span_linker.py

Was already missing at lines 402-404

             if self.reference_mode == "synonym":
-                 embeds = embeds.to(new_lin.weight)
-                 new_lin.weight.data = embeds
             else:

Stmts: 173 | Miss: 2 | ∆ Miss: 0 | Cover: 98.84%
edsnlp/pipes/trainable/span_classifier/span_classifier.py

Was already missing at line 373

         if not all(keep_bindings):
-             logger.warning(
                 "Some attributes have no labels or values and have been removed:"

Stmts: 164 | Miss: 1 | ∆ Miss: 0 | Cover: 99.39%
edsnlp/pipes/trainable/ner_crf/ner_crf.py

Was already missing at line 301

         if self.labels is not None and not self.infer_span_setter:
-             return
Was already missing at lines 309-311
             if callable(self.target_span_getter):
-                 for span in get_spans(doc, self.target_span_getter):
-                     inferred_labels.add(span.label_)
             else:

Stmts: 172 | Miss: 3 | ∆ Miss: 0 | Cover: 98.26%
edsnlp/pipes/trainable/layers/crf.py

Was already missing at line 21

     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).logsumexp(-2)
Was already missing at line 29
     # out: 2 * N * O
-     return (log_A.unsqueeze(-1) + log_B.unsqueeze(-3)).max(-2)
Was already missing at line 98
         if learnable_transitions:
-             self.transitions = torch.nn.Parameter(
                 torch.zeros_like(forbidden_transitions, dtype=torch.float)
Was already missing at line 108
         if learnable_transitions and with_start_end_transitions:
-             self.start_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)
Was already missing at line 117
         if learnable_transitions and with_start_end_transitions:
-             self.end_transitions = torch.nn.Parameter(
                 torch.zeros(num_tags, dtype=torch.float)

Stmts: 137 | Miss: 5 | ∆ Miss: 0 | Cover: 96.35%
edsnlp/pipes/trainable/embeddings/transformer/transformer.py

Was already missing at line 165

         if quantization is not None:
-             kwargs["quantization_config"] = quantization
Was already missing at line 185
         if self.cls_token_id is None:
-             [self.cls_token_id] = self.tokenizer.convert_tokens_to_ids(
                 [self.tokenizer.special_tokens_map["bos_token"]]
Was already missing at line 189
         if self.sep_token_id is None:
-             [self.sep_token_id] = self.tokenizer.convert_tokens_to_ids(
                 [self.tokenizer.special_tokens_map["eos_token"]]

Stmts: 166 | Miss: 3 | ∆ Miss: 0 | Cover: 98.19%
edsnlp/pipes/qualifiers/reported_speech/reported_speech.py

Was already missing at lines 24-28

         return "REPORTED"
-     elif token._.rspeech is False:
-         return "DIRECT"
-     else:
-         return None

Stmts: 100 | Miss: 3 | ∆ Miss: 0 | Cover: 97.00%
edsnlp/pipes/qualifiers/negation/negation.py

Was already missing at line 28

     else:
-         return None

Stmts: 101 | Miss: 1 | ∆ Miss: 0 | Cover: 99.01%
edsnlp/pipes/qualifiers/hypothesis/hypothesis.py

Was already missing at line 27

     else:
-         return None

Stmts: 98 | Miss: 1 | ∆ Miss: 0 | Cover: 98.98%
edsnlp/pipes/qualifiers/history/history.py

Was already missing at lines 26-32

 def history_getter(token: Union[Token, Span]) -> Optional[str]:
-     if token._.history is True:
-         return "ATCD"
-     elif token._.history is False:
-         return "CURRENT"
-     else:
-         return None
Was already missing at lines 353-359
                 )
-             except ValueError:
  ...
-                 note_datetime = None
Was already missing at lines 368-374
                 )
-             except ValueError:
  ...
-                 birth_datetime = None
Was already missing at lines 440-443
                         )
-                     except ValueError as e:
-                         absolute_date = None
-                         logger.warning(
                             "In doc {}, the following date {} raises this error: {}. "

Stmts: 180 | Miss: 14 | ∆ Miss: 0 | Cover: 92.22%
edsnlp/pipes/qualifiers/family/family.py

Was already missing at line 27

     else:
-         return None

Stmts: 83 | Miss: 1 | ∆ Miss: 0 | Cover: 98.80%
edsnlp/pipes/ner/tnm/model.py

Was already missing at line 147

     def __str__(self):
-         return self.norm()
Was already missing at line 171
             )
-             exclude_unset = skip_defaults

Stmts: 112 | Miss: 2 | ∆ Miss: 0 | Cover: 98.21%
edsnlp/pipes/ner/scores/sofa/sofa.py

Was already missing at line 32

             if not assigned:
-                 continue
             if assigned.get("method_max") is not None:
Was already missing at line 40
             else:
-                 method = "Non précisée"

Stmts: 25 | Miss: 2 | ∆ Miss: 0 | Cover: 92.00%
edsnlp/pipes/ner/scores/elston_ellis/patterns.py

Was already missing at line 26

         if x <= 5:
-             return 1
Was already missing at lines 32-36
         else:
-             return 3
- 
-     except ValueError:
-         return None

Stmts: 21 | Miss: 4 | ∆ Miss: 0 | Cover: 80.95%
edsnlp/pipes/ner/scores/charlson/patterns.py

Was already missing at lines 21-23

             return int(extracted_score)
-     except ValueError:
-         return None

Stmts: 13 | Miss: 2 | ∆ Miss: 0 | Cover: 84.62%
edsnlp/pipes/ner/disorders/solid_tumor/solid_tumor.py

Was already missing at lines 131-137

         for span in spans:
-             span.label_ = "solid_tumor"
  ...
-             yield span

Stmts: 38 | Miss: 6 | ∆ Miss: 0 | Cover: 84.21%
edsnlp/pipes/ner/disorders/peripheral_vascular_disease/peripheral_vascular_disease.py

Was already missing at line 108

                 if "peripheral" not in span._.assigned.keys():
-                     continue

Stmts: 16 | Miss: 1 | ∆ Miss: 0 | Cover: 93.75%
edsnlp/pipes/ner/disorders/diabetes/diabetes.py

Was already missing at line 131

                 # Mostly FP
-                 continue
Was already missing at line 134
             elif self.has_far_complications(span):
-                 span._.status = 2
Was already missing at line 145
         if next(iter(self.complication_matcher(context)), None) is not None:
-             return True
         return False

Stmts: 30 | Miss: 3 | ∆ Miss: 0 | Cover: 90.00%
edsnlp/pipes/ner/disorders/connective_tissue_disease/connective_tissue_disease.py

Was already missing at line 104

                 # Huge change of FP / Title section
-                 continue

Stmts: 15 | Miss: 1 | ∆ Miss: 0 | Cover: 93.33%
edsnlp/pipes/ner/disorders/ckd/ckd.py

Was already missing at lines 121-124

             dfg_value = float(dfg_span.text.replace(",", ".").strip())
-         except ValueError:
-             logger.trace(f"DFG value couldn't be extracted from {dfg_span.text}")
-             return False

Stmts: 30 | Miss: 3 | ∆ Miss: 0 | Cover: 90.00%
edsnlp/pipes/ner/disorders/cerebrovascular_accident/cerebrovascular_accident.py

Was already missing at lines 112-114

             if span._.source == "ischemia":
-                 if "brain" not in span._.assigned.keys():
-                     continue

Stmts: 18 | Miss: 2 | ∆ Miss: 0 | Cover: 88.89%
edsnlp/pipes/ner/adicap/models.py

Was already missing at line 15

     def norm(self) -> str:
-         return self.code
Was already missing at line 18
     def __str__(self):
-         return self.norm()

Stmts: 16 | Miss: 2 | ∆ Miss: 0 | Cover: 87.50%
edsnlp/pipes/misc/split/split.py

Was already missing at lines 183-185

         if max_length <= 0 and self.regex is None:
-             yield doc
-             return

Stmts: 73 | Miss: 2 | ∆ Miss: 0 | Cover: 97.26%
edsnlp/pipes/misc/sections/sections.py

Was already missing at line 126

         if sections is None:
-             sections = patterns.sections
         sections = dict(sections)

Stmts: 45 | Miss: 1 | ∆ Miss: 0 | Cover: 97.78%
edsnlp/pipes/misc/quantities/quantities.py

Was already missing at lines 147-149

     def __getitem__(self, item: int):
-         assert isinstance(item, int)
-         return [self][item]
Was already missing at lines 160-163
     def __eq__(self, other: Any):
-         if isinstance(other, SimpleQuantity):
-             return self.convert_to(other.unit) == other.value
-         return False
Was already missing at line 166
         if other.unit == self.unit:
-             return SimpleQuantity(self.value + other.value, self.unit, self.registry)
         return SimpleQuantity(
Was already missing at line 193
             return self.convert_to(other_unit)
-         except KeyError:
             raise AttributeError(f"Unit {other_unit} not found")
Was already missing at line 198
     def verify(cls, ent):
-         return True
Was already missing at line 264
     def __lt__(self, other: Union[SimpleQuantity, "RangeQuantity"]):
-         return max(self.convert_to(other.unit)) < min((part.value for part in other))
Was already missing at line 275
             return self.convert_to(other.unit) == other.value
-         return False
Was already missing at line 289
     def verify(cls, ent):
-         return True
Was already missing at line 888
         if snippet.end != last and doclike.doc[last: snippet.end].text.strip() == "":
-             pseudo.append("w")
         pseudo = "".join(pseudo)
Was already missing at line 1069
                             if start_line is None:
-                                 continue
Was already missing at lines 1100-1102
                         unit_norm = self.unit_followers[unit_before.label_]
-                 except (KeyError, AttributeError, IndexError):
-                     pass
Was already missing at line 1145
             ):
-                 ent = doc[unit_text.start: number.end]
             else:
Was already missing at lines 1152-1154
                 dims = self.unit_registry.parse_unit(unit_norm)[0]
-             except KeyError:
-                 continue
Was already missing at lines 1260-1262
                     last._.set(last.label_, new_value)
-                 except (AttributeError, TypeError):
-                     merged.append(ent)
             else:

Stmts: 440 | Miss: 20 | ∆ Miss: 0 | Cover: 95.45%
edsnlp/pipes/misc/dates/models.py

Was already missing at line 165

                     else:
-                         d["month"] = note_datetime.month
                 if self.day is None:
Was already missing at lines 169-175
             else:
-                 if self.year is None:
  ...
-                     d["day"] = default_day
Was already missing at lines 183-185
                 return dt
-             except ValueError:
-                 return None
Was already missing at line 201
         else:
-             return None
Was already missing at line 217
         if self.second:
-             norm += f"{self.second:02}s"

Stmts: 206 | Miss: 11 | ∆ Miss: 0 | Cover: 94.66%
edsnlp/pipes/misc/dates/dates.py

Was already missing at line 249

         if isinstance(absolute, str):
-             absolute = [absolute]
         if isinstance(relative, str):
Was already missing at line 251
         if isinstance(relative, str):
-             relative = [relative]
         if isinstance(duration, str):
Was already missing at line 253
         if isinstance(duration, str):
-             relative = [duration]
         if isinstance(false_positive, str):
Was already missing at lines 357-366
             if self.merge_mode == "align":
-                 alignments = align_spans(matches, spans, sort_by_overlap=True)
  ...
-                         matches.append(span)
Was already missing at lines 462-464
                 if v1.mode == Mode.DURATION:
-                     m1 = Bound.FROM if v2.bound == Bound.UNTIL else Bound.UNTIL
-                     m2 = v2.mode or Bound.FROM
                 elif v2.mode == Mode.DURATION:

Stmts: 153 | Miss: 14 | ∆ Miss: 0 | Cover: 90.85%
edsnlp/pipes/misc/consultation_dates/consultation_dates.py

Was already missing at line 131

         else:
-             self.date_matcher = None
Was already missing at line 134
         if not consultation_mention:
-             consultation_mention = []
         elif consultation_mention is True:

Stmts: 48 | Miss: 2 | ∆ Miss: 0 | Cover: 95.83%
edsnlp/pipes/core/normalizer/__init__.py

Was already missing at line 7

 def excluded_or_space_getter(t):
-     return t.is_space or t.tag_ == "EXCLUDED"

Stmts: 5 | Miss: 1 | ∆ Miss: 0 | Cover: 80.00%
edsnlp/pipes/core/endlines/endlines.py

Was already missing at lines 160-164

         if end_lines_model is None:
-             path = build_path(__file__, "base_model.pkl")
- 
-             with open(path, "rb") as inp:
-                 self.model = pickle.load(inp)
         elif isinstance(end_lines_model, str):
Was already missing at lines 167-169
                 self.model = pickle.load(inp)
-         elif isinstance(end_lines_model, EndLinesModel):
-             self.model = end_lines_model
         else:
Was already missing at line 200
         ):
-             return "ENUMERATION"
Was already missing at line 287
         if np.isnan(sigma):
-             sigma = 1

Stmts: 89 | Miss: 7 | ∆ Miss: 0 | Cover: 92.13%
edsnlp/pipes/core/contextual_matcher/contextual_matcher.py

Was already missing at lines 241-243

             ):
-                 to_keep = False
-                 break

Stmts: 130 | Miss: 2 | ∆ Miss: 0 | Cover: 98.46%
edsnlp/patch_spacy.py

Was already missing at lines 67-69

             # if module is reloaded.
-             existing_func = registry.factories.get(internal_name)
-             if not util.is_same_func(factory_func, existing_func):
                 raise ValueError(

Stmts: 31 | Miss: 2 | ∆ Miss: 0 | Cover: 93.55%
edsnlp/package.py

Was already missing at lines 474-476

             version = version or pyproject["project"]["version"]
-         except (KeyError, TypeError):
-             version = "0.1.0"
         name = name or pyproject["project"]["name"]
Was already missing at line 480
         else:
-             main_package = None
         model_package = snake_case(name.lower())

Stmts: 214 | Miss: 3 | ∆ Miss: 0 | Cover: 98.60%
edsnlp/metrics/span_attribute.py

Was already missing at lines 67-69

         )
-         assert attributes is None
-         attributes = kwargs.pop("qualifiers")
     if attributes is None:

Stmts: 75 | Miss: 2 | ∆ Miss: 0 | Cover: 97.33%
edsnlp/matchers/simstring.py

Was already missing at line 280

     if custom:
-         attr = attr[1:].lower()
Was already missing at line 295
             if custom:
-                 token_text = getattr(token._, attr)
             else:

Stmts: 146 | Miss: 2 | ∆ Miss: 0 | Cover: 98.63%
edsnlp/language.py

Was already missing at line 103

             if last != begin:
-                 logger.warning(
                     "Missed some characters during"

Stmts: 51 | Miss: 1 | ∆ Miss: 0 | Cover: 98.04%
edsnlp/data/standoff.py

Was already missing at line 38

     def __init__(self, ann_file, line):
-         super().__init__(f"File {ann_file}, unrecognized Brat line {line}")
Was already missing at line 192
                         )
-                 except Exception:
                     raise Exception(

Stmts: 186 | Miss: 2 | ∆ Miss: 0 | Cover: 98.92%
edsnlp/data/polars.py

Was already missing at line 36

         if hasattr(data, "collect"):
-             data = data.collect()
         assert isinstance(data, pl.DataFrame)

Stmts: 55 | Miss: 1 | ∆ Miss: 0 | Cover: 98.18%
edsnlp/data/json.py

Was already missing at line 81

                 return records
-         except Exception as e:
             raise Exception(f"Cannot read {file}: {e}")

Stmts: 112 | Miss: 1 | ∆ Miss: 0 | Cover: 99.11%
edsnlp/data/converters.py

Was already missing at line 428

                 elif key == "XPOS":
-                     word.tag_ = value
                 elif key == "FEATS":
Was already missing at line 822
         for attr in bool_attributes:
-             self.default_attributes[attr] = False
         self.opener = opener or self.PRESETS[preset]["opener"]
Was already missing at line 830
         if self.keep_raw_attribute_values:
-             return value
         try:
Was already missing at lines 869-872
                     )
-                 except StopIteration:
-                     warnings.warn(f"Unmatched closing tag for '{sep.group()}'")
-                     continue
                 start, start_label, start_attrs = starts.pop(idx)
Was already missing at line 900
         ):
-             if not Span.has_extension(dst):
                 Span.set_extension(dst, default=None)
Was already missing at line 907
             if span is None:
-                 continue
             for k, v in attrs.items():
Was already missing at lines 922-925
         for attr, value in self.default_attributes.items():
-             for span in spans:
-                 if span._.get(attr) is None:
-                     span._.set(attr, value)
Was already missing at line 960
     if isinstance(converter, type):
-         return converter(**kwargs), {}
     return converter, validate_kwargs(converter, kwargs)

Stmts: 317 | Miss: 11 | ∆ Miss: 0 | Cover: 96.53%
edsnlp/data/conll.py

Was already missing at lines 81-83

             )
-         except StopIteration:
-             cols = DEFAULT_COLUMNS
             warnings.warn(
Was already missing at lines 92-96
         if not line:
-             if doc["words"]:
-                 yield doc
-                 doc = {"words": []}
-             continue
         if line.startswith("#"):

Stmts: 76 | Miss: 6 | ∆ Miss: 0 | Cover: 92.11%
edsnlp/core/torch_component.py

Was already missing at line 392

             if hasattr(self, "compiled"):
-                 res = self.compiled(batch)
             else:
Was already missing at line 438
         """
-         return self.preprocess(doc)
Was already missing at line 463
         if object.__repr__(self) in exclude:
-             return
         exclude.add(object.__repr__(self))

Stmts: 187 | Miss: 3 | ∆ Miss: 0 | Cover: 98.40%
edsnlp/core/stream.py

Was already missing at lines 190-192

                 if isinstance(batch, StreamSentinel):
-                     yield batch
-                     continue
                 results = []
Was already missing at lines 1007-1009
                 elif op.batch_fn is None:
-                     batch_size = op.size
-                     batch_fn = batchify
                 else:

Stmts: 356 | Miss: 4 | ∆ Miss: 0 | Cover: 98.88%
edsnlp/core/pipeline.py

Was already missing at line 605

             if name in exclude:
-                 continue
             if name not in components:
Was already missing at lines 716-719
         """
-         res = Stream.ensure_stream(docs)
-         res = res.map(functools.partial(self.preprocess, supervision=supervision))
-         return res

Stmts: 446 | Miss: 4 | ∆ Miss: 0 | Cover: 99.10%
edsnlp/connectors/omop.py

Was already missing at line 69

         if not isinstance(row.ents, list):
-             continue
Was already missing at line 87
             else:
-                 doc.spans[span.label_].append(span)
Was already missing at line 127
     if df.note_id.isna().any():
-         df["note_id"] = range(len(df))
Was already missing at line 171
         if i > 0:
-             df.term_modifiers += ";"
         df.term_modifiers += ext + "=" + df[ext].astype(str)

Stmts: 84 | Miss: 4 | ∆ Miss: 0 | Cover: 95.24%

278 files skipped due to complete coverage.

Coverage failure: total of 97.45% is less than 98.07% ❌


Quality Gate failed

Failed conditions
6 Security Hotspots

See analysis details on SonarQube Cloud
