From f256346a457e90b6d0dd2c196793027ca18b836b Mon Sep 17 00:00:00 2001 From: Benjamin Date: Fri, 28 Feb 2025 13:17:56 +0000 Subject: [PATCH] Make CSV separators raw strings to avoid invalid escape sequence errors in Python 3.12 The string "\s+" throws SyntaxWarning: invalid escape sequence '\s' in Python 3.12 --- src/pyannote/database/loader.py | 6 +++--- src/pyannote/database/util.py | 10 +++++----- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/src/pyannote/database/loader.py b/src/pyannote/database/loader.py index 6e5e471..10d8477 100644 --- a/src/pyannote/database/loader.py +++ b/src/pyannote/database/loader.py @@ -90,7 +90,7 @@ def load_trial(file_trial): """ trials = pd.read_table( - file_trial, sep="\s+", names=["reference", "uri1", "uri2"] + file_trial, sep=r"\s+", names=["reference", "uri1", "uri2"] ) for _, reference, uri1, uri2 in trials.itertuples(): @@ -289,7 +289,7 @@ def __init__(self, ctm: Path): "confidence": float, } self.data_ = pd.read_csv( - ctm, names=names, dtype=dtype, sep="\s+" + ctm, names=names, dtype=dtype, sep=r"\s+" ).groupby("uri") def __call__(self, current_file: ProtocolFile) -> Union["Doc", None]: @@ -354,7 +354,7 @@ def __init__(self, mapping: Path): "uri": str, } self.data_ = pd.read_csv( - mapping, names=names, dtype=dtype, sep="\s+" + mapping, names=names, dtype=dtype, sep=r"\s+" ) # get colum 'value' dtype, allowing us to acces it during subset diff --git a/src/pyannote/database/util.py b/src/pyannote/database/util.py index a5ba549..07d4bcf 100644 --- a/src/pyannote/database/util.py +++ b/src/pyannote/database/util.py @@ -179,7 +179,7 @@ def load_rttm(file_rttm, keep_type="SPEAKER"): file_rttm, names=names, dtype=dtype, - sep="\s+", + sep=r"\s+", keep_default_na=True, ) @@ -213,7 +213,7 @@ def load_stm(file_stm): dtype = {"uri": str, "speaker": str, "start": float, "end": float} data = pd.read_csv( file_stm, - sep="\s+", + sep=r"\s+", usecols=[0, 2, 3, 4], dtype=dtype, names=list(dtype), @@ -250,7 +250,7 @@ def 
load_mdtm(file_mdtm): file_mdtm, names=names, dtype=dtype, - sep="\s+", + sep=r"\s+", keep_default_na=False, ) @@ -281,7 +281,7 @@ def load_uem(file_uem): names = ["uri", "NA1", "start", "end"] dtype = {"uri": str, "start": float, "end": float} - data = pd.read_csv(file_uem, names=names, dtype=dtype, sep="\s+") + data = pd.read_csv(file_uem, names=names, dtype=dtype, sep=r"\s+") timelines = dict() for uri, parts in data.groupby("uri"): @@ -306,7 +306,7 @@ def load_lab(path, uri: str = None) -> Annotation: names = ["start", "end", "label"] dtype = {"start": float, "end": float, "label": str} - data = pd.read_csv(path, names=names, dtype=dtype, sep="\s+") + data = pd.read_csv(path, names=names, dtype=dtype, sep=r"\s+") annotation = Annotation(uri=uri) for i, turn in data.iterrows():