From f256346a457e90b6d0dd2c196793027ca18b836b Mon Sep 17 00:00:00 2001
From: Benjamin <benrogersnewsome@gmail.com>
Date: Fri, 28 Feb 2025 13:17:56 +0000
Subject: [PATCH] Make CSV separators raw strings to avoid invalid escape
 sequence warnings in Python 3.12

The non-raw string "\s+" triggers "SyntaxWarning: invalid escape sequence '\s'" in Python 3.12. Using the raw string r"\s+" yields the same regex separator without the warning.
---
 src/pyannote/database/loader.py |  6 +++---
 src/pyannote/database/util.py   | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/pyannote/database/loader.py b/src/pyannote/database/loader.py
index 6e5e471..10d8477 100644
--- a/src/pyannote/database/loader.py
+++ b/src/pyannote/database/loader.py
@@ -90,7 +90,7 @@ def load_trial(file_trial):
     """
 
     trials = pd.read_table(
-        file_trial, sep="\s+", names=["reference", "uri1", "uri2"]
+        file_trial, sep=r"\s+", names=["reference", "uri1", "uri2"]
     )
 
     for _, reference, uri1, uri2 in trials.itertuples():
@@ -289,7 +289,7 @@ def __init__(self, ctm: Path):
             "confidence": float,
         }
         self.data_ = pd.read_csv(
-            ctm, names=names, dtype=dtype, sep="\s+"
+            ctm, names=names, dtype=dtype, sep=r"\s+"
         ).groupby("uri")
 
     def __call__(self, current_file: ProtocolFile) -> Union["Doc", None]:
@@ -354,7 +354,7 @@ def __init__(self, mapping: Path):
             "uri": str,
         }
         self.data_ = pd.read_csv(
-            mapping, names=names, dtype=dtype, sep="\s+"
+            mapping, names=names, dtype=dtype, sep=r"\s+"
         )
 
         # get colum 'value' dtype, allowing us to acces it during subset
diff --git a/src/pyannote/database/util.py b/src/pyannote/database/util.py
index a5ba549..07d4bcf 100644
--- a/src/pyannote/database/util.py
+++ b/src/pyannote/database/util.py
@@ -179,7 +179,7 @@ def load_rttm(file_rttm, keep_type="SPEAKER"):
         file_rttm,
         names=names,
         dtype=dtype,
-        sep="\s+",
+        sep=r"\s+",
         keep_default_na=True,
     )
 
@@ -213,7 +213,7 @@ def load_stm(file_stm):
     dtype = {"uri": str, "speaker": str, "start": float, "end": float}
     data = pd.read_csv(
         file_stm,
-        sep="\s+",
+        sep=r"\s+",
         usecols=[0, 2, 3, 4],
         dtype=dtype,
         names=list(dtype),
@@ -250,7 +250,7 @@ def load_mdtm(file_mdtm):
         file_mdtm,
         names=names,
         dtype=dtype,
-        sep="\s+",
+        sep=r"\s+",
         keep_default_na=False,
     )
 
@@ -281,7 +281,7 @@ def load_uem(file_uem):
 
     names = ["uri", "NA1", "start", "end"]
     dtype = {"uri": str, "start": float, "end": float}
-    data = pd.read_csv(file_uem, names=names, dtype=dtype, sep="\s+")
+    data = pd.read_csv(file_uem, names=names, dtype=dtype, sep=r"\s+")
 
     timelines = dict()
     for uri, parts in data.groupby("uri"):
@@ -306,7 +306,7 @@ def load_lab(path, uri: str = None) -> Annotation:
 
     names = ["start", "end", "label"]
     dtype = {"start": float, "end": float, "label": str}
-    data = pd.read_csv(path, names=names, dtype=dtype, sep="\s+")
+    data = pd.read_csv(path, names=names, dtype=dtype, sep=r"\s+")
 
     annotation = Annotation(uri=uri)
     for i, turn in data.iterrows():