From 315fc20ceb442bb8e61e684ef2a6afc0edf89d93 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Fri, 15 Jun 2018 09:58:05 +0530
Subject: [PATCH 01/15] LMNN Benchmarks

---
 config.yaml               |  77 +++++++++++++
 datasets/dataset-urls.txt |   2 +
 methods/mlpack/lmnn.py    | 227 ++++++++++++++++++++++++++++++++++++++
 methods/shogun/lmnn.py    | 114 +++++++++++++++++++
 tests/benchmark_lmnn.py   | 100 +++++++++++++++++
 5 files changed, 520 insertions(+)
 create mode 100644 methods/mlpack/lmnn.py
 create mode 100644 methods/shogun/lmnn.py
 create mode 100644 tests/benchmark_lmnn.py

diff --git a/config.yaml b/config.yaml
index 8662f8e..b473edb 100644
--- a/config.yaml
+++ b/config.yaml
@@ -794,6 +794,70 @@ methods:
                 normalize: True
                 seed: 42
 
+    LMNN:
+        run: ['metric']
+        script: methods/mlpack/lmnn.py
+        format: [csv, txt]
+        datasets:
+            - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+              options:
+                passes: 10
+                range: 25
+                seed: 42
+
+            - files: ['datasets/letter_recognition.csv',
+                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
+                      'datasets/covtype.csv', 'datasets/corel-histogram.csv',
+                      'datasets/mnist_all.csv', 'datasets/Twitter.csv']
+              options:
+                passes: 3
+                range: 100
+                seed: 42
+
+            - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+              options:
+                passes: 5
+                optimizer: bbsgd
+                seed: 42
+
+            - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+              options:
+                passes: 5
+                optimizer: sgd
+                range: 50
+                step_size: 1e-07
+                seed: 42
+
+            - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+              options:
+                num_targets: 5
+                max_iterations: 2000
+                optimizer: lbfgs
+                seed: 42
+                wolfe: 0.5
+                range: 50
+
+            - files: ['datasets/covtype.csv',
+                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
+                      'datasets/mnist_all.csv']
+              options:
+                max_iterations: 2000
+                optimizer: lbfgs
+                seed: 42
+                range: 100
+
     HMMTRAIN:
         run: ['metric']
         script: methods/mlpack/hmm_train.py
@@ -2174,6 +2238,19 @@ methods:
               options:
                 lambda1: 0.01
 
+    LMNN:
+        run: ['metric']
+        script: methods/shogun/lmnn.py
+        format: [csv, txt]
+        datasets:
+            - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
+                      'datasets/covtype.csv', 'datasets/corel-histogram.csv',
+                      'datasets/mnist_all.csv', 'datasets/Twitter.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+
     QDA:
             run: ['metric','metric']
             script: methods/shogun/qda.py
diff --git a/datasets/dataset-urls.txt b/datasets/dataset-urls.txt
index 1d7a0e2..6713bc9 100644
--- a/datasets/dataset-urls.txt
+++ b/datasets/dataset-urls.txt
@@ -10,6 +10,7 @@ artificial_1DSignal*.csv mlpack.org/datasets/artificial_1DSignal.tar.gz
 artificial_2DSignal*.csv mlpack.org/datasets/artificial_2DSignal.tar.gz
 artificial_40D*.csv mlpack.org/datasets/artificial_40D.tar.gz
 artificial_5DSignal*.csv mlpack.org/datasets/artificial_5DSignal.tar.gz
+balance_scale*.csv mlpack.org/datasets/balance_scale.tar.gz
 bank8FM.csv mlpack.org/datasets/bank8FM.tar.gz
 cal_housing.csv mlpack.org/datasets/cal_housing.tar.gz
 circle_data.csv mlpack.org/datasets/circle.tar.gz
@@ -25,6 +26,7 @@ faces.csv mlpack.org/datasets/faces.tar.gz
 ionosphere.csv mlpack.org/datasets/ionosphere.tar.gz
 iris*.csv mlpack.org/datasets/iris.tar.gz
 isolet*.csv mlpack.org/datasets/isolet.tar.gz
+letter_recognition*.csv http://www.mlpack.org/datasets/letter_recognition.tar.gz
 madelon*.csv mlpack.org/datasets/madelon.tar.gz
 mammography*.csv mlpack.org/datasets/mammography.tar.gz
 mnist*.csv mlpack.org/datasets/mnist.tar.gz
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
new file mode 100644
index 0000000..00332b7
--- /dev/null
+++ b/methods/mlpack/lmnn.py
@@ -0,0 +1,227 @@
+'''
+  @file nca.py
+  @author Manish Kumar
+
+  Class to benchmark the mlpack Large Margin Nearest Neighbors method.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path, this method even works if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+from util.log import *
+from util.profiler import *
+
+import shlex
+
+try:
+  import subprocess32 as subprocess
+except ImportError:
+  import subprocess
+
+import re
+import collections
+
+'''
+This class implements the Large Margin Nearest Neighbors benchmark.
+'''
+class LMNN(object):
+
+  '''
+  Create the LMNN benchmark instance and read the executable's description.
+
+  @param dataset - Input dataset to perform LMNN on.
+  @param timeout - The time until the timeout. Default no timeout.
+  @param path - Path to the mlpack executable.
+  @param verbose - Display informational messages.
+  @param debug - Path to the mlpack debug executable (used by RunMemory).
+  '''
+  def __init__(self, dataset, timeout=0, path=os.environ["BINPATH"],
+      verbose=True, debug=os.environ["DEBUGBINPATH"]):
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+    self.timeout = timeout
+    self.debug = debug
+
+    # Get description from executable.
+    cmd = shlex.split(self.path + "mlpack_LMNN -h")
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+    else:
+      # Use regular expression pattern to get the description.
+      pattern = re.compile(br"""(.*?)Optional.*?options:""",
+          re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+      match = pattern.match(s)
+      if not match:
+        Log.Warn("Can't parse description", self.verbose)
+        description = ""
+      else:
+        description = match.group(1)
+
+      self.description = description
+
+  '''
+  Destructor to clean up at the end. Use this method to remove created files.
+  '''
+  def __del__(self):
+    Log.Info("Clean up.", self.verbose)
+    filelist = ["gmon.out", "distance.csv"]
+    for f in filelist:
+      if os.path.isfile(f):
+        os.remove(f)
+
+  '''
+  Convert the options dict into a command line string. Recognized keys are
+  popped from the given dict; any key left over raises an Exception.
+  '''
+  def OptionsToStr(self, options):
+    optionsStr = ""
+    if "optimizer" in options:
+      optionsStr = "-O " + str(options.pop("optimizer"))
+    if "num_targets" in options:
+      optionsStr = optionsStr + " -k " + str(options.pop("num_targets"))
+    if "regularization" in options:
+      optionsStr = optionsStr + " -r " + str(options.pop("regularization"))
+    if "tolerance" in options:
+      optionsStr = optionsStr + " -t " + str(options.pop("tolerance"))
+    if "batch_delta" in options:
+      optionsStr = optionsStr + " -d " + str(options.pop("batch_delta"))
+    if "range" in options:
+      optionsStr = optionsStr + " -R " + str(options.pop("range"))
+    if "step_size" in options:
+      optionsStr = optionsStr + " -a " + str(options.pop("step_size"))
+    if "batch_size" in options:
+      optionsStr = optionsStr + " -b " + str(options.pop("batch_size"))
+    if "passes" in options:
+      optionsStr = optionsStr + " -p " + str(options.pop("passes"))
+    if "max_iterations" in options:
+      optionsStr = optionsStr + " -n " + str(options.pop("max_iterations"))
+    if "num_basis" in options:
+      optionsStr = optionsStr + " -B " + str(options.pop("num_basis"))
+    if "wolfe" in options:
+      optionsStr = optionsStr + " -w " + str(options.pop("wolfe"))
+    if "normalize" in options:
+      optionsStr = optionsStr + " -N"
+      options.pop("normalize")
+    if "linear_scan" in options:
+      optionsStr = optionsStr + " -L"
+      options.pop("linear_scan")
+    if "seed" in options:
+      optionsStr = optionsStr + " --seed " + str(options.pop("seed"))
+
+    if len(options) > 0:
+      Log.Fatal("Unknown parameters: " + str(options))
+      raise Exception("unknown parameters")
+
+    return optionsStr
+
+  '''
+  Run valgrind massif profiler on the Large Margin Nearest Neighbors method.
+  If the method has been successfully completed the report is saved in the
+  specified file.
+
+  @param options - Extra options for the method.
+  @param fileName - The name of the massif output file.
+  @param massifOptions - Extra massif options.
+  @return Returns False if the method was not successful, if the method was
+  successful save the report file in the specified file.
+  '''
+  def RunMemory(self, options, fileName, massifOptions="--depth=2"):
+    Log.Info("Perform LMNN Memory Profiling.", self.verbose)
+
+    # If the dataset contains two files then the second file is the labels file.
+    # In this case we add this to the command line.
+    if len(self.dataset) == 2:
+      cmd = shlex.split(self.debug + "mlpack_lmnn -i " + self.dataset[0] + " -l "
+          + self.dataset[1] + " -v -o distance.csv "
+          + self.OptionsToStr(options))
+    else:
+      cmd = shlex.split(self.debug + "mlpack_lmnn -i " + self.dataset +
+          " -v -o distance.csv " + self.OptionsToStr(options))
+
+    return Profiler.MassifMemoryUsage(cmd, fileName, self.timeout, massifOptions)
+
+  '''
+  Perform Large Margin Nearest Neighbors. If the method has been
+  successfully completed return the parsed runtime metrics.
+
+  @param options - Extra options for the method.
+  @return - Dict of metrics ('Runtime' in seconds if the timers parse), -2 if
+  the command timed out or -1 if it could not be executed.
+  '''
+  def RunMetrics(self, options):
+    Log.Info("Perform Large Margin Nearest Neighbors.", self.verbose)
+
+    # If the dataset contains two files then the second file is the labels file.
+    # In this case we add this to the command line.
+    if len(self.dataset) == 2:
+      cmd = shlex.split(self.path + "mlpack_lmnn -i " + self.dataset[0] + " -l "
+          + self.dataset[1] + " -v -o distance.csv "
+          + self.OptionsToStr(options))
+    else:
+      cmd = shlex.split(self.path + "mlpack_lmnn -i " + self.dataset +
+          " -v -o distance.csv " + self.OptionsToStr(options))
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+          timeout=self.timeout)
+    except subprocess.TimeoutExpired as e:
+      Log.Warn(str(e))
+      return -2
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Datastructure to store the results.
+    metrics = {}
+
+    # Parse data: runtime.
+    timer = self.ParseTimer(s)
+
+    if timer != -1:
+      metrics['Runtime'] = timer.total_time - timer.saving_data - timer.loading_data
+
+      Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
+
+    return metrics
+
+  '''
+  Parse the timer data from a given string.
+
+  @param data - String to parse timer data from.
+  @return - Namedtuple that contains the timer data or -1 in case of an error.
+  '''
+  def ParseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(br"""
+        .*?loading_data: (?P<loading_data>.*?)s.*?
+        .*?saving_data: (?P<saving_data>.*?)s.*?
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+    match = pattern.match(data)
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["loading_data", "saving_data",
+          "total_time"])
+
+      return timer(float(match.group("loading_data")),
+                   float(match.group("saving_data")),
+                   float(match.group("total_time")))
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
new file mode 100644
index 0000000..1bbf962
--- /dev/null
+++ b/methods/shogun/lmnn.py
@@ -0,0 +1,114 @@
+'''
+  @file lmnn.py
+  @author Manish Kumar
+
+  Large Margin Nearest Neighbors with shogun.
+'''
+
+import os
+import sys
+import inspect
+import timeout_decorator
+
+# Import the util path, this method even works if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+from log import *
+from timer import *
+
+import numpy as np
+from modshogun import RealFeatures
+from modshogun import MulticlassLabels
+from modshogun import LMNN as ShogunLMNN
+
+'''
+This class implements the Large Margin Nearest Neighbors benchmark.
+'''
+class LMNN(object):
+
+  '''
+  Create the Large Margin Nearest Neighbors instance.
+
+  @param dataset - Input dataset to perform LMNN on.
+  @param timeout - The time until the timeout. Default no timeout.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, timeout=0, verbose=True):
+    self.verbose = verbose
+    self.dataset = dataset
+    self.timeout = timeout
+
+  '''
+  Use the shogun libary to implement Large Margin Nearest Neighbors.
+
+  @param options - Extra options for the method.
+  @return - Elapsed time in seconds or a negative value if the method was not
+  successful.
+  '''
+  def LMNNShogun(self, options):
+    @timeout_decorator.timeout(self.timeout)
+    def RunLMNNShogun():
+      totalTimer = Timer()
+
+      # Load input dataset.
+      Log.Info("Loading dataset", self.verbose)
+      if len(self.dataset) == 2:
+          X = self.dataset[0]
+          y = self.dataset[1]
+      else:
+          # Use the last row of the training set as the responses.
+          X, y = SplitTrainData(self.dataset)
+      try:
+        feat = RealFeatures(self.X.T)
+        labels = MulticlassLabels(y.astype(numpy.float64))
+
+        with totalTimer:
+          # Get the options for running LMNN.
+          if "k" in options:
+            k = int(options.pop("k"))
+          else:
+            k = 1
+
+          if "maxiter" in options:
+            n = int(options.pop("maxiter"))
+          else:
+            n = 1000
+
+          if len(options) > 0:
+            Log.Fatal("Unknown parameters: " + str(options))
+            raise Exception("unknown parameters")
+
+          # Perform LMNN.
+          prep = ShogunLMNN(feat, labels, k)
+          prep.set_maxiter(n)
+          prep.train()
+      except Exception as e:
+        return -1
+
+      return totalTimer.ElapsedTime()
+
+    try:
+      return RunLMNNShogun()
+    except timeout_decorator.TimeoutError:
+      return -1
+
+  '''
+  Perform Large Margin Nearest Neighbors. If the method has been successfully
+  completed return the elapsed time in a metrics dictionary.
+
+  @param options - Extra options for the method.
+  @return - Dict with the 'Runtime' in seconds or a negative value if the
+  method was not successful.
+  '''
+  def RunMetrics(self, options):
+    Log.Info("Perform LMNN.", self.verbose)
+
+    results = self.LMNNShogun(options)
+    if results < 0:
+      return results
+
+    return {'Runtime' : results}
diff --git a/tests/benchmark_lmnn.py b/tests/benchmark_lmnn.py
new file mode 100644
index 0000000..3b9fcf0
--- /dev/null
+++ b/tests/benchmark_lmnn.py
@@ -0,0 +1,100 @@
+'''
+  @file benchmark_lmnn.py
+  @author Manish Kumar
+
+  Test for the Large Margin Nearest Neighbors scripts.
+'''
+
+import unittest
+
+import os, sys, inspect
+
+# Import the util path, this method even works if the path contains
+# symlinks to modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], '../util')))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+from util.loader import *
+
+'''
+Test the mlpack Large Margin Nearest Neighbors script.
+'''
+class LMNN_MLPACK_TEST(unittest.TestCase):
+
+  '''
+  Test initialization.
+  '''
+  def setUp(self):
+    self.dataset = 'datasets/iris_train.csv'
+    self.verbose = False
+    self.timeout = 240
+
+    module = Loader.ImportModuleFromPath("methods/mlpack/lmnn.py")
+    obj = getattr(module, "LMNN")
+    self.instance = obj(self.dataset, verbose=self.verbose, timeout=self.timeout)
+
+  '''
+  Test the constructor.
+  '''
+  def test_Constructor(self):
+    self.assertEqual(self.instance.verbose, self.verbose)
+    self.assertEqual(self.instance.timeout, self.timeout)
+    self.assertEqual(self.instance.dataset, self.dataset)
+
+  '''
+  Test the 'RunMetrics' function.
+  '''
+  def test_RunMetrics(self):
+    result = self.instance.RunMetrics({})
+    self.assertTrue(result["Runtime"] > 0)
+
+  '''
+  Test the destructor.
+  '''
+  def test_Destructor(self):
+    del self.instance
+
+    clean = True
+    filelist = ["gmon.out", "distance.csv"]
+    for f in filelist:
+      if os.path.isfile(f):
+        clean = False
+
+    self.assertTrue(clean)
+
+'''
+Test the shogun Large Margin Nearest Neighbors script.
+'''
+class LMNN_SHOGUN_TEST(unittest.TestCase):
+
+  '''
+  Test initialization.
+  '''
+  def setUp(self):
+    self.dataset = "datasets/iris.csv"
+    self.verbose = False
+    self.timeout = 240
+
+    module = Loader.ImportModuleFromPath("methods/shogun/lmnn.py")
+    obj = getattr(module, "LMNN")
+    self.instance = obj(self.dataset, verbose=self.verbose, timeout=self.timeout)
+
+  '''
+  Test the constructor.
+  '''
+  def test_Constructor(self):
+    self.assertEqual(self.instance.verbose, self.verbose)
+    self.assertEqual(self.instance.timeout, self.timeout)
+    self.assertEqual(self.instance.dataset, self.dataset)
+
+  '''
+  Test the 'RunMetrics' function.
+  '''
+  def test_RunMetrics(self):
+    result = self.instance.RunMetrics({})
+    self.assertTrue(result["Runtime"] > 0)
+
+if __name__ == '__main__':  # Kept last so every TestCase above is defined.
+  unittest.main()

From 9902eb5bab85e85aed72ef9d2c77c8bc61eb5019 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Fri, 15 Jun 2018 19:39:04 +0530
Subject: [PATCH 02/15] update datasets

---
 config.yaml            | 46 +++++++++++++++++++++++-------------------
 methods/mlpack/lmnn.py |  8 ++++----
 methods/shogun/lmnn.py | 10 ++++-----
 3 files changed, 33 insertions(+), 31 deletions(-)

diff --git a/config.yaml b/config.yaml
index b473edb..0d5ca68 100644
--- a/config.yaml
+++ b/config.yaml
@@ -800,16 +800,16 @@ methods:
         format: [csv, txt]
         datasets:
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
+                      'datasets/diabetes.csv']
               options:
                 passes: 10
                 range: 25
                 seed: 42
 
             - files: ['datasets/letter_recognition.csv',
-                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
+                      'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
                       'datasets/covtype.csv', 'datasets/corel-histogram.csv',
                       'datasets/mnist_all.csv', 'datasets/Twitter.csv']
               options:
@@ -818,18 +818,18 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
+                      'datasets/diabetes.csv']
               options:
                 passes: 5
                 optimizer: bbsgd
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
+                      'datasets/diabetes.csv']
               options:
                 passes: 5
                 optimizer: sgd
@@ -838,8 +838,7 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine.csv', 'datasets/ionosphere.csv',
+                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
                       'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
                 num_targets: 5
@@ -850,8 +849,8 @@ methods:
                 range: 50
 
             - files: ['datasets/covtype.csv',
-                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
-                      'datasets/mnist_all.csv']
+                      'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
+                      'datasets/mnist_all.csv', 'datasets/diabetes.csv']
               options:
                 max_iterations: 2000
                 optimizer: lbfgs
@@ -2243,13 +2242,18 @@ methods:
         script: methods/shogun/lmnn.py
         format: [csv, txt]
         datasets:
-            - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine.csv', 'datasets/ionosphere.csv',
-                      'datasets/shuttle_train.csv', 'datasets/isolet.csv',
-                      'datasets/covtype.csv', 'datasets/corel-histogram.csv',
-                      'datasets/mnist_all.csv', 'datasets/Twitter.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+            - files: [ ['datasets/iris_train.csv'],
+                       ['datasets/diabetes.csv'],
+                       ['datasets/isolet_train.csv'],
+                       ['datasets/wine_qual.csv'],
+                       ['datasets/ionosphere.csv'],
+                       ['datasets/shuttle_train.csv'],
+                       ['datasets/covtype.csv'],
+                       ['datasets/corel-histogram.csv'],
+                       ['datasets/mnist_all.csv'],
+                       ['datasets/Twitter.csv'],
+                       ['datasets/balance_scale.csv'],
+                       ['datasets/letter_recognition.csv']]
 
     QDA:
             run: ['metric','metric']
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index 00332b7..23c2702 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -1,5 +1,5 @@
 '''
-  @file nca.py
+  @file lmnn.py
   @author Manish Kumar
 
   Class to benchmark the mlpack Large Margin Nearest Neighbors method.
@@ -16,8 +16,8 @@
 if cmd_subfolder not in sys.path:
   sys.path.insert(0, cmd_subfolder)
 
-from util.log import *
-from util.profiler import *
+from log import *
+from profiler import *
 
 import shlex
 
@@ -52,7 +52,7 @@ def __init__(self, dataset, timeout=0, path=os.environ["BINPATH"],
     self.debug = debug
 
     # Get description from executable.
-    cmd = shlex.split(self.path + "mlpack_LMNN -h")
+    cmd = shlex.split(self.path + "mlpack_lmnn -h")
     try:
       s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False)
     except Exception as e:
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index 1bbf962..4f52ba3 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -19,6 +19,8 @@
 
 from log import *
 from timer import *
+from definitions import *
+from misc import *
 
 import numpy as np
 from modshogun import RealFeatures
@@ -56,12 +58,8 @@ def RunLMNNShogun():
 
       # Load input dataset.
       Log.Info("Loading dataset", self.verbose)
-      if len(self.dataset) == 2:
-          X = self.dataset[0]
-          y = self.dataset[1]
-      else:
-          # Use the last row of the training set as the responses.
-          X, y = SplitTrainData(self.dataset)
+      # Use the last row of the training set as the responses.
+      X, y = SplitTrainData(self.dataset)
       try:
         feat = RealFeatures(self.X.T)
         labels = MulticlassLabels(y.astype(numpy.float64))

From 45f6e1abedb2de0ccaf67cc22456c66583724537 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Fri, 15 Jun 2018 22:42:50 +0530
Subject: [PATCH 03/15] update shogun lmnn

---
 methods/shogun/lmnn.py | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index 4f52ba3..b72b899 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -17,6 +17,12 @@
 if cmd_subfolder not in sys.path:
   sys.path.insert(0, cmd_subfolder)
 
+#Import the metrics definitions path.
+metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
+if metrics_folder not in sys.path:
+  sys.path.insert(0, metrics_folder)
+
 from log import *
 from timer import *
 from definitions import *
@@ -61,8 +67,8 @@ def RunLMNNShogun():
       # Use the last row of the training set as the responses.
       X, y = SplitTrainData(self.dataset)
       try:
-        feat = RealFeatures(self.X.T)
-        labels = MulticlassLabels(y.astype(numpy.float64))
+        feat = RealFeatures(X.T)
+        labels = MulticlassLabels(y.astype(np.float64))
 
         with totalTimer:
           # Get the options for running LMNN.
@@ -74,7 +80,7 @@ def RunLMNNShogun():
           if "maxiter" in options:
             n = int(options.pop("maxiter"))
           else:
-            n = 1000
+            n = 2000
 
           if len(options) > 0:
             Log.Fatal("Unknown parameters: " + str(options))

From 5e0c6b2657b30ad2751331efe176d5f35b81ad91 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Sat, 16 Jun 2018 18:53:42 +0530
Subject: [PATCH 04/15] Added metrics

---
 config.yaml            |  1 -
 methods/mlpack/lmnn.py | 29 ++++++++++++++++++++++++++++-
 methods/shogun/lmnn.py | 29 ++++++++++++++++++++++++++++-
 3 files changed, 56 insertions(+), 3 deletions(-)

diff --git a/config.yaml b/config.yaml
index 0d5ca68..445ccc7 100644
--- a/config.yaml
+++ b/config.yaml
@@ -2243,7 +2243,6 @@ methods:
         format: [csv, txt]
         datasets:
             - files: [ ['datasets/iris_train.csv'],
-                       ['datasets/diabetes.csv'],
                        ['datasets/isolet_train.csv'],
                        ['datasets/wine_qual.csv'],
                        ['datasets/ionosphere.csv'],
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index 23c2702..fa674cd 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -16,16 +16,27 @@
 if cmd_subfolder not in sys.path:
   sys.path.insert(0, cmd_subfolder)
 
+#Import the metrics definitions path.
+metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
+if metrics_folder not in sys.path:
+  sys.path.insert(0, metrics_folder)
+
 from log import *
 from profiler import *
+from definitions import *
+from misc import *
 
 import shlex
+from modshogun import MulticlassLabels, RealFeatures
+from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
 
 try:
   import subprocess32 as subprocess
 except ImportError:
   import subprocess
 
+import numpy as np
 import re
 import collections
 
@@ -193,9 +204,25 @@ def RunMetrics(self, options):
 
     if timer != -1:
       metrics['Runtime'] = timer.total_time - timer.saving_data - timer.loading_data
-
       Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
 
+    # Predict labels.
+    distance = LoadDataset("distance.csv")
+    feat  = RealFeatures(np.dot(distance, self.dataset[0]))
+    labels = MulticlassLabels(self.dataset[1])
+    dist = EuclideanDistance()
+    knn = KNN(1, dist, labels)
+    if "num_targets" in options:
+      knn.set_k(options.pop("num_targets"))
+
+    knn.train(feat)
+    knn.set_knn_solver_type(KNN_COVER_TREE)
+    pred = knn.apply_multiclass(feat)
+
+    predictions = pred.get_int_labels()
+    confusionMatrix = Metrics.ConfusionMatrix(self.dataset[1], predictions)
+    metrics['Avg Accuracy'] = Metrics.AverageAccuracy(confusionMatrix)
+
     return metrics
 
   '''
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index b72b899..5df0485 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -32,6 +32,7 @@
 from modshogun import RealFeatures
 from modshogun import MulticlassLabels
 from modshogun import LMNN as ShogunLMNN
+from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
 
 '''
 This class implements the Large Margin Nearest Neighbors benchmark.
@@ -49,6 +50,7 @@ def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
     self.timeout = timeout
+    self.predictions = None
 
   '''
   Use the shogun libary to implement Large Margin Nearest Neighbors.
@@ -93,7 +95,24 @@ def RunLMNNShogun():
       except Exception as e:
         return -1
 
-      return totalTimer.ElapsedTime()
+      time = totalTimer.ElapsedTime()
+
+      # Predict labels.
+      distance = prep.get_linear_transform()
+      feat  = RealFeatures(np.dot(distance, X))
+      labels = MulticlassLabels(y)
+      dist = EuclideanDistance()
+      knn = KNN(1, dist, labels)
+      if "k" in options:
+        knn.set_k(options.pop("k"))
+
+      knn.train(feat)
+      knn.set_knn_solver_type(KNN_COVER_TREE)
+      pred = knn.apply_multiclass(feat)
+
+      self.predictions = pred.get_int_labels()
+
+      return [time, self.predictions]
 
     try:
       return RunLMNNShogun()
@@ -115,4 +134,12 @@ def RunMetrics(self, options):
     if results < 0:
       return results
 
+    # Datastructure to store the results.
+    metrics = {}
+
+    X, y = SplitTrainData(self.dataset)
+    confusionMatrix = Metrics.ConfusionMatrix(y, self.predictions)
+    metrics['Runtime'] = results
+    metrics['Avg Accuracy'] = Metrics.AverageAccuracy(confusionMatrix)
+
     return {'Runtime' : results}

From 9027de2dd3bae2f501d9057c8ff56e7178e2993b Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Sun, 17 Jun 2018 00:14:16 +0530
Subject: [PATCH 05/15] update metrics

---
 methods/mlpack/lmnn.py | 15 ++++++++-------
 methods/shogun/lmnn.py | 30 ++++++++++++++----------------
 2 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index fa674cd..9f27796 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -28,7 +28,7 @@
 from misc import *
 
 import shlex
-from modshogun import MulticlassLabels, RealFeatures
+from modshogun import MulticlassLabels, RealFeatures, MulticlassAccuracy
 from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
 
 try:
@@ -208,8 +208,10 @@ def RunMetrics(self, options):
 
     # Predict labels.
     distance = LoadDataset("distance.csv")
-    feat  = RealFeatures(np.dot(distance, self.dataset[0]))
-    labels = MulticlassLabels(self.dataset[1])
+    data = np.genfromtxt(self.dataset, delimiter=',')
+    transformedData = np.dot(data[:,:-1], distance.T)
+    feat  = RealFeatures(transformedData.T)
+    labels = MulticlassLabels(data[:, (data.shape[1] - 1)].astype(np.float64))
     dist = EuclideanDistance()
     knn = KNN(1, dist, labels)
     if "num_targets" in options:
@@ -218,10 +220,9 @@ def RunMetrics(self, options):
     knn.train(feat)
     knn.set_knn_solver_type(KNN_COVER_TREE)
     pred = knn.apply_multiclass(feat)
-
-    predictions = pred.get_int_labels()
-    confusionMatrix = Metrics.ConfusionMatrix(self.dataset[1], predictions)
-    metrics['Avg Accuracy'] = Metrics.AverageAccuracy(confusionMatrix)
+    evaluator = MulticlassAccuracy()
+    accuracy = evaluator.evaluate(pred, labels)
+    metrics['Avg Accuracy'] = accuracy
 
     return metrics
 
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index 5df0485..d065e14 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -1,5 +1,5 @@
 '''
-  @file lmnn.py
+  @file lmnn.py
   @author Manish Kumar
 
   Large Margin Nearest Neighbors with shogun.
@@ -30,7 +30,7 @@
 
 import numpy as np
 from modshogun import RealFeatures
-from modshogun import MulticlassLabels
+from modshogun import MulticlassLabels, MulticlassAccuracy
 from modshogun import LMNN as ShogunLMNN
 from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
 
@@ -50,7 +50,6 @@ def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
     self.timeout = timeout
-    self.predictions = None
 
   '''
   Use the shogun libary to implement Large Margin Nearest Neighbors.
@@ -99,8 +98,9 @@ def RunLMNNShogun():
 
       # Predict labels.
       distance = prep.get_linear_transform()
-      feat  = RealFeatures(np.dot(distance, X))
-      labels = MulticlassLabels(y)
+      transformedData = np.dot(X, distance.T)
+      feat  = RealFeatures(transformedData.T)
+      labels = MulticlassLabels(y.astype(np.float64))
       dist = EuclideanDistance()
       knn = KNN(1, dist, labels)
       if "k" in options:
@@ -109,10 +109,10 @@ def RunLMNNShogun():
       knn.train(feat)
       knn.set_knn_solver_type(KNN_COVER_TREE)
       pred = knn.apply_multiclass(feat)
-
-      self.predictions = pred.get_int_labels()
-
-      return [time, self.predictions]
+      evaluator = MulticlassAccuracy()
+      accuracy = evaluator.evaluate(pred, labels)
+      print(accuracy)
+      return [time, accuracy]
 
     try:
       return RunLMNNShogun()
@@ -131,15 +131,13 @@ def RunMetrics(self, options):
     Log.Info("Perform LMNN.", self.verbose)
 
     results = self.LMNNShogun(options)
-    if results < 0:
-      return results
+    if results[0] < 0:
+      return results[0]
 
     # Datastructure to store the results.
     metrics = {}
+    metrics['Runtime'] = results[0]
+    metrics['Avg Accuracy'] = results[1]
 
-    X, y = SplitTrainData(self.dataset)
-    confusionMatrix = Metrics.ConfusionMatrix(y, self.predictions)
-    metrics['Runtime'] = results
-    metrics['Avg Accuracy'] = Metrics.AverageAccuracy(confusionMatrix)
+    return metrics
 
-    return {'Runtime' : results}

From 874e862f889ea18b7150ffdbe24c6f45535220e3 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Mon, 18 Jun 2018 09:41:01 +0530
Subject: [PATCH 06/15] solve num_targets issue

---
 config.yaml            | 28 ++++++++++++++++++----------
 methods/mlpack/lmnn.py | 11 +++++------
 methods/shogun/lmnn.py | 15 +++++----------
 3 files changed, 28 insertions(+), 26 deletions(-)

diff --git a/config.yaml b/config.yaml
index 445ccc7..f4a5070 100644
--- a/config.yaml
+++ b/config.yaml
@@ -800,37 +800,41 @@ methods:
         format: [csv, txt]
         datasets:
             - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
                       'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
-                      'datasets/diabetes.csv']
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
+                num_targets: 5
                 passes: 10
                 range: 25
                 seed: 42
 
             - files: ['datasets/letter_recognition.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
                       'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
                       'datasets/covtype.csv', 'datasets/corel-histogram.csv',
                       'datasets/mnist_all.csv', 'datasets/Twitter.csv']
               options:
+                num_targets: 3
                 passes: 3
                 range: 100
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
-                      'datasets/diabetes.csv']
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/balance_scale.csv', 'datasets/ionosphere.csv']
               options:
+                num_targets: 3
                 passes: 5
                 optimizer: bbsgd
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
                       'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
-                      'datasets/diabetes.csv']
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
+                num_targets: 3
                 passes: 5
                 optimizer: sgd
                 range: 50
@@ -838,10 +842,11 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
+                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
                       'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
                       'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
-                num_targets: 5
+                num_targets: 3
                 max_iterations: 2000
                 optimizer: lbfgs
                 seed: 42
@@ -850,8 +855,9 @@ methods:
 
             - files: ['datasets/covtype.csv',
                       'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
-                      'datasets/mnist_all.csv', 'datasets/diabetes.csv']
+                      'datasets/mnist_all.csv', 'datasets/letter_recognition.csv']
               options:
+                num_targets: 5
                 max_iterations: 2000
                 optimizer: lbfgs
                 seed: 42
@@ -2243,8 +2249,8 @@ methods:
         format: [csv, txt]
         datasets:
             - files: [ ['datasets/iris_train.csv'],
-                       ['datasets/isolet_train.csv'],
                        ['datasets/wine_qual.csv'],
+                       ['datasets/isolet_train.csv'],
                        ['datasets/ionosphere.csv'],
                        ['datasets/shuttle_train.csv'],
                        ['datasets/covtype.csv'],
@@ -2253,6 +2259,8 @@ methods:
                        ['datasets/Twitter.csv'],
                        ['datasets/balance_scale.csv'],
                        ['datasets/letter_recognition.csv']]
+              options:
+                k: 3
 
     QDA:
             run: ['metric','metric']
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index 9f27796..9bc3ae7 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -61,6 +61,7 @@ def __init__(self, dataset, timeout=0, path=os.environ["BINPATH"],
     self.path = path
     self.timeout = timeout
     self.debug = debug
+    self.k = 1
 
     # Get description from executable.
     cmd = shlex.split(self.path + "mlpack_lmnn -h")
@@ -101,7 +102,8 @@ def OptionsToStr(self, options):
     if "optimizer" in options:
       optionsStr = "-O " + str(options.pop("optimizer"))
     if "num_targets" in options:
-      optionsStr = optionsStr + " -k " + str(options.pop("num_targets"))
+      self.k = options.pop("num_targets")
+      optionsStr = optionsStr + " -k " + str(self.k)
     if "regularization" in options:
       optionsStr = optionsStr + " -r " + str(options.pop("regularization"))
     if "tolerance" in options:
@@ -213,16 +215,13 @@ def RunMetrics(self, options):
     feat  = RealFeatures(transformedData.T)
     labels = MulticlassLabels(data[:, (data.shape[1] - 1)].astype(np.float64))
     dist = EuclideanDistance()
-    knn = KNN(1, dist, labels)
-    if "num_targets" in options:
-      knn.set_k(options.pop("num_targets"))
-
+    knn = KNN(self.k, dist, labels)
     knn.train(feat)
     knn.set_knn_solver_type(KNN_COVER_TREE)
     pred = knn.apply_multiclass(feat)
     evaluator = MulticlassAccuracy()
     accuracy = evaluator.evaluate(pred, labels)
-    metrics['Avg Accuracy'] = accuracy
+    metrics['Accuracy'] = accuracy
 
     return metrics
 
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index d065e14..cca3d98 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -50,6 +50,7 @@ def __init__(self, dataset, timeout=0, verbose=True):
     self.verbose = verbose
     self.dataset = dataset
     self.timeout = timeout
+    self.k = 1
 
   '''
   Use the shogun libary to implement Large Margin Nearest Neighbors.
@@ -74,9 +75,7 @@ def RunLMNNShogun():
         with totalTimer:
           # Get the options for running LMNN.
           if "k" in options:
-            k = int(options.pop("k"))
-          else:
-            k = 1
+            self.k = int(options.pop("k"))
 
           if "maxiter" in options:
             n = int(options.pop("maxiter"))
@@ -101,17 +100,13 @@ def RunLMNNShogun():
       transformedData = np.dot(X, distance.T)
       feat  = RealFeatures(transformedData.T)
       labels = MulticlassLabels(y.astype(np.float64))
-      dist = EuclideanDistance()
-      knn = KNN(1, dist, labels)
-      if "k" in options:
-        knn.set_k(options.pop("k"))
-
+      dist = EuclideanDistance(feat, feat)
+      knn = KNN(self.k, dist, labels)
       knn.train(feat)
       knn.set_knn_solver_type(KNN_COVER_TREE)
       pred = knn.apply_multiclass(feat)
       evaluator = MulticlassAccuracy()
       accuracy = evaluator.evaluate(pred, labels)
-      print(accuracy)
       return [time, accuracy]
 
     try:
@@ -137,7 +132,7 @@ def RunMetrics(self, options):
     # Datastructure to store the results.
     metrics = {}
     metrics['Runtime'] = results[0]
-    metrics['Avg Accuracy'] = results[1]
+    metrics['Accuracy'] = results[1]
 
     return metrics
 

From f6eb97ddb065700bb931d440b5e02d05520c1492 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Mon, 18 Jun 2018 10:39:08 +0530
Subject: [PATCH 07/15] Small update

---
 methods/mlpack/lmnn.py | 5 ++---
 methods/shogun/lmnn.py | 7 +++----
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index 9bc3ae7..b2e6d79 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -29,7 +29,7 @@
 
 import shlex
 from modshogun import MulticlassLabels, RealFeatures, MulticlassAccuracy
-from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
+from modshogun import KNN, EuclideanDistance
 
 try:
   import subprocess32 as subprocess
@@ -214,10 +214,9 @@ def RunMetrics(self, options):
     transformedData = np.dot(data[:,:-1], distance.T)
     feat  = RealFeatures(transformedData.T)
     labels = MulticlassLabels(data[:, (data.shape[1] - 1)].astype(np.float64))
-    dist = EuclideanDistance()
+    dist = EuclideanDistance(feat, feat)
     knn = KNN(self.k, dist, labels)
     knn.train(feat)
-    knn.set_knn_solver_type(KNN_COVER_TREE)
     pred = knn.apply_multiclass(feat)
     evaluator = MulticlassAccuracy()
     accuracy = evaluator.evaluate(pred, labels)
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index cca3d98..c612b50 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -32,7 +32,7 @@
 from modshogun import RealFeatures
 from modshogun import MulticlassLabels, MulticlassAccuracy
 from modshogun import LMNN as ShogunLMNN
-from modshogun import KNN, KNN_COVER_TREE, EuclideanDistance
+from modshogun import KNN, EuclideanDistance
 
 '''
 This class implements the Large Margin Nearest Neighbors benchmark.
@@ -91,7 +91,7 @@ def RunLMNNShogun():
           prep.set_maxiter(n)
           prep.train()
       except Exception as e:
-        return -1
+        return [-1, -1]
 
       time = totalTimer.ElapsedTime()
 
@@ -103,7 +103,6 @@ def RunLMNNShogun():
       dist = EuclideanDistance(feat, feat)
       knn = KNN(self.k, dist, labels)
       knn.train(feat)
-      knn.set_knn_solver_type(KNN_COVER_TREE)
       pred = knn.apply_multiclass(feat)
       evaluator = MulticlassAccuracy()
       accuracy = evaluator.evaluate(pred, labels)
@@ -112,7 +111,7 @@ def RunLMNNShogun():
     try:
       return RunLMNNShogun()
     except timeout_decorator.TimeoutError:
-      return -1
+      return [-1, -1]
 
   '''
   Perform Large Margin Nearest Neighbors. If the method has been successfully

From e34d0721585f8bbfc3005c8e8ff0d97d4b97fc7a Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Mon, 18 Jun 2018 21:12:44 +0530
Subject: [PATCH 08/15] Resolve k error

---
 methods/shogun/lmnn.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index c612b50..8d3ede4 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -87,7 +87,7 @@ def RunLMNNShogun():
             raise Exception("unknown parameters")
 
           # Perform LMNN.
-          prep = ShogunLMNN(feat, labels, k)
+          prep = ShogunLMNN(feat, labels, self.k)
           prep.set_maxiter(n)
           prep.train()
       except Exception as e:

From fa381795a71afef2b4812266353310e957ceb00d Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Tue, 19 Jun 2018 22:19:37 +0530
Subject: [PATCH 09/15] Add KNN accuracy predictor

---
 methods/mlpack/lmnn.py | 33 +++++++++++++++++++++++++++------
 methods/shogun/lmnn.py | 32 +++++++++++++++++++++++++++-----
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index b2e6d79..99af5e1 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -28,7 +28,7 @@
 from misc import *
 
 import shlex
-from modshogun import MulticlassLabels, RealFeatures, MulticlassAccuracy
+from modshogun import MulticlassLabels, RealFeatures
 from modshogun import KNN, EuclideanDistance
 
 try:
@@ -215,12 +215,33 @@ def RunMetrics(self, options):
     feat  = RealFeatures(transformedData.T)
     labels = MulticlassLabels(data[:, (data.shape[1] - 1)].astype(np.float64))
     dist = EuclideanDistance(feat, feat)
-    knn = KNN(self.k, dist, labels)
+    knn = KNN(self.k + 1, dist, labels)
     knn.train(feat)
-    pred = knn.apply_multiclass(feat)
-    evaluator = MulticlassAccuracy()
-    accuracy = evaluator.evaluate(pred, labels)
-    metrics['Accuracy'] = accuracy
+    # Get nearest neighbors.
+    NN =  knn.nearest_neighbors()
+    NN = np.delete(NN, 0, 0)
+    # Compute unique labels.
+    uniqueLabels = np.unique(labels)
+    # Keep count correct predictions.
+    count = 0
+    # Normalize labels
+    for i in range(data.shape[0]):
+        for j in range(len(uniqueLabels)):
+            if (labels[i] == uniqueLabels[j]):
+                labels[i] = j
+                break
+
+    for i in range(NN.shape[1]):
+        Map = [0 for x in range(len(uniqueLabels))]
+        for j in range(NN.shape[0]):
+            dist = np.linalg.norm(data[NN[j][i],:] - data[i,:])
+             # Add constant factor of 1 incase two points overlap
+            Map[int(labels[NN[j, i]])] += 1 / (dist + 1)**2
+        maxInd = np.argmax(Map)
+        if (maxInd == labels[i]):
+            count += 1
+
+    metrics['Accuracy'] = (count / NN.shape[1]) * 100
 
     return metrics
 
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index 8d3ede4..ff8de99 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -30,7 +30,7 @@
 
 import numpy as np
 from modshogun import RealFeatures
-from modshogun import MulticlassLabels, MulticlassAccuracy
+from modshogun import MulticlassLabels
 from modshogun import LMNN as ShogunLMNN
 from modshogun import KNN, EuclideanDistance
 
@@ -101,11 +101,33 @@ def RunLMNNShogun():
       feat  = RealFeatures(transformedData.T)
       labels = MulticlassLabels(y.astype(np.float64))
       dist = EuclideanDistance(feat, feat)
-      knn = KNN(self.k, dist, labels)
+      knn = KNN(self.k + 1, dist, labels)
       knn.train(feat)
-      pred = knn.apply_multiclass(feat)
-      evaluator = MulticlassAccuracy()
-      accuracy = evaluator.evaluate(pred, labels)
+      # Get nearest neighbors.
+      NN =  knn.nearest_neighbors()
+      NN = np.delete(NN, 0, 0)
+      # Compute unique labels.
+      uniqueLabels = np.unique(labels)
+      # Keep count correct predictions.
+      count = 0
+      # Normalize labels
+      for i in range(X.shape[0]):
+          for j in range(len(uniqueLabels)):
+              if (labels[i] == uniqueLabels[j]):
+                  labels[i] = j
+                  break
+
+      for i in range(NN.shape[1]):
+          Map = [0 for x in range(len(uniqueLabels))]
+          for j in range(NN.shape[0]):
+              dist = np.linalg.norm(X[NN[j][i],:] - X[i,:])
+               # Add constant factor of 1 incase two points overlap
+              Map[int(labels[NN[j, i]])] += 1 / (dist + 1)**2
+          maxInd = np.argmax(Map)
+          if (maxInd == labels[i]):
+              count += 1
+
+      accuracy = (count / NN.shape[1]) * 100
       return [time, accuracy]
 
     try:

From a488dba429425901e51327b715efa123e5362e8f Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Wed, 20 Jun 2018 10:40:23 +0530
Subject: [PATCH 10/15] Add KNNAccuracy to metrics

---
 methods/metrics/definitions.py | 51 ++++++++++++++++++++++++++++++++++
 methods/mlpack/lmnn.py         | 42 ++++++----------------------
 methods/shogun/lmnn.py         | 50 ++++++++++-----------------------
 3 files changed, 74 insertions(+), 69 deletions(-)

diff --git a/methods/metrics/definitions.py b/methods/metrics/definitions.py
index 262fc56..4f35bd8 100644
--- a/methods/metrics/definitions.py
+++ b/methods/metrics/definitions.py
@@ -9,6 +9,10 @@
 import numpy as np
 import math
 
+from modshogun import RealFeatures
+from modshogun import MulticlassLabels
+from modshogun import KNN, EuclideanDistance
+
 class Metrics(object):
 
   '''
@@ -466,3 +470,50 @@ def SimpleMeanSquaredError(truelabels, predictedlabels):
       simplemse += difference * difference
     simplemse /= n
     return simplemse
+
+  '''
+  Compute the k-NN classification accuracy (in percent) of the given data
+  after mapping it with the learned transformation.
+
+  @param distance - Matrix containing the learned distance; the points are
+  mapped with np.dot(data[0], distance.T) before the neighbor search.
+  @param data - List containing the data (data[0], one point per row) and
+  the true labels (data[1]).
+  @param k - Number of neighbors that vote for the label of a point.
+  @param flag - If True each vote is weighted by 1 / (d + 1)^2, otherwise
+  all neighbors vote (almost) equally.
+  @return Percentage of points whose predicted label matches the true one.
+  '''
+  @staticmethod
+  def KNNAccuracy(distance, data, k, flag):
+    transformedData = np.dot(data[0], distance.T)
+    feat = RealFeatures(transformedData.T)
+    labels = MulticlassLabels(data[1].astype(np.float64))
+    dist = EuclideanDistance(feat, feat)
+    # Ask for k + 1 neighbors and drop row 0, which presumably holds every
+    # point's own index (the queries are the training points themselves).
+    knn = KNN(k + 1, dist, labels)
+    knn.train(feat)
+    nn = np.delete(knn.nearest_neighbors(), 0, 0)
+    # Map every true label to its class index in [0, number of classes).
+    uniqueLabels, classIdx = np.unique(np.ravel(data[1]), return_inverse=True)
+    numPoints = nn.shape[1]
+
+    count = 0
+    for i in range(numPoints):
+      votes = np.zeros(len(uniqueLabels))
+      for j in range(nn.shape[0]):
+        if flag:
+          # NOTE(review): the weight uses the distance in the original space,
+          # not in the transformed one - confirm that this is intended.
+          distPoints = np.linalg.norm(data[0][nn[j, i], :] - data[0][i, :])
+          # The constant 1 keeps the weight finite when two points overlap.
+          votes[classIdx[nn[j, i]]] += 1 / (distPoints + 1)**2
+        else:
+          # The tiny rank-dependent penalty avoids a draw in favour of closer
+          # neighbors without affecting the actual result.
+          votes[classIdx[nn[j, i]]] += 1 - j * 1e-8
+      if np.argmax(votes) == classIdx[i]:
+        count += 1
+    # float() avoids the truncating integer division of Python 2.
+    return (float(count) / numPoints) * 100
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index 99af5e1..dbd0b01 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -28,8 +28,6 @@
 from misc import *
 
 import shlex
-from modshogun import MulticlassLabels, RealFeatures
-from modshogun import KNN, EuclideanDistance
 
 try:
   import subprocess32 as subprocess
@@ -208,40 +206,16 @@ def RunMetrics(self, options):
       metrics['Runtime'] = timer.total_time - timer.saving_data - timer.loading_data
       Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
 
-    # Predict labels.
+    # Get distance.
     distance = LoadDataset("distance.csv")
     data = np.genfromtxt(self.dataset, delimiter=',')
-    transformedData = np.dot(data[:,:-1], distance.T)
-    feat  = RealFeatures(transformedData.T)
-    labels = MulticlassLabels(data[:, (data.shape[1] - 1)].astype(np.float64))
-    dist = EuclideanDistance(feat, feat)
-    knn = KNN(self.k + 1, dist, labels)
-    knn.train(feat)
-    # Get nearest neighbors.
-    NN =  knn.nearest_neighbors()
-    NN = np.delete(NN, 0, 0)
-    # Compute unique labels.
-    uniqueLabels = np.unique(labels)
-    # Keep count correct predictions.
-    count = 0
-    # Normalize labels
-    for i in range(data.shape[0]):
-        for j in range(len(uniqueLabels)):
-            if (labels[i] == uniqueLabels[j]):
-                labels[i] = j
-                break
-
-    for i in range(NN.shape[1]):
-        Map = [0 for x in range(len(uniqueLabels))]
-        for j in range(NN.shape[0]):
-            dist = np.linalg.norm(data[NN[j][i],:] - data[i,:])
-             # Add constant factor of 1 incase two points overlap
-            Map[int(labels[NN[j, i]])] += 1 / (dist + 1)**2
-        maxInd = np.argmax(Map)
-        if (maxInd == labels[i]):
-            count += 1
-
-    metrics['Accuracy'] = (count / NN.shape[1]) * 100
+
+    dataList = [data[:,:-1], data[:, (data.shape[1] - 1)]]
+    metrics['Accuracy_1_NN'] = Metrics.KNNAccuracy(distance, dataList, 1, False)
+    metrics['Accuracy_3_NN'] = Metrics.KNNAccuracy(distance, dataList, 3, False)
+    metrics['Accuracy_3_NN_DW'] = Metrics.KNNAccuracy(distance, dataList, 3, True)
+    metrics['Accuracy_5_NN'] = Metrics.KNNAccuracy(distance, dataList, 5, False)
+    metrics['Accuracy_5_NN_DW'] = Metrics.KNNAccuracy(distance, dataList, 5, True)
 
     return metrics
 
diff --git a/methods/shogun/lmnn.py b/methods/shogun/lmnn.py
index ff8de99..a32fb0e 100644
--- a/methods/shogun/lmnn.py
+++ b/methods/shogun/lmnn.py
@@ -32,7 +32,6 @@
 from modshogun import RealFeatures
 from modshogun import MulticlassLabels
 from modshogun import LMNN as ShogunLMNN
-from modshogun import KNN, EuclideanDistance
 
 '''
 This class implements the Large Margin Nearest Neighbors benchmark.
@@ -95,40 +94,17 @@ def RunLMNNShogun():
 
       time = totalTimer.ElapsedTime()
 
-      # Predict labels.
+      # Get distance.
       distance = prep.get_linear_transform()
-      transformedData = np.dot(X, distance.T)
-      feat  = RealFeatures(transformedData.T)
-      labels = MulticlassLabels(y.astype(np.float64))
-      dist = EuclideanDistance(feat, feat)
-      knn = KNN(self.k + 1, dist, labels)
-      knn.train(feat)
-      # Get nearest neighbors.
-      NN =  knn.nearest_neighbors()
-      NN = np.delete(NN, 0, 0)
-      # Compute unique labels.
-      uniqueLabels = np.unique(labels)
-      # Keep count correct predictions.
-      count = 0
-      # Normalize labels
-      for i in range(X.shape[0]):
-          for j in range(len(uniqueLabels)):
-              if (labels[i] == uniqueLabels[j]):
-                  labels[i] = j
-                  break
-
-      for i in range(NN.shape[1]):
-          Map = [0 for x in range(len(uniqueLabels))]
-          for j in range(NN.shape[0]):
-              dist = np.linalg.norm(X[NN[j][i],:] - X[i,:])
-               # Add constant factor of 1 incase two points overlap
-              Map[int(labels[NN[j, i]])] += 1 / (dist + 1)**2
-          maxInd = np.argmax(Map)
-          if (maxInd == labels[i]):
-              count += 1
-
-      accuracy = (count / NN.shape[1]) * 100
-      return [time, accuracy]
+      dataList = [X, y]
+      accuracy1NN = Metrics.KNNAccuracy(distance, dataList, 1, False)
+      accuracy3NN = Metrics.KNNAccuracy(distance, dataList, 3, False)
+      accuracy3NNDW = Metrics.KNNAccuracy(distance, dataList, 3, True)
+      accuracy5NN = Metrics.KNNAccuracy(distance, dataList, 5, False)
+      accuracy5NNDW = Metrics.KNNAccuracy(distance, dataList, 5, True)
+
+      return [time, accuracy1NN, accuracy3NN, accuracy3NNDW,
+          accuracy5NN, accuracy5NNDW]
 
     try:
       return RunLMNNShogun()
@@ -153,7 +129,11 @@ def RunMetrics(self, options):
     # Datastructure to store the results.
     metrics = {}
     metrics['Runtime'] = results[0]
-    metrics['Accuracy'] = results[1]
+    metrics['Accuracy_1_NN'] = results[1]
+    metrics['Accuracy_3_NN'] = results[2]
+    metrics['Accuracy_3_NN_DW'] = results[3]
+    metrics['Accuracy_5_NN'] = results[4]
+    metrics['Accuracy_5_NN_DW'] = results[5]
 
     return metrics
 

From cd6c562095653078cb61d7fe5cdc110b73aa478c Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Wed, 20 Jun 2018 14:22:23 +0530
Subject: [PATCH 11/15] update datasets

---
 config.yaml | 39 ++++++++++++++++++++-------------------
 1 file changed, 20 insertions(+), 19 deletions(-)

diff --git a/config.yaml b/config.yaml
index f4a5070..a865532 100644
--- a/config.yaml
+++ b/config.yaml
@@ -800,19 +800,19 @@ methods:
         format: [csv, txt]
         datasets:
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
-                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
+                      'datasets/satellite_train.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/letter_recognition.csv',
+                      'datasets/oilspill_train.csv', 'datasets/shuttle_train.csv',
+                      'datasets/ecoli_train.csv', 'datasets/vehicle.csv']
               options:
-                num_targets: 5
+                num_targets: 3
                 passes: 10
-                range: 25
+                range: 20
                 seed: 42
 
             - files: ['datasets/letter_recognition.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
                       'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
-                      'datasets/covtype.csv', 'datasets/corel-histogram.csv',
+                      'datasets/covtype.csv', 'datasets/optdigits_train.csv',
                       'datasets/mnist_all.csv', 'datasets/Twitter.csv']
               options:
                 num_targets: 3
@@ -821,7 +821,7 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
+                      'datasets/ecoli_train.csv', 'datasets/vehicle.csv',
                       'datasets/balance_scale.csv', 'datasets/ionosphere.csv']
               options:
                 num_targets: 3
@@ -830,8 +830,8 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
+                      'datasets/satellite_train.csv', 'datasets/ionosphere.csv',
+                      'datasets/ecoli_train.csv', 'datasets/vehicle.csv',
                       'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
                 num_targets: 3
@@ -842,8 +842,8 @@ methods:
                 seed: 42
 
             - files: ['datasets/iris_train.csv',
-                      ['datasets/diabetes_X.csv', 'datasets/diabetes_y.csv'],
-                      'datasets/wine_qual.csv', 'datasets/ionosphere.csv',
+                      'datasets/satellite_train.csv', 'datasets/ionosphere.csv',
+                      'datasets/ecoli_train.csv', 'datasets/vehicle.csv',
                       'datasets/balance_scale.csv', 'datasets/letter_recognition.csv']
               options:
                 num_targets: 3
@@ -857,7 +857,7 @@ methods:
                       'datasets/shuttle_train.csv', 'datasets/isolet_train.csv',
                       'datasets/mnist_all.csv', 'datasets/letter_recognition.csv']
               options:
-                num_targets: 5
+                num_targets: 3
                 max_iterations: 2000
                 optimizer: lbfgs
                 seed: 42
@@ -2249,16 +2249,17 @@ methods:
         format: [csv, txt]
         datasets:
             - files: [ ['datasets/iris_train.csv'],
-                       ['datasets/wine_qual.csv'],
-                       ['datasets/isolet_train.csv'],
+                       ['datasets/ecoli_train.csv'],
+                       ['datasets/vehicle.csv'],
                        ['datasets/ionosphere.csv'],
                        ['datasets/shuttle_train.csv'],
-                       ['datasets/covtype.csv'],
-                       ['datasets/corel-histogram.csv'],
+                       ['datasets/letter_recognition.csv'],
+                       ['datasets/balance_scale.csv'],
+                       ['datasets/oilspill_train.csv'],
                        ['datasets/mnist_all.csv'],
                        ['datasets/Twitter.csv'],
-                       ['datasets/balance_scale.csv'],
-                       ['datasets/letter_recognition.csv']]
+                       ['datasets/isolet_train.csv'],
+                       ['datasets/covtype.csv']]
               options:
                 k: 3
 

From d6584a4f3058d4885cbeaa89dcc7effd6dab22e3 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Thu, 21 Jun 2018 14:06:50 +0530
Subject: [PATCH 12/15] Added benchmarks for matlab's LMNN

---
 config.yaml            |  17 ++++-
 methods/matlab/LMNN.m  | 168 +++++++++++++++++++++++++++++++++++++++++
 methods/matlab/lmnn.py | 146 +++++++++++++++++++++++++++++++++++
 3 files changed, 330 insertions(+), 1 deletion(-)
 create mode 100644 methods/matlab/LMNN.m
 create mode 100644 methods/matlab/lmnn.py

diff --git a/config.yaml b/config.yaml
index a865532..2ec3580 100644
--- a/config.yaml
+++ b/config.yaml
@@ -953,6 +953,21 @@ methods:
                 new_dimensionality: 2
                 scaled: True
 
+    LMNN:
+        run: ['metric']
+        script: methods/matlab/lmnn.py
+        format: [csv, txt]
+        datasets:
+            - files: ['datasets/iris_train.csv',
+                      'datasets/satellite_train.csv', 'datasets/ionosphere.csv',
+                      'datasets/balance_scale.csv', 'datasets/vehicle.csv',
+                      'datasets/oilspill_train.csv', 'datasets/ecoli_train.csv',
+                      'datasets/letter_recognition.csv', 'datasets/shuttle_train.csv',
+                      'datasets/isolet_train.csv', 'datasets/optdigits_train.csv',
+                      'datasets/covtype.csv', 'datasets/mnist_all.csv']
+              options:
+                k: 3
+
     PERCEPTRON:
         run: ['metric']
         script: methods/matlab/perceptron.py
@@ -3198,4 +3213,4 @@ methods:
                        ['datasets/sickEuthyroid_train.csv', 'datasets/sickEuthyroid_test.csv', 'datasets/sickEuthyroid_labels.csv'],
                        ['datasets/abalone7_train.csv', 'datasets/abalone7_test.csv', 'datasets/abalone7_labels.csv'],
                        ['datasets/satellite_train.csv', 'datasets/satellite_test.csv', 'datasets/satellite_labels.csv'],
-                       ['datasets/ecoli_train.csv', 'datasets/ecoli_test.csv', 'datasets/ecoli_labels.csv'] ]
\ No newline at end of file
+                       ['datasets/ecoli_train.csv', 'datasets/ecoli_test.csv', 'datasets/ecoli_labels.csv'] ]
diff --git a/methods/matlab/LMNN.m b/methods/matlab/LMNN.m
new file mode 100644
index 0000000..c7f66ba
--- /dev/null
+++ b/methods/matlab/LMNN.m
@@ -0,0 +1,168 @@
+% @file LMNN.m
+
+function lmnn(cmd)
+% LMNN Learns a metric using large-margin nearest neighbor metric learning
+%
+% The function uses large-margin nearest neighbor (LMNN) metric learning to
+% learn a metric on the data set specified by the NxD matrix X and the
+% corresponding Nx1 vector labels. The metric is returned in M.
+%
+% Required options:
+%     (-i) [string]    Input dataset to perform PLMNNCA on.
+% Options:
+%     (-k) [int]       Desired number of targets.
+%
+%
+% This file is part of the Matlab Toolbox for Dimensionality Reduction.
+% The toolbox can be obtained from http://homepage.tudelft.nl/19j49
+% You are free to use, change, or redistribute this code in any way you
+% want for non-commercial purposes. However, it is appreciated if you 
+% maintain the name of the original author.
+%
+% (C) Laurens van der Maaten, Delft University of Technology
+
+    inputFile = regexp(cmd, '.*?-i ([^\s]+)', 'tokens', 'once');
+    
+    % Load input dataset.
+    X = csvread(inputFile{:});
+		
+    % Use the last column of the data as the labels.
+    labels = X(:,end);
+    % Remove the label row.
+    X = X(:,1:end-1);
+		
+		% Variable K can't be used
+    %K = regexp(cmd, '.*?-k ([^\s]+)', 'tokens', 'once');
+    %K = str2num(K{1});
+
+    total_time = tic;
+
+    % Initialize some variables
+    [N, D] = size(X);
+    assert(length(labels) == N);
+    [lablist, ~, labels] = unique(labels);
+    K = length(lablist);
+    label_matrix = false(N, K);
+    label_matrix(sub2ind(size(label_matrix), (1:length(labels))', labels)) = true;
+    same_label = logical(double(label_matrix) * double(label_matrix'));
+    M = eye(D);
+    C = Inf; prev_C = Inf;
+    
+    % Set learning parameters
+    min_iter = 50;          % minimum number of iterations
+    max_iter = 1000;        % maximum number of iterations
+    eta = .1;               % learning rate
+    mu = .5;                % weighting of pull and push terms
+    tol = 1e-3;             % tolerance for convergence
+    best_C = Inf;           % best error obtained so far
+    best_M = M;             % best metric found so far
+    no_targets = 3;         % number of target neighbors
+    
+    % Select target neighbors
+    sum_X = sum(X .^ 2, 2);
+    DD = bsxfun(@plus, sum_X, bsxfun(@plus, sum_X', -2 * (X * X')));
+    DD(~same_label) = Inf; DD(1:N + 1:end) = Inf;
+    [~, targets_ind] = sort(DD, 2, 'ascend');
+    targets_ind = targets_ind(:,1:no_targets);
+    targets = false(N, N);
+    targets(sub2ind([N N], vec(repmat((1:N)', [1 no_targets])), vec(targets_ind))) = true;
+    
+    % Compute pulling term between target neighbors to initialize gradient
+    slack = zeros(N, N, no_targets);        
+    G = zeros(D, D);
+    for i=1:no_targets
+        G = G + (1 - mu) .* (X - X(targets_ind(:,i),:))' * (X - X(targets_ind(:,i),:));
+    end
+    
+    % Perform main learning iterations
+    iter = 0;
+    while (prev_C - C > tol || iter < min_iter) && iter < max_iter
+        
+        % Compute pairwise distances under current metric
+        XM = X * M;
+        sum_X = sum(XM .* X, 2);
+        DD = bsxfun(@plus, sum_X, bsxfun(@plus, sum_X', -2 * (XM * X')));
+        
+        % Compute value of slack variables
+        old_slack = slack;
+        for i=1:no_targets
+            slack(:,:,i) = ~same_label .* max(0, bsxfun(@minus, 1 + DD(sub2ind([N N], (1:N)', targets_ind(:,i))), DD));
+        end
+        
+        % Compute value of cost function
+        prev_C = C;
+        C = (1 - mu) .* sum(DD(targets)) + ...  % pull terms between target neighbors
+                 mu  .* sum(slack(:));          % push terms against impostors
+        
+        % Maintain best solution found so far (subgradient method)
+        if C < best_C
+            best_C = C;
+            best_M = M;
+        end
+        
+        % Perform gradient update
+        for i=1:no_targets
+            
+            % Add terms for new violations
+            [r, c] = find(slack(:,:,i) > 0 & old_slack(:,:,i) == 0);
+            G = G + mu .* ((X(r,:) - X(targets_ind(r, i),:))' * ...
+                           (X(r,:) - X(targets_ind(r, i),:)) - ...
+                           (X(r,:) - X(c,:))' * (X(r,:) - X(c,:)));
+            
+            % Remove terms for resolved violations
+            [r, c] = find(slack(:,:,i) == 0 & old_slack(:,:,i) > 0);
+            G = G - mu .* ((X(r,:) - X(targets_ind(r, i),:))' * ...
+                           (X(r,:) - X(targets_ind(r, i),:)) - ...
+                           (X(r,:) - X(c,:))' * (X(r,:) - X(c,:)));
+        end
+        M = M - (eta ./ N) .* G;
+        
+        % Project metric back onto the PSD cone
+        [V, L] = eig(M);
+        V = real(V); L = real(L);
+        ind = find(diag(L) > 0);
+        if isempty(ind)
+            warning('Projection onto PSD cone failed. All eigenvalues were negative.'); break
+        end
+        M = V(:,ind) * L(ind, ind) * V(:,ind)';
+        if any(isinf(M(:)))
+            warning('Projection onto PSD cone failed. Metric contains Inf values.'); break
+        end
+        if any(isnan(M(:)))
+            warning('Projection onto PSD cone failed. Metric contains NaN values.'); break
+        end
+        
+        % Update learning rate
+        if prev_C > C
+            eta = eta * 1.01;
+        else
+            eta = eta * .5;
+        end
+        
+        % Print out progress
+        iter = iter + 1;
+        no_slack = sum(slack(:) > 0);
+        if rem(iter, 10) == 0
+            [~, sort_ind] = sort(DD, 2, 'ascend');
+            disp(['Iteration ' num2str(iter) ': error is ' num2str(C ./ N) ...
+                  ', nearest neighbor error is ' num2str(sum(labels(sort_ind(:,2)) ~= labels) ./ N) ...
+                  ', number of constraints: ' num2str(no_slack)]);
+        end
+    end
+    
+    % Return best metric and error
+    M = best_M;
+    C = best_C;
+    
+    % Compute the linear transformation from the learned metric
+    [L, S, ~] = svd(M);
+    L = bsxfun(@times, sqrt(diag(S)), L);
+    disp(sprintf('[INFO ]   total_time: %fs', toc(total_time)))
+
+    % Save learned distance.
+		csvwrite('distance.csv', L);
+end
+
+function x = vec(x)
+    x = x(:);
+end
diff --git a/methods/matlab/lmnn.py b/methods/matlab/lmnn.py
new file mode 100644
index 0000000..cca2054
--- /dev/null
+++ b/methods/matlab/lmnn.py
@@ -0,0 +1,146 @@
+'''
+  @file lmnn.py
+  @author Manish Kumar
+
+  Class to benchmark the matlab Large Margin Nearest Neighbors method.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path, this method even works if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+#Import the metrics definitions path.
+metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
+if metrics_folder not in sys.path:
+  sys.path.insert(0, metrics_folder)
+
+from log import *
+from profiler import *
+from definitions import *
+
+import shlex
+import subprocess
+import re
+import collections
+
+'''
+This class implements the Large Margin Nearest Neighbors benchmark.
+'''
+class LMNN(object):
+
+  '''
+  Create the Large Margin Nearest Neighbors benchmark instance.
+
+  @param dataset - Input dataset to perform Large Margin Nearest Neighbors on.
+  @param timeout - The time until the timeout. Default no timeout.
+  @param path - Path to the matlab binary.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, timeout=0, path=os.environ["MATLAB_BIN"],
+      verbose=True):
+    self.verbose = verbose
+    self.dataset = dataset
+    self.path = path
+    self.timeout = timeout
+    self.k = 1
+
+  '''
+  Destructor to clean up at the end. Use this method to remove created file.
+  '''
+  def __del__(self):
+    Log.Info("Clean up.", self.verbose)
+    filelist = ["distance.csv"]
+    for f in filelist:
+      if os.path.isfile(f):
+        os.remove(f)
+
+  '''
+  Run the Large Margin Nearest Neighbors benchmark. If the method has been
+  successfully completed return a dictionary with the runtime and accuracies.
+
+  @param options - Extra options for the method.
+  @return - Dictionary of metrics; -2 if the command timed out or -1 if the
+  command could not be executed.
+  '''
+  def RunMetrics(self, options):
+    Log.Info("Perform Large Margin Nearest Neighbors.", self.verbose)
+
+    if "k" in options:
+      self.k = int(options.pop("k"))
+
+    # No other options are accepted by this script.
+    if len(options) > 0:
+      Log.Fatal("Unknown parameters: " + str(options))
+      raise Exception("unknown parameters")
+
+    inputCmd = "-i " + self.dataset + " -k " + str(self.k)
+
+    # Split the command using shell-like syntax.
+    cmd = shlex.split(self.path + "matlab -nodisplay -nosplash -r \"try, " +
+        "LMNN('"  + inputCmd + "'), catch, exit(1), end, exit(0)\"")
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell based features.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+          timeout=self.timeout)
+    except subprocess.TimeoutExpired as e:
+      Log.Warn(str(e))
+      return -2
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd))
+      return -1
+
+    # Datastructure to store the results.
+    metrics = {}
+
+    # Parse data: runtime.
+    timer = self.parseTimer(s)
+    
+    if timer != -1:
+      metrics['Runtime'] = timer.total_time
+      distance = np.genfromtxt("distance.csv", delimiter = ',')
+      data = np.genfromtxt(self.dataset, delimiter=',')
+
+      dataList = [data[:,:-1], data[:, (data.shape[1] - 1)]]
+      metrics['Accuracy_1_NN'] = Metrics.KNNAccuracy(distance, dataList, 1, False)
+      metrics['Accuracy_3_NN'] = Metrics.KNNAccuracy(distance, dataList, 3, False)
+      metrics['Accuracy_3_NN_DW'] = Metrics.KNNAccuracy(distance, dataList, 3, True)
+      metrics['Accuracy_5_NN'] = Metrics.KNNAccuracy(distance, dataList, 5, False)
+      metrics['Accuracy_5_NN_DW'] = Metrics.KNNAccuracy(distance, dataList, 5, True)
+
+      Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
+
+    return metrics
+
+  '''
+  Parse the timer data from a given byte string.
+
+  @param data - Byte string to parse timer data from.
+  @return - Namedtuple that contains the timer data or -1 in case of an error.
+  '''
+  def parseTimer(self, data):
+    # Compile the regular expression pattern into a regular expression object to
+    # parse the timer data.
+    pattern = re.compile(br"""
+        .*?total_time: (?P<total_time>.*?)s.*?
+        """, re.VERBOSE|re.MULTILINE|re.DOTALL)
+
+    match = pattern.match(data)
+    if not match:
+      Log.Fatal("Can't parse the data: wrong format")
+      return -1
+    else:
+      # Create a namedtuple and return the timer data.
+      timer = collections.namedtuple("timer", ["total_time"])
+
+      return timer(float(match.group("total_time")))
+

From 9e47ebae3543e2c9f16e6def392c81e778e7f938 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Thu, 21 Jun 2018 14:14:14 +0530
Subject: [PATCH 13/15] typo rectify

---
 methods/matlab/LMNN.m | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/methods/matlab/LMNN.m b/methods/matlab/LMNN.m
index c7f66ba..69c534e 100644
--- a/methods/matlab/LMNN.m
+++ b/methods/matlab/LMNN.m
@@ -8,7 +8,7 @@ function lmnn(cmd)
 % corresponding Nx1 vector labels. The metric is returned in M.
 %
 % Required options:
-%     (-i) [string]    Input dataset to perform PLMNNCA on.
+%     (-i) [string]    Input dataset to perform LMNN on.
 % Options:
 %     (-k) [int]       Desired number of targets.
 %
@@ -31,9 +31,9 @@ function lmnn(cmd)
     % Remove the label row.
     X = X(:,1:end-1);
 		
-		% Variable K can't be used
-    %K = regexp(cmd, '.*?-k ([^\s]+)', 'tokens', 'once');
-    %K = str2num(K{1});
+    % Variable K can't be used
+    % K = regexp(cmd, '.*?-k ([^\s]+)', 'tokens', 'once');
+    % K = str2num(K{1});
 
     total_time = tic;
 

From 4d27754099c5808a5aa7bbe80afad71738b7a809 Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Wed, 27 Jun 2018 14:15:40 +0530
Subject: [PATCH 14/15] removed a parameter

---
 methods/mlpack/lmnn.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index dbd0b01..a225de5 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -106,8 +106,6 @@ def OptionsToStr(self, options):
       optionsStr = optionsStr + " -r " + str(options.pop("regularization"))
     if "tolerance" in options:
       optionsStr = optionsStr + " -t " + str(options.pop("tolerance"))
-    if "batch_delta" in options:
-      optionsStr = optionsStr + " -d " + str(options.pop("batch_delta"))
     if "range" in options:
       optionsStr = optionsStr + " -R " + str(options.pop("range"))
     if "step_size" in options:

From e993b25eb43dcc408a7ca6dc57307abfaa30dada Mon Sep 17 00:00:00 2001
From: Manish <manish887kr@gmail.com>
Date: Thu, 28 Jun 2018 15:50:45 +0530
Subject: [PATCH 15/15] removed extra L-BFGS related parameters

---
 config.yaml            | 1 -
 methods/mlpack/lmnn.py | 6 ++----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/config.yaml b/config.yaml
index 2ec3580..34bdf1d 100644
--- a/config.yaml
+++ b/config.yaml
@@ -850,7 +850,6 @@ methods:
                 max_iterations: 2000
                 optimizer: lbfgs
                 seed: 42
-                wolfe: 0.5
                 range: 50
 
             - files: ['datasets/covtype.csv',
diff --git a/methods/mlpack/lmnn.py b/methods/mlpack/lmnn.py
index a225de5..6936740 100644
--- a/methods/mlpack/lmnn.py
+++ b/methods/mlpack/lmnn.py
@@ -116,10 +116,8 @@ def OptionsToStr(self, options):
       optionsStr = optionsStr + " -p " + str(options.pop("passes"))
     if "max_iterations" in options:
       optionsStr = optionsStr + " -n " + str(options.pop("max_iterations"))
-    if "num_basis" in options:
-      optionsStr = optionsStr + " -B " + str(options.pop("num_basis"))
-    if "wolfe" in options:
-      optionsStr = optionsStr + " -w " + str(options.pop("wolfe"))
+    if "rank" in options:
+      optionsStr = optionsStr + " -A " + str(options.pop("rank"))
     if "normalize" in options:
       optionsStr = optionsStr + " -N"
       options.pop("normalize")