mlpack
diff --git a/‎Makefile
Lines changed: 1 addition & 0 deletions b/‎Makefile
Lines changed: 1 addition & 0 deletions
diff --git a/‎config.yaml
Lines changed: 180 additions & 28 deletions b/‎config.yaml
Lines changed: 180 additions & 28 deletions
diff --git a/‎libraries/dtimeout_install.sh
Lines changed: 25 additions & 0 deletions b/‎libraries/dtimeout_install.sh
Lines changed: 25 additions & 0 deletions
diff --git a/‎libraries/install_all.sh
Lines changed: 6 additions & 0 deletions b/‎libraries/install_all.sh
Lines changed: 6 additions & 0 deletions
diff --git a/‎libraries/install_r_packages.r
Lines changed: 9 additions & 0 deletions b/‎libraries/install_r_packages.r
Lines changed: 9 additions & 0 deletions
diff --git a/‎libraries/package-urls.txt
Lines changed: 1 addition & 0 deletions b/‎libraries/package-urls.txt
Lines changed: 1 addition & 0 deletions
diff --git a/‎libraries/r_install.sh
Lines changed: 2 additions & 0 deletions b/‎libraries/r_install.sh
Lines changed: 2 additions & 0 deletions
diff --git a/‎methods/R/adaboost.py
Lines changed: 131 additions & 0 deletions b/‎methods/R/adaboost.py
Lines changed: 131 additions & 0 deletions
diff --git a/‎methods/R/adaboost.r
Lines changed: 38 additions & 0 deletions b/‎methods/R/adaboost.r
Lines changed: 38 additions & 0 deletions
diff --git a/‎methods/R/dtc.py
Lines changed: 140 additions & 0 deletions b/‎methods/R/dtc.py
Lines changed: 140 additions & 0 deletions
@@ -187,6 +187,7 @@ endif
 	# git version of mlpack is used.)
 	#cd methods/mlpack/src/ && ./build_scripts.sh
 	# Compile the DLIBML scripts.
+	g++ -O2 -std=c++11 methods/dlibml/src/SVM.cpp -o methods/dlibml/dlibml_svm -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack 
 	g++ -O2 -std=c++11 methods/dlibml/src/ANN.cpp -o methods/dlibml/dlibml_ann -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack
 	g++ -O2 -std=c++11 methods/dlibml/src/ALLKNN.cpp -o methods/dlibml/dlibml_allknn -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack
 	g++ -O2 -std=c++11 methods/dlibml/src/KMEANS.cpp -o methods/dlibml/dlibml_kmeans -I"$(INCLUDEPATH)" -L"$(LIBPATH)" -ldlib -lmlpack -lboost_program_options -lblas -llapack 
 
@@ -0,0 +1,25 @@
+#!/bin/bash
+#
+# Wrapper script to unpack and build dtimeout.
+#
+# Include files will be installed to ../include/.
+# Library files will be installed to ../lib/.
+#
+# One dtimeout.tar.gz file should be located in this directory.
+if [ ! -f dtimeout.tar.gz ];
+then
+  echo "No source dtimeout.tar.gz found in libraries/!"
+  exit 1
+fi
+
+# Remove any old directory.
+rm -rf dtimeout/
+mkdir dtimeout/
+# Stop on failure so the build below never runs in the wrong directory.
+tar -xzpf dtimeout.tar.gz --strip-components=1 -C dtimeout/ || exit 1
+
+cd dtimeout/ || exit 1
+python3 setup.py build || exit 1
+# Build "pythonX.Y" from sys.version_info; slicing sys.version to three
+# characters gives "3.1" for Python 3.10 and later.
+PYVER=$(python3 -c 'import sys; print("python%d.%d" % sys.version_info[:2])')
+mkdir -p "../lib/$PYVER/site-packages/"
+PYTHONPATH="../lib/$PYVER/site-packages/" python3 setup.py install --prefix=../ -O2
@@ -85,3 +85,9 @@ if [ "$?" -ne "0" ]; then
   echo "Error installing R!";
   exit 1;
 fi
+
+# Install dtimeout; abort the whole installation if that fails.
+./dtimeout_install.sh $1
+if [ "$?" -ne "0" ]; then
+  echo "Error installing dtimeout!";
+  exit 1;
+fi
@@ -0,0 +1,9 @@
+# CRAN packages to install, one install.packages() call each, in this order.
+cranPackages <- c('mlr', 'tictoc', 'LiblineaR', 'adabag', 'rpart', 'class',
+                  'randomForest', 'e1071', 'penalized')
+for (pkg in cranPackages)
+{
+  install.packages(pkg, repos='http://cran.us.r-project.org')
+}
@@ -1,3 +1,4 @@
+dtimeout https://github.com/pnpnpn/timeout-decorator/archive/master.tar.gz
 ann https://www.cs.umd.edu/~mount/ANN/Files/1.1.2/ann_1.1.2.tar.gz
 flann https://github.com/mariusmuja/flann/archive/1.9.1.tar.gz
 HLearn https://github.com/mikeizbicki/HLearn/archive/2.0.0.0.tar.gz
 
@@ -24,3 +24,5 @@ prefix_path="$(readlink -m ../)"
 ./configure --prefix=$prefix_path --enable-R-shlib
 make
 make install
+cd ..
+bin/Rscript install_r_packages.r
@@ -0,0 +1,131 @@
+'''
+  @file adaboost.py
+  Class to benchmark the R Adaboost method.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path, this method even works if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+#Import the metrics definitions path.
+metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
+if metrics_folder not in sys.path:
+  sys.path.insert(0, metrics_folder)
+
+from log import *
+from profiler import *
+from definitions import *
+from misc import *
+
+import shlex
+import subprocess
+import re
+import collections
+import numpy as np
+
+'''
+This class implements the adaboost benchmark. It runs the adaboost.r script
+with libraries/bin/Rscript and collects the runtime and, if a file with the
+true labels is given, classification metrics.
+'''
+class ADABOOST(object):
+
+  '''
+  Create the adaboost benchmark instance.
+
+  @param dataset - Input dataset to perform adaboost on. Indexed as dataset[0]
+  (training file), dataset[1] (test file) and, optionally, dataset[2] (file
+  with the true labels used to compute the classification metrics).
+  @param timeout - The time in seconds until the timeout. Default (0) no
+  timeout.
+  @param path - Path prefix that "adaboost.r" is appended to. Defaults to the
+  R_PATH environment variable, read when the instance is created.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, timeout=0, path=None, verbose=True):
+    self.verbose = verbose
+    self.dataset = dataset
+    # Resolve the default here instead of in the signature, so that importing
+    # the module does not fail when R_PATH is unset but a path is passed.
+    self.path = os.environ["R_PATH"] if path is None else path
+    self.timeout = timeout
+
+  '''
+  Remove the files predictions.csv and log.txt from the current working
+  directory if they exist.
+  '''
+  def __del__(self):
+    Log.Info("Clean up.", self.verbose)
+    filelist = ["predictions.csv", "log.txt"]
+    for f in filelist:
+      if os.path.isfile(f):
+        os.remove(f)
+
+  '''
+  Adaboost. Run the R script and collect the metrics.
+
+  @param options - Extra options for the method. The only supported key is
+  "max_iterations" (default 100); the key is removed from the dictionary.
+  @return - Dictionary with the metrics ('Runtime' and, if dataset[2] is given,
+  'ACC', 'MCC', 'Precision', 'Recall' and 'MSE'); the dictionary is empty if no
+  timer data could be parsed. -1 if fewer than two datasets were given or the
+  command could not be executed, -2 if the command timed out.
+  @raise Exception - If options contains unknown parameters.
+  '''
+  def RunMetrics(self, options):
+    Log.Info("Perform Adaboost.", self.verbose)
+
+    opts = {}
+    opts["max_iterations"] = int(options.pop("max_iterations", 100))
+
+    if len(options) > 0:
+      Log.Fatal("Unknown parameters: " + str(options))
+      raise Exception("unknown parameters")
+
+    if len(self.dataset) < 2:
+      Log.Fatal("This method requires two or more datasets.")
+      return -1
+
+    # Build the argument list directly instead of splitting a concatenated
+    # string, so paths containing spaces or quotes stay single arguments.
+    cmd = ["libraries/bin/Rscript", self.path + "adaboost.r",
+        "-t", self.dataset[0], "-T", self.dataset[1],
+        "-m", str(opts["max_iterations"])]
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell based features.
+    # A timeout of 0 means "no timeout"; subprocess would treat 0 as an
+    # already expired deadline, so None is passed instead.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+          timeout=self.timeout or None)
+    except subprocess.TimeoutExpired as e:
+      Log.Warn(str(e))
+      return -2
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd) + " (" + str(e) + ")")
+      return -1
+
+    # Datastructure to store the results.
+    metrics = {}
+    # Parse data: runtime.
+    timer = self.parseTimer(s.decode(errors="replace"))
+    if timer == -1:
+      Log.Fatal("Can't parse the timer data: wrong format")
+      return metrics
+
+    metrics['Runtime'] = timer
+
+    # The classification metrics need the true labels, which are optional.
+    if len(self.dataset) >= 3:
+      predictions = np.genfromtxt("predictions.csv", delimiter = ',')
+      # Skip the first entry: adaboost.r writes the file with write.csv, which
+      # emits a header line.
+      predictions = predictions[1:]
+      truelabels = np.genfromtxt(self.dataset[2], delimiter = ',')
+      confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)
+      metrics['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
+      metrics['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
+      metrics['Precision'] = Metrics.AvgPrecision(confusionMatrix)
+      metrics['Recall'] = Metrics.AvgRecall(confusionMatrix)
+      metrics['MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictions)
+
+    Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
+
+    return metrics
+
+  '''
+  Parse the timer data from a given string.
+
+  @param data - String to parse timer data from.
+  @return - The first "<seconds> sec elapsed" value found as a float, or -1 if
+  the string contains no timer data.
+  '''
+  def parseTimer(self, data):
+    match = re.search(r"(\d+\.\d+). *sec elapsed", data)
+    if match is None:
+      return -1
+    return float(match.group(1))
@@ -0,0 +1,38 @@
+# Benchmark script for the mlr "classif.boosting" learner, timed with tictoc.
+# The command line arguments are read by position: the 2nd is the training
+# file, the 4th the test file and the 6th the number of boosting iterations.
+library(mlr)
+library(tictoc)
+cliArgs <- commandArgs(trailingOnly = TRUE)
+
+trainPath <- cliArgs[2]
+testPath <- cliArgs[4]
+numIterations <- as.integer(cliArgs[6])
+
+trainData <- read.csv(trainPath, header = FALSE, sep = ",")
+testData <- read.csv(testPath, header = FALSE, sep = ",")
+
+# Name the columns V1, V2, ... after the number of training columns.
+numCols <- ncol(trainData)
+colLabels <- paste("V", 1:numCols, sep = "")
+names(trainData) <- colLabels
+# Set column numCols of the test data to random 0/1 values; presumably
+# placeholder labels so that the test task has a target column - TODO confirm.
+testData[, numCols] <- sample(0:1, size = nrow(testData), replace = TRUE)
+names(testData) <- colLabels
+
+# The last training column is the classification target.
+targetName <- paste("V", numCols, sep = "")
+
+# The timed region covers task creation, training and prediction.
+tic()
+trainTask <- makeClassifTask(data = trainData, target = targetName)
+testTask <- makeClassifTask(data = testData, target = targetName)
+boostLearner <- makeLearner("classif.boosting",
+                            par.vals = list(mfinal = numIterations),
+                            predict.type = "response")
+trainedModel <- train(boostLearner, trainTask)
+testPrediction <- predict(trainedModel, testTask)
+toc(log = TRUE)
+
+# Store the formatted timing log in log.txt, replacing any previous content.
+timingLines <- capture.output(tic.log(format = TRUE))
+cat(timingLines, file="log.txt", append=FALSE)
+
+# Write the predicted responses as numbers to predictions.csv.
+predictedLabels <- as.numeric(testPrediction$data$response)
+write.csv(predictedLabels, "predictions.csv", row.names = FALSE)
@@ -0,0 +1,140 @@
+'''
+  @file dtc.py
+  Class to benchmark the R Decision Tree method.
+'''
+
+import os
+import sys
+import inspect
+
+# Import the util path, this method even works if the path contains symlinks to
+# modules.
+cmd_subfolder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../../util")))
+if cmd_subfolder not in sys.path:
+  sys.path.insert(0, cmd_subfolder)
+
+#Import the metrics definitions path.
+metrics_folder = os.path.realpath(os.path.abspath(os.path.join(
+  os.path.split(inspect.getfile(inspect.currentframe()))[0], "../metrics")))
+if metrics_folder not in sys.path:
+  sys.path.insert(0, metrics_folder)
+
+from log import *
+from profiler import *
+from definitions import *
+from misc import *
+
+import shlex
+import subprocess
+import re
+import collections
+import numpy as np
+
+'''
+This class implements the Decision Tree benchmark. It runs the dtc.r script
+with libraries/bin/Rscript and collects the runtime and, if a file with the
+true labels is given, classification metrics.
+'''
+class DTC(object):
+
+  '''
+  Create the Decision Tree benchmark instance.
+
+  @param dataset - Input dataset to perform DTC on. Indexed as dataset[0]
+  (training file), dataset[1] (test file) and, optionally, dataset[2] (file
+  with the true labels used to compute the classification metrics).
+  @param timeout - The time in seconds until the timeout. Default (0) no
+  timeout.
+  @param path - Path prefix that "dtc.r" is appended to. Defaults to the
+  R_PATH environment variable, read when the instance is created.
+  @param verbose - Display informational messages.
+  '''
+  def __init__(self, dataset, timeout=0, path=None, verbose=True):
+    self.verbose = verbose
+    self.dataset = dataset
+    # Resolve the default here instead of in the signature, so that importing
+    # the module does not fail when R_PATH is unset but a path is passed.
+    self.path = os.environ["R_PATH"] if path is None else path
+    self.timeout = timeout
+    self.build_opts = {}
+
+  '''
+  Remove the files predictions.csv and log.txt from the current working
+  directory if they exist.
+  '''
+  def __del__(self):
+    Log.Info("Clean up.", self.verbose)
+    filelist = ["predictions.csv", "log.txt"]
+    for f in filelist:
+      if os.path.isfile(f):
+        os.remove(f)
+
+  '''
+  DTC. Run the R script and collect the metrics.
+
+  @param options - Extra options for the method. Supported keys are
+  "max_depth" (default 30) and "minimum_samples_split" (default 20); both keys
+  are removed from the dictionary.
+  @return - Dictionary with the metrics ('Runtime' and, if dataset[2] is given,
+  'ACC', 'MCC', 'Precision', 'Recall' and 'MSE'); the dictionary is empty if no
+  timer data could be parsed. -1 if fewer than two datasets were given or the
+  command could not be executed, -2 if the command timed out.
+  @raise Exception - If options contains unknown parameters.
+  '''
+  def RunMetrics(self, options):
+    Log.Info("Perform DTC.", self.verbose)
+
+    # Get all the parameters.
+    self.build_opts = {}
+    self.build_opts["max_depth"] = int(options.pop("max_depth", 30))
+    self.build_opts["min_samples_split"] = int(
+        options.pop("minimum_samples_split", 20))
+
+    if len(options) > 0:
+      Log.Fatal("Unknown parameters: " + str(options))
+      raise Exception("unknown parameters")
+
+    if len(self.dataset) < 2:
+      Log.Fatal("This method requires two or more datasets.")
+      return -1
+
+    # Build the argument list directly instead of splitting a concatenated
+    # string, so paths containing spaces or quotes stay single arguments.
+    cmd = ["libraries/bin/Rscript", self.path + "dtc.r",
+        "-t", self.dataset[0], "-T", self.dataset[1],
+        "-md", str(self.build_opts["max_depth"]),
+        "-ms", str(self.build_opts["min_samples_split"])]
+
+    # Run command with the necessary arguments and return its output as a byte
+    # string. We have untrusted input so we disable all shell based features.
+    # A timeout of 0 means "no timeout"; subprocess would treat 0 as an
+    # already expired deadline, so None is passed instead.
+    try:
+      s = subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=False,
+          timeout=self.timeout or None)
+    except subprocess.TimeoutExpired as e:
+      Log.Warn(str(e))
+      return -2
+    except Exception as e:
+      Log.Fatal("Could not execute command: " + str(cmd) + " (" + str(e) + ")")
+      return -1
+
+    # Datastructure to store the results.
+    metrics = {}
+    # Parse data: runtime.
+    timer = self.parseTimer(s.decode(errors="replace"))
+    if timer == -1:
+      Log.Fatal("Can't parse the timer data: wrong format")
+      return metrics
+
+    metrics['Runtime'] = timer
+
+    # The classification metrics need the true labels, which are optional.
+    if len(self.dataset) >= 3:
+      predictions = np.genfromtxt("predictions.csv", delimiter = ',')
+      # Skip the first entry; presumably the header line written by dtc.r -
+      # TODO confirm against the R script.
+      predictions = predictions[1:]
+      truelabels = np.genfromtxt(self.dataset[2], delimiter = ',')
+      confusionMatrix = Metrics.ConfusionMatrix(truelabels, predictions)
+      metrics['ACC'] = Metrics.AverageAccuracy(confusionMatrix)
+      metrics['MCC'] = Metrics.MCCMultiClass(confusionMatrix)
+      metrics['Precision'] = Metrics.AvgPrecision(confusionMatrix)
+      metrics['Recall'] = Metrics.AvgRecall(confusionMatrix)
+      metrics['MSE'] = Metrics.SimpleMeanSquaredError(truelabels, predictions)
+
+    Log.Info(("total time: %fs" % (metrics['Runtime'])), self.verbose)
+
+    return metrics
+
+  '''
+  Parse the timer data from a given string.
+
+  @param data - String to parse timer data from.
+  @return - The first "<seconds> sec elapsed" value found as a float, or -1 if
+  the string contains no timer data.
+  '''
+  def parseTimer(self, data):
+    match = re.search(r"(\d+\.\d+). *sec elapsed", data)
+    if match is None:
+      return -1
+    return float(match.group(1))
Original file line number	Diff line number	Diff line change
`@@ -1,3 +1,4 @@`
	`1`	`+dtimeout https://github.com/pnpnpn/timeout-decorator/archive/master.tar.gz`
`1`	`2`	`ann https://www.cs.umd.edu/~mount/ANN/Files/1.1.2/ann_1.1.2.tar.gz`
`2`	`3`	`flann https://github.com/mariusmuja/flann/archive/1.9.1.tar.gz`
`3`	`4`	`HLearn https://github.com/mikeizbicki/HLearn/archive/2.0.0.0.tar.gz`