
Commit fda1ba3

Remove value in favor of functor interface (#163)
* Remove value in favor of functor interface
* Update docs
1 parent d8c0654 commit fda1ba3

15 files changed: +104 -243 lines changed


docs/src/advanced/developer.md

Lines changed: 1 addition & 3 deletions
@@ -60,9 +60,7 @@ MarginLoss
 
 Each of the three abstract types listed above serves a purpose
 other than dispatch. All losses that belong to the same family
-share functionality to some degree. For example all subtypes of
-[`SupervisedLoss`](@ref) share the same implementations for the
-vectorized versions of [`value`](@ref) and [`deriv`](@ref).
+share functionality to some degree.
 
 More interestingly, the abstract types [`DistanceLoss`](@ref) and
 [`MarginLoss`](@ref), serve an additional purpose aside from

docs/src/advanced/extend.md

Lines changed: 6 additions & 6 deletions
@@ -42,10 +42,10 @@ ScaledLoss
 julia> lsloss = 1/2 * L2DistLoss()
 ScaledLoss{L2DistLoss, 0.5}(L2DistLoss())
 
-julia> value(L2DistLoss(), 4.0, 0.0)
+julia> L2DistLoss()(4.0, 0.0)
 16.0
 
-julia> value(lsloss, 4.0, 0.0)
+julia> lsloss(4.0, 0.0)
 8.0
 ```

@@ -102,16 +102,16 @@ WeightedMarginLoss
 julia> myloss = WeightedMarginLoss(HingeLoss(), 0.8)
 WeightedMarginLoss{L1HingeLoss, 0.8}(L1HingeLoss())
 
-julia> value(myloss, -4.0, 1.0) # positive class
+julia> myloss(-4.0, 1.0) # positive class
 4.0
 
-julia> value(HingeLoss(), -4.0, 1.0)
+julia> HingeLoss()(-4.0, 1.0)
 5.0
 
-julia> value(myloss, 4.0, -1.0) # negative class
+julia> myloss(4.0, -1.0) # negative class
 0.9999999999999998
 
-julia> value(HingeLoss(), 4.0, -1.0)
+julia> HingeLoss()(4.0, -1.0)
 5.0
 ```
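The values above are consistent with the weight scaling the positive class and its complement scaling the negative class. A minimal sketch of that reading (the interpretation of the weight is inferred from the numbers shown, not stated in the diff):

```julia
using LossFunctions

base   = HingeLoss()
myloss = WeightedMarginLoss(base, 0.8)

# positive class (target == 1): appears to scale by the weight 0.8
myloss(-4.0, 1.0) ≈ 0.8 * base(-4.0, 1.0)          # 0.8 * 5.0 == 4.0

# negative class (target == -1): appears to scale by 1 - 0.8
myloss(4.0, -1.0) ≈ (1 - 0.8) * base(4.0, -1.0)    # ≈ 0.2 * 5.0
```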

docs/src/introduction/gettingstarted.md

Lines changed: 4 additions & 4 deletions
@@ -60,12 +60,12 @@ From an implementation perspective, we should point out that all
 the concrete loss "functions" that this package provides are
 actually defined as immutable types, instead of native Julia
 functions. We can compute the value of some type of loss using
-the function [`value`](@ref). Let us start with an example of how
+the functor interface. Let us start with an example of how
 to compute the loss of a single observation (i.e. two numbers).
 
 ```julia-repl
-# loss   ŷ    y
-julia> value(L2DistLoss(), 0.5, 1.0)
+# loss   ŷ    y
+julia> L2DistLoss()(0.5, 1.0)
 0.25
 ```

@@ -78,7 +78,7 @@ julia> true_targets = [ 1, 0, -2];
 
 julia> pred_outputs = [0.5, 2, -1];
 
-julia> value.(L2DistLoss(), pred_outputs, true_targets)
+julia> L2DistLoss().(pred_outputs, true_targets)
 3-element Vector{Float64}:
  0.25
  4.0
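For readers following the diff, the new calling convention carries over to margin-based losses as well. A minimal sketch, assuming the exported loss types used elsewhere in this commit:

```julia
using LossFunctions

# distance-based loss: penalizes the residual ŷ - y
L2DistLoss()(0.5, 1.0)     # abs2(0.5 - 1.0) == 0.25

# margin-based loss: penalizes the agreement y * ŷ (targets are ±1)
HingeLoss()(-4.0, 1.0)     # max(0, 1 - (1.0 * -4.0)) == 5.0

# broadcasting the functor call works element-wise over arrays
L2DistLoss().([0.5, 2, -1], [1, 0, -2])
```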

docs/src/user/aggregate.md

Lines changed: 7 additions & 4 deletions
@@ -34,18 +34,21 @@ say "naive", because it will not give us an acceptable
 performance.
 
 ```jldoctest
-julia> value.(L1DistLoss(), [2,5,-2], [1.,2,3])
+julia> loss = L1DistLoss()
+L1DistLoss()
+
+julia> loss.([2,5,-2], [1.,2,3])
 3-element Vector{Float64}:
  1.0
  3.0
  5.0
 
-julia> sum(value.(L1DistLoss(), [2,5,-2], [1.,2,3])) # WARNING: Bad code
+julia> sum(loss.([2,5,-2], [1.,2,3])) # WARNING: Bad code
 9.0
 ```
 
 This works as expected, but there is a price for it. Before the
-sum can be computed, [`value`](@ref) will allocate a temporary
+sum can be computed, the solution will allocate a temporary
 array and fill it with the element-wise results. After that,
 `sum` will iterate over this temporary array and accumulate the
 values accordingly. Bottom line: we allocate temporary memory
@@ -82,7 +85,7 @@ the results, we will see that the loss of the second observation
 was effectively counted twice.
 
 ```jldoctest
-julia> result = value.(L1DistLoss(), [2,5,-2], [1.,2,3]) .* [1,2,1]
+julia> result = L1DistLoss().([2,5,-2], [1.,2,3]) .* [1,2,1]
 3-element Vector{Float64}:
  1.0
  6.0
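To make the allocation point concrete: the docs go on to describe better aggregation options, but even in plain Julia a generator avoids the temporary array. This is a generic sketch, not an API added by this commit:

```julia
using LossFunctions

loss    = L1DistLoss()
outputs = [2, 5, -2]
targets = [1., 2, 3]

# naive: materializes a temporary vector, then sums it
total_naive = sum(loss.(outputs, targets))

# allocation-free: feed `sum` a generator of element-wise losses
total = sum(loss(ŷ, y) for (ŷ, y) in zip(outputs, targets))

total ≈ total_naive    # both give 9.0
```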

docs/src/user/interface.md

Lines changed: 9 additions & 96 deletions
@@ -49,7 +49,7 @@ than one place.
 julia> loss = L2DistLoss()
 L2DistLoss()
 
-julia> value(loss, 3, 2)
+julia> loss(3, 2)
 1
 ```

@@ -66,9 +66,9 @@ yourself in the code below. As such they are zero-cost
 abstractions.
 
 ```julia-repl
-julia> v1(loss,y,t) = value(loss,y,t)
+julia> v1(loss,y,t) = loss(y,t)
 
-julia> v2(y,t) = value(L2DistLoss(),y,t)
+julia> v2(y,t) = L2DistLoss()(y,t)
 
 julia> @code_llvm v1(loss, 3, 2)
 define i64 @julia_v1_70944(i64, i64) #0 {
@@ -115,46 +115,17 @@ performance overhead, and zero memory allocations on the heap.
 
 The first thing we may want to do is compute the loss for some
 observation (singular). In fact, all losses are implemented on
-single observations under the hood. The core function to compute
-the value of a loss is `value`. We will see throughout the
-documentation that this function allows for a lot of different
-method signatures to accomplish a variety of tasks.
-
-```@docs
-value
-```
-
-It may be interesting to note, that this function also supports
-broadcasting and all the syntax benefits that come with it. Thus,
-it is quite simple to make use of preallocated memory for storing
-the element-wise results.
+single observations under the hood, and are functors.
 
 ```jldoctest bcast1
-julia> value.(L1DistLoss(), [2,5,-2], [1,2,3])
+julia> loss = L1DistLoss()
+L1DistLoss()
+
+julia> loss.([2,5,-2], [1,2,3])
 3-element Vector{Int64}:
  1
  3
  5
-
-julia> buffer = zeros(3); # preallocate a buffer
-
-julia> buffer .= value.(L1DistLoss(), [2,5,-2], [1.,2,3])
-3-element Vector{Float64}:
- 1.0
- 3.0
- 5.0
-```
-
-Furthermore, with the loop fusion changes that were introduced in
-Julia 0.6, one can also easily weight the influence of each
-observation without allocating a temporary array.
-
-```jldoctest bcast1
-julia> buffer .= value.(L1DistLoss(), [2,5,-2], [1.,2,3]) .* [2,1,0.5]
-3-element Vector{Float64}:
- 2.0
- 3.0
- 2.5
 ```
 
 ## Computing the 1st Derivatives
@@ -166,8 +137,7 @@ derivatives of the loss in one way or the other during the
 training process.
 
 To compute the derivative of some loss we expose the function
-[`deriv`](@ref). It supports the same exact method signatures as
-[`value`](@ref). It may be interesting to note explicitly, that
+[`deriv`](@ref). It may be interesting to note explicitly, that
 we always compute the derivative in respect to the predicted
 `output`, since we are interested in deducing in which direction
 the output should change.
@@ -176,39 +146,6 @@ the output should change.
 deriv
 ```
 
-Similar to [`value`](@ref), this function also supports
-broadcasting and all the syntax benefits that come with it. Thus,
-one can make use of preallocated memory for storing the
-element-wise derivatives.
-
-```jldoctest bcast2
-julia> deriv.(L2DistLoss(), [2,5,-2], [1,2,3])
-3-element Vector{Int64}:
-  2
-  6
- -10
-
-julia> buffer = zeros(3); # preallocate a buffer
-
-julia> buffer .= deriv.(L2DistLoss(), [2,5,-2], [1.,2,3])
-3-element Vector{Float64}:
-   2.0
-   6.0
- -10.0
-```
-
-Furthermore, with the loop fusion changes that were introduced in
-Julia 0.6, one can also easily weight the influence of each
-observation without allocating a temporary array.
-
-```jldoctest bcast2
-julia> buffer .= deriv.(L2DistLoss(), [2,5,-2], [1.,2,3]) .* [2,1,0.5]
-3-element Vector{Float64}:
-  4.0
-  6.0
- -5.0
-```
-
 ## Computing the 2nd Derivatives
 
 Additionally to the first derivative, we also provide the
@@ -220,30 +157,6 @@ derivative in respect to the predicted `output`.
 deriv2
 ```
 
-Just like [`deriv`](@ref) and [`value`](@ref), this function also
-supports broadcasting and all the syntax benefits that come with
-it. Thus, one can make use of preallocated memory for storing the
-element-wise derivatives.
-
-```jldoctest
-julia> deriv2.(LogitDistLoss(), [0.3, 2.3, -2], [-0.5, 1.2, 3])
-3-element Vector{Float64}:
- 0.42781939304058886
- 0.3747397590950413
- 0.013296113341580313
-
-julia> buffer = zeros(3); # preallocate a buffer
-
-julia> buffer .= deriv2.(LogitDistLoss(), [0.3, 2.3, -2], [-0.5, 1.2, 3])
-3-element Vector{Float64}:
- 0.42781939304058886
- 0.3747397590950413
- 0.013296113341580313
-```
-
-Furthermore [`deriv2`](@ref) supports all the same method
-signatures as [`deriv`](@ref) does.
-
 ## Properties of a Loss
 
 In some situations it can be quite useful to assert certain
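The removed sections above showed `value`, `deriv`, and `deriv2` being broadcast into preallocated buffers. The same pattern still applies after this commit, with the functor call taking the place of `value`; a sketch adapted from the removed examples:

```julia
using LossFunctions

loss    = L2DistLoss()
outputs = [2, 5, -2]
targets = [1., 2, 3]

buffer = zeros(3)                          # preallocate a buffer

buffer .= loss.(outputs, targets)          # element-wise loss values, in place
buffer .= deriv.(loss, outputs, targets)   # 1st derivatives w.r.t. the output
buffer .= deriv2.(loss, outputs, targets)  # 2nd derivatives w.r.t. the output
```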

src/LossFunctions.jl

Lines changed: 1 addition & 1 deletion
@@ -21,7 +21,7 @@ export
 SupervisedLoss,
 MarginLoss,
 DistanceLoss,
-value, deriv, deriv2,
+deriv, deriv2,
 isdistancebased, ismarginbased,
 isminimizable, isdifferentiable,
 istwicedifferentiable,

src/losses.jl

Lines changed: 4 additions & 7 deletions
@@ -1,17 +1,14 @@
 # type alias to make code more readable
 Scalar = Union{Number,CategoricalValue}
 
-# convenient functor interface
-(loss::SupervisedLoss)(output::Scalar, target::Scalar) = value(loss, output, target)
-
 # fallback to unary evaluation
-value(loss::DistanceLoss, output::Number, target::Number) = value(loss, output - target)
+(loss::DistanceLoss)(output::Number, target::Number) = loss(output - target)
 deriv(loss::DistanceLoss, output::Number, target::Number) = deriv(loss, output - target)
 deriv2(loss::DistanceLoss, output::Number, target::Number) = deriv2(loss, output - target)
 
-value(loss::MarginLoss, output::Number, target::Number) = value(loss, target * output)
-deriv(loss::MarginLoss, output::Number, target::Number) = target * deriv(loss, target * output)
-deriv2(loss::MarginLoss, output::Number, target::Number) = deriv2(loss, target * output)
+(loss::MarginLoss)(output::Number, target::Number) = loss(target * output)
+deriv(loss::MarginLoss, output::Number, target::Number) = target * deriv(loss, target * output)
+deriv2(loss::MarginLoss, output::Number, target::Number) = deriv2(loss, target * output)
 
 # broadcasting behavior
 Broadcast.broadcastable(loss::SupervisedLoss) = Ref(loss)
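To make the fallback methods above concrete, a small usage sketch (assuming the package's exported loss types): the two-argument call reduces to the one-argument call on the residual or agreement.

```julia
using LossFunctions

dloss = L2DistLoss()   # a DistanceLoss
mloss = HingeLoss()    # a MarginLoss

# distance-based losses evaluate the residual output - target
dloss(0.5, 1.0) == dloss(0.5 - 1.0)     # true

# margin-based losses evaluate the agreement target * output
mloss(-4.0, 1.0) == mloss(1.0 * -4.0)   # true
```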

src/losses/distance.jl

Lines changed: 10 additions & 10 deletions
@@ -13,7 +13,7 @@ struct LPDistLoss{P} <: DistanceLoss end
 
 LPDistLoss(p::Number) = LPDistLoss{p}()
 
-value(loss::LPDistLoss{P}, difference::Number) where {P} = abs(difference)^P
+(loss::LPDistLoss{P})(difference::Number) where {P} = abs(difference)^P
 function deriv(loss::LPDistLoss{P}, difference::T)::promote_type(typeof(P),T) where {P,T<:Number}
     if difference == 0
         zero(difference)
@@ -73,7 +73,7 @@ L(r) = |r|
 """
 const L1DistLoss = LPDistLoss{1}
 
-value(loss::L1DistLoss, difference::Number) = abs(difference)
+(loss::L1DistLoss)(difference::Number) = abs(difference)
 deriv(loss::L1DistLoss, difference::T) where {T<:Number} = convert(T, sign(difference))
 deriv2(loss::L1DistLoss, difference::T) where {T<:Number} = zero(T)
 
@@ -118,7 +118,7 @@ L(r) = |r|^2
 """
 const L2DistLoss = LPDistLoss{2}
 
-value(loss::L2DistLoss, difference::Number) = abs2(difference)
+(loss::L2DistLoss)(difference::Number) = abs2(difference)
 deriv(loss::L2DistLoss, difference::T) where {T<:Number} = convert(T,2) * difference
 deriv2(loss::L2DistLoss, difference::T) where {T<:Number} = convert(T,2)
 
@@ -152,7 +152,7 @@ end
 PeriodicLoss(circ::T=1.0) where {T<:AbstractFloat} = PeriodicLoss{T}(circ)
 PeriodicLoss(circ) = PeriodicLoss{Float64}(Float64(circ))
 
-value(loss::PeriodicLoss, difference::T) where {T<:Number} = 1 - cos(difference*loss.k)
+(loss::PeriodicLoss)(difference::T) where {T<:Number} = 1 - cos(difference*loss.k)
 deriv(loss::PeriodicLoss, difference::T) where {T<:Number} = loss.k * sin(difference*loss.k)
 deriv2(loss::PeriodicLoss, difference::T) where {T<:Number} = abs2(loss.k) * cos(difference*loss.k)
 
@@ -207,7 +207,7 @@ end
 HuberLoss(d::T=1.0) where {T<:AbstractFloat} = HuberLoss{T}(d)
 HuberLoss(d) = HuberLoss{Float64}(Float64(d))
 
-function value(loss::HuberLoss{T1}, difference::T2) where {T1,T2<:Number}
+function (loss::HuberLoss{T1})(difference::T2) where {T1,T2<:Number}
     T = promote_type(T1,T2)
     abs_diff = abs(difference)
     if abs_diff <= loss.d
@@ -282,7 +282,7 @@ const EpsilonInsLoss = L1EpsilonInsLoss
 @inline L1EpsilonInsLoss(ε::T) where {T<:AbstractFloat} = L1EpsilonInsLoss{T}(ε)
 @inline L1EpsilonInsLoss(ε::Number) = L1EpsilonInsLoss{Float64}(Float64(ε))
 
-function value(loss::L1EpsilonInsLoss{T1}, difference::T2) where {T1,T2<:Number}
+function (loss::L1EpsilonInsLoss{T1})(difference::T2) where {T1,T2<:Number}
     T = promote_type(T1,T2)
     max(zero(T), abs(difference) - loss.ε)
 end
@@ -344,7 +344,7 @@ end
 L2EpsilonInsLoss(ε::T) where {T<:AbstractFloat} = L2EpsilonInsLoss{T}(ε)
 L2EpsilonInsLoss(ε) = L2EpsilonInsLoss{Float64}(Float64(ε))
 
-function value(loss::L2EpsilonInsLoss{T1}, difference::T2) where {T1,T2<:Number}
+function (loss::L2EpsilonInsLoss{T1})(difference::T2) where {T1,T2<:Number}
     T = promote_type(T1,T2)
     abs2(max(zero(T), abs(difference) - loss.ε))
 end
@@ -399,7 +399,7 @@ L(r) = - \ln \frac{4 e^r}{(1 + e^r)^2}
 """
 struct LogitDistLoss <: DistanceLoss end
 
-function value(loss::LogitDistLoss, difference::Number)
+function (loss::LogitDistLoss)(difference::Number)
     er = exp(difference)
     T = typeof(er)
     -log(convert(T,4)) - difference + 2log(one(T) + er)
@@ -458,7 +458,7 @@ struct QuantileLoss{T <: AbstractFloat} <: DistanceLoss
     τ::T
 end
 
-function value(loss::QuantileLoss{T1}, diff::T2) where {T1, T2 <: Number}
+function (loss::QuantileLoss{T1})(diff::T2) where {T1, T2 <: Number}
     T = promote_type(T1, T2)
     diff * (convert(T,diff > 0) - loss.τ)
 end
@@ -512,7 +512,7 @@ struct LogCoshLoss <: DistanceLoss end
 _softplus(x::T) where T<:Number = x > zero(T) ? x + log1p(exp(-x)) : log1p(exp(x))
 _log_cosh(x::T) where T<:Number = x + _softplus(-2x) - log(convert(T, 2))
 
-function value(loss::LogCoshLoss, diff::T) where {T <: Number}
+function (loss::LogCoshLoss)(diff::T) where {T <: Number}
     _log_cosh(diff)
 end

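Following the pattern above, a new distance-based loss only needs the unary functor method plus `deriv`/`deriv2`; the binary `(output, target)` call then comes from the fallback in `src/losses.jl`. A hypothetical example (`MyCubicLoss` is not part of the package):

```julia
using LossFunctions

struct MyCubicLoss <: DistanceLoss end

# unary evaluation on the residual r = output - target
(loss::MyCubicLoss)(r::Number) = abs(r)^3
LossFunctions.deriv(loss::MyCubicLoss, r::Number) = 3 * abs2(r) * sign(r)
LossFunctions.deriv2(loss::MyCubicLoss, r::Number) = 6 * abs(r)

MyCubicLoss()(2.0, 0.5)   # == abs(2.0 - 0.5)^3 via the DistanceLoss fallback
```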