Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@ language: scala
before_script:
- sudo chmod +x /usr/local/bin/sbt

scala:
- 2.11.8
- 2.12.4

# only trigger builds on master
branches:
only:
Expand Down
18 changes: 10 additions & 8 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@
import scalariform.formatter.preferences._

lazy val Benchmark = config("bench") extend Test

lazy val commonSettings = Seq(
organization := "org.gnieh",
name := "tekstlib",
version := "0.1.2-SNAPSHOT",
scalaVersion := "2.12.2",
crossScalaVersions := Seq("2.12.2", "2.11.8"),
scalaVersion := "2.12.4",
crossScalaVersions := Seq("2.12.4", "2.11.8"),
libraryDependencies += "org.scalatest" %% "scalatest" % "3.0.3" % "test",
libraryDependencies += "org.scodec" %% "scodec-bits" % "1.1.4",
scalacOptions in (Compile, doc) ++= Seq("-doc-root-content", "rootdoc.txt"),
Expand All @@ -14,9 +16,9 @@ lazy val commonSettings = Seq(
homepage := Some(url("https://github.com/gnieh/tekstlib")))

lazy val root = project.in(file("."))
.enablePlugins(SbtOsgi)
.settings(commonSettings)
.settings(osgiSettings)
.settings(scalariformSettings)
.settings(
resourceDirectories in Compile := List(),
OsgiKeys.exportPackage := Seq(
Expand All @@ -26,12 +28,12 @@ lazy val root = project.in(file("."))
"Bundle-Name" -> "Gnieh Text and Document Manipulation"),
OsgiKeys.bundleSymbolicName := "org.gnieh.tekstlib",
OsgiKeys.privatePackage := Seq(),
ScalariformKeys.preferences := {
import scalariform.formatter.preferences._
ScalariformKeys.preferences.value
scalariformAutoformat := true,
scalariformPreferences := {
scalariformPreferences.value
.setPreference(AlignSingleLineCaseStatements, true)
.setPreference(DoubleIndentClassDeclaration, true)
.setPreference(PreserveDanglingCloseParenthesis, true)
.setPreference(DoubleIndentConstructorArguments, true)
.setPreference(DanglingCloseParenthesis, Preserve)
.setPreference(MultilineScaladocCommentsStartOnFirstLine, true)
},
publishMavenStyle := true,
Expand Down
2 changes: 1 addition & 1 deletion project/build.properties
Original file line number Diff line number Diff line change
@@ -1 +1 @@
sbt.version=0.13.15
sbt.version=1.0.3
6 changes: 3 additions & 3 deletions project/plugins.sbt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
addSbtPlugin("com.typesafe.sbt" % "sbt-osgi" % "0.6.0")
addSbtPlugin("com.typesafe.sbt" % "sbt-osgi" % "0.9.2")

addSbtPlugin("com.typesafe.sbt" % "sbt-scalariform" % "1.2.1")
addSbtPlugin("org.scalariform" % "sbt-scalariform" % "1.8.2")

addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.0")
addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.5.1")
126 changes: 126 additions & 0 deletions src/main/scala/gnieh/Indexable.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/*
* Copyright (c) 2017 Lucas Satabin
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gnieh

import matching._

import scala.language.higherKinds

import scala.annotation.tailrec

/** Type class describing collections of type `Coll` whose elements of type
 *  `Elem` can be accessed by integer index.
 *
 *  Concrete instances provide the four primitive operations (`apply`,
 *  `isEmpty`, `size` and `slice`); the search and comparison helpers below
 *  are derived from them.
 *
 *  @tparam Coll the collection type
 *  @tparam Elem the type of the elements held by `Coll`
 */
abstract class Indexable[Coll, Elem] {

  // Exposes this instance implicitly inside the class body, presumably so that
  // `KMP.search` below can resolve an `Indexable[Coll, Elem]` (its signature is
  // not visible from this file). The type is spelled out explicitly so that
  // implicit resolution never depends on type inference.
  private implicit val self: Indexable[Coll, Elem] = this

  /** Returns the element of `coll` located at index `idx`. */
  def apply(coll: Coll, idx: Int): Elem

  /** Tells whether `coll` contains no element. */
  def isEmpty(coll: Coll): Boolean

  /** Returns the number of elements in `coll`. */
  def size(coll: Coll): Int

  /** Returns the part of `coll` delimited by the indices `start` and `end`. */
  def slice(coll: Coll, start: Int, end: Int): Coll

  /** Searches for `s2` inside `s1` by delegating to `KMP.search`.
   *  The meaning of the returned integer is the one defined by `KMP.search`
   *  (presumably the index of the first occurrence -- confirm against `KMP`).
   */
  def indexOfSlice(s1: Coll, s2: Coll)(implicit equiv: Equiv[Elem]): Int =
    KMP.search(s1, s2)

  /** Tells whether `s1` starts with `s2`, comparing elements with `equiv`.
   *  An empty `s2` is a prefix of any collection.
   */
  def startsWith(s1: Coll, s2: Coll)(implicit equiv: Equiv[Elem]): Boolean = {
    // sizes do not change during the traversal, compute them only once
    val size1 = size(s1)
    val size2 = size(s2)

    @tailrec
    def loop(idx: Int): Boolean =
      if (idx >= size2) {
        // every element of the prefix has been matched
        true
      } else if (equiv.equiv(apply(s1, idx), apply(s2, idx))) {
        loop(idx + 1)
      } else {
        false
      }

    // a prefix cannot be longer than the collection it is a prefix of,
    // in which case no element needs to be compared at all
    size2 <= size1 && loop(0)
  }

  /** Tells whether `coll` and `that` have the same size and pairwise
   *  equivalent elements according to `equiv`.
   */
  def equivalent(coll: Coll, that: Coll)(implicit equiv: Equiv[Elem]): Boolean = {
    val length = size(coll)

    @tailrec
    def loop(idx: Int): Boolean =
      if (idx >= length) {
        // end of collection for both, they are equivalent
        true
      } else if (equiv.equiv(apply(coll, idx), apply(that, idx))) {
        // elements are equivalent, continue
        loop(idx + 1)
      } else {
        // non equivalent elements, stop
        false
      }

    // collections of different sizes can never be equivalent
    length == size(that) && loop(0)
  }

}

/** Provides implicit [[Indexable]] instances for `String` and `IndexedSeq`. */
trait IndexableInstances {

  /** [[Indexable]] instance viewing a `String` as an indexed collection of `Char`. */
  implicit object IndexableString extends Indexable[String, Char] {

    @inline
    def apply(s: String, idx: Int): Char =
      s.charAt(idx)

    /** A `null` string is reported as empty. */
    @inline
    def isEmpty(s: String): Boolean =
      s == null || s.length == 0

    /** Unlike `isEmpty`, this does not guard against a `null` string. */
    @inline
    def size(s: String): Int =
      s.length

    @inline
    def slice(s: String, start: Int, end: Int): String =
      s.slice(start, end)

  }

  /** [[Indexable]] instance for any `IndexedSeq[T]`.
   *  A new instance is created each time this implicit is invoked.
   */
  implicit def IndexableIndexedSeq[T]: Indexable[IndexedSeq[T], T] = new Indexable[IndexedSeq[T], T] {

    @inline
    def apply(s: IndexedSeq[T], idx: Int): T =
      s(idx)

    @inline
    def isEmpty(s: IndexedSeq[T]): Boolean =
      s.isEmpty

    @inline
    def size(s: IndexedSeq[T]): Int =
      s.size

    @inline
    def slice(s: IndexedSeq[T], start: Int, end: Int): IndexedSeq[T] =
      s.slice(start, end)

  }

}
9 changes: 4 additions & 5 deletions src/main/scala/gnieh/diff/Diff.scala
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gnieh.diff
package gnieh
package diff

import scala.annotation.tailrec

class LcsDiff[T](lcsalg: Lcs[T]) {
class LcsDiff(lcsalg: Lcs) {

def diff(s1: IndexedSeq[T], s2: IndexedSeq[T]): List[Diff] = {
def diff[Coll, T](s1: Coll, s2: Coll)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Diff] = {
val lcs = lcsalg.lcs(s1, s2)
@tailrec
def loop(lcs: List[Common], idx1: Int, idx2: Int, acc: List[Diff]): List[Diff] =
Expand All @@ -34,15 +35,13 @@ class LcsDiff[T](lcsalg: Lcs[T]) {
else
acc.reverse
case Common(start1, start2, _) :: _ if idx1 < start1 || idx2 < start2 =>
// assert(idx1 < s1.size && idx2 < s2.size)
if (idx1 < start1 && idx2 < start2)
loop(lcs, start1, start2, Second(idx2, start2) :: First(idx1, start1) :: acc)
else if (idx1 < start1)
loop(lcs, start1, idx2, First(idx1, start1) :: acc)
else
loop(lcs, idx1, start2, Second(idx2, start2) :: acc)
case Common(start1, start2, length) :: rest if length > 0 =>
// assert(start1 == idx1 && start2 == idx2)
loop(rest, start1 + length, start2 + length, Both(start1, start1 + length, start2, start2 + length) :: acc)
case Common(start1, start2, _) :: rest =>
loop(rest, start1, start2, acc)
Expand Down
9 changes: 5 additions & 4 deletions src/main/scala/gnieh/diff/DynamicProgLcs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,23 +11,24 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gnieh.diff
package gnieh
package diff

import scala.annotation.tailrec

/** Implementation of the LCS using dynamic programming.
*
* @author Lucas Satabin
*/
class DynamicProgLcs[T] extends Lcs[T] {
class DynamicProgLcs extends Lcs {

def lcsInner(seq1: IndexedSeq[T], low1: Int, seq2: IndexedSeq[T], low2: Int): List[Common] = {
def lcsInner[Coll, T](seq1: Coll, low1: Int, seq2: Coll, low2: Int)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Common] = {
val lengths = Array.ofDim[Int](seq1.size + 1, seq2.size + 1)
// fill up the length matrix
for {
i <- 0 until seq1.size
j <- 0 until seq2.size
} if (seq1(i) == seq2(j))
} if (equiv.equiv(seq1(i), seq2(j)))
lengths(i + 1)(j + 1) = lengths(i)(j) + 1
else
lengths(i + 1)(j + 1) = math.max(lengths(i + 1)(j), lengths(i)(j + 1))
Expand Down
23 changes: 12 additions & 11 deletions src/main/scala/gnieh/diff/Lcs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gnieh.diff
package gnieh
package diff

import scala.annotation.tailrec

Expand All @@ -20,27 +21,27 @@ import scala.annotation.tailrec
*
* @author Lucas Satabin
*/
abstract class Lcs[T] {
abstract class Lcs {

/** Computes the longest common subsequence between both inputs.
* Returns an ordered list containing the indices in the first sequence and in the second sequence.
*/
@inline
def lcs(seq1: IndexedSeq[T], seq2: IndexedSeq[T]): List[Common] =
def lcs[Coll, T](seq1: Coll, seq2: Coll)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Common] =
lcs(seq1, seq2, 0, seq1.size, 0, seq2.size)

/** Computes the longest common subsequence between both input slices.
* Returns an ordered list containing the indices in the first sequence and in the second sequence.
* Before calling the actual lcs algorithm, it performs some preprocessing to detect trivial solutions.
*/
def lcs(s1: IndexedSeq[T], s2: IndexedSeq[T], low1: Int, high1: Int, low2: Int, high2: Int): List[Common] = {
def lcs[Coll, T](s1: Coll, s2: Coll, low1: Int, high1: Int, low2: Int, high2: Int)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Common] = {
val seq1 = s1.slice(low1, high1)
val seq2 = s2.slice(low2, high2)

if (seq1.isEmpty || seq2.isEmpty) {
// shortcut: if at least one sequence is empty, the lcs is empty as well
Nil
} else if (seq1 == seq2) {
} else if (indexable.equivalent(seq1, seq2)) {
// both sequences are equal, the lcs is either of them
List(Common(low1, low2, seq1.size))
} else if (seq1.startsWith(seq2)) {
Expand Down Expand Up @@ -93,16 +94,16 @@ abstract class Lcs[T] {
/** Computes the longest common subsequence between both input slices.
* Returns an ordered list containing the indices in the first sequence and in the second sequence.
*/
def lcsInner(s1: IndexedSeq[T], low1: Int, s2: IndexedSeq[T], low2: Int): List[Common]
def lcsInner[Coll, T](s1: Coll, low1: Int, s2: Coll, low2: Int)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Common]

/* Extract common prefix and suffix from both sequences */
private def splitPrefixSuffix(seq1: IndexedSeq[T], seq2: IndexedSeq[T], low1: Int, low2: Int): (Option[Common], IndexedSeq[T], IndexedSeq[T], Option[Common]) = {
private def splitPrefixSuffix[Coll, T](seq1: Coll, seq2: Coll, low1: Int, low2: Int)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): (Option[Common], Coll, Coll, Option[Common]) = {
val size1 = seq1.size
val size2 = seq2.size
val size = math.min(size1, size2)
@tailrec
def prefixLoop(idx: Int): Option[Common] =
if (idx >= size || seq1(idx) != seq2(idx)) {
if (idx >= size || !equiv.equiv(seq1(idx), seq2(idx))) {
if (idx == 0) {
None
} else {
Expand All @@ -112,10 +113,10 @@ abstract class Lcs[T] {
prefixLoop(idx + 1)
}
val prefix = prefixLoop(0)
val (endPrefix1, endPrefix2) = prefix.map { case Common(s1, s2, l) => (s1 + l, s2 + l) } getOrElse ((0, 0))
val (prefixEnd1, prefixEnd2) = prefix.map { case Common(s1, s2, l) => (s1 + l, s2 + l) } getOrElse ((0, 0))
@tailrec
def suffixLoop(idx1: Int, idx2: Int, l: Int): Option[Common] =
if (idx1 < endPrefix1 || idx2 < endPrefix2 || seq1(idx1) != seq2(idx2)) {
if (idx1 < prefixEnd1 || idx2 < prefixEnd2 || !equiv.equiv(seq1(idx1), seq2(idx2))) {
if (l == 0) {
None
} else {
Expand All @@ -127,7 +128,7 @@ abstract class Lcs[T] {
val suffix = suffixLoop(size1 - 1, size2 - 1, 0)
val psize = prefix.map(_.length).getOrElse(0)
val ssize = suffix.map(_.length).getOrElse(0)
(prefix, seq1.drop(psize).dropRight(ssize), seq2.drop(psize).dropRight(ssize), suffix)
(prefix, seq1.slice(psize, seq1.size - ssize), seq2.slice(psize, seq2.size - ssize), suffix)
}

protected def push(idx1: Int, idx2: Int, commons: List[Common], back: Boolean): List[Common] =
Expand Down
9 changes: 5 additions & 4 deletions src/main/scala/gnieh/diff/MyersLcs.scala
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,16 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package gnieh.diff
package gnieh
package diff

import scala.annotation.tailrec

import scala.collection.mutable.ListBuffer

class MyersLcs[T] extends Lcs[T] {
class MyersLcs extends Lcs {

def lcsInner(seq1: IndexedSeq[T], low1: Int, seq2: IndexedSeq[T], low2: Int): List[Common] = {
def lcsInner[Coll, T](seq1: Coll, low1: Int, seq2: Coll, low2: Int)(implicit indexable: Indexable[Coll, T], equiv: Equiv[T]): List[Common] = {
val size1 = seq1.size
val size2 = seq2.size
val max = 1 + size1 + size2
Expand All @@ -36,7 +37,7 @@ class MyersLcs[T] extends Lcs[T] {
else
v(max + k - 1) + 1
var y = x - k
while (x < size1 && y < size2 && seq1(x) == seq2(y)) {
while (x < size1 && y < size2 && equiv.equiv(seq1(x), seq2(y))) {
acc = push(x + low1, y + low2, acc, false)
x += 1
y += 1
Expand Down
Loading