From c585106da0b8b531c58a17c627e919c77429710d Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 7 Feb 2018 15:57:27 +0000 Subject: [PATCH 1/3] Reformat in line with the Google Java Style Guide. --- .../java/htsjdk/samtools/LinearBAMIndex.java | 21 +- .../java/htsjdk/samtools/SAMRecordHelper.java | 5 +- .../seqdoop/hadoop_bam/AnySAMInputFormat.java | 432 +++--- .../hadoop_bam/AnySAMOutputFormat.java | 59 +- .../seqdoop/hadoop_bam/BAMInputFormat.java | 1244 ++++++++--------- .../seqdoop/hadoop_bam/BAMOutputFormat.java | 23 +- .../seqdoop/hadoop_bam/BAMRecordReader.java | 372 ++--- .../seqdoop/hadoop_bam/BAMRecordWriter.java | 216 ++- .../seqdoop/hadoop_bam/BAMSplitGuesser.java | 708 +++++----- .../seqdoop/hadoop_bam/BCFRecordReader.java | 403 +++--- .../seqdoop/hadoop_bam/BCFRecordWriter.java | 256 ++-- .../seqdoop/hadoop_bam/BCFSplitGuesser.java | 819 +++++------ .../seqdoop/hadoop_bam/BaseSplitGuesser.java | 162 ++- .../seqdoop/hadoop_bam/CRAMInputFormat.java | 59 +- .../seqdoop/hadoop_bam/CRAMOutputFormat.java | 7 +- .../seqdoop/hadoop_bam/CRAMRecordReader.java | 12 +- .../seqdoop/hadoop_bam/CRAMRecordWriter.java | 180 ++- .../seqdoop/hadoop_bam/FastaInputFormat.java | 665 +++++---- .../seqdoop/hadoop_bam/FastqInputFormat.java | 725 +++++----- .../seqdoop/hadoop_bam/FastqOutputFormat.java | 274 ++-- .../seqdoop/hadoop_bam/FileVirtualSplit.java | 206 +-- .../seqdoop/hadoop_bam/FormatConstants.java | 63 +- .../seqdoop/hadoop_bam/FormatException.java | 14 +- .../KeyIgnoringAnySAMOutputFormat.java | 175 ++- .../KeyIgnoringBAMOutputFormat.java | 97 +- .../KeyIgnoringBAMRecordWriter.java | 41 +- .../KeyIgnoringBCFRecordWriter.java | 54 +- .../KeyIgnoringCRAMOutputFormat.java | 95 +- .../KeyIgnoringCRAMRecordWriter.java | 39 +- .../KeyIgnoringSAMRecordWriter.java | 53 +- .../KeyIgnoringVCFOutputFormat.java | 226 +-- .../KeyIgnoringVCFRecordWriter.java | 54 +- .../hadoop_bam/LazyBAMRecordFactory.java | 194 ++- .../hadoop_bam/LazyBCFGenotypesContext.java | 224 +-- .../LazyParsingGenotypesContext.java | 53 +- .../hadoop_bam/LazyVCFGenotypesContext.java | 178 +-- .../org/seqdoop/hadoop_bam/LineReader.java | 156 +-- .../seqdoop/hadoop_bam/QseqInputFormat.java | 795 +++++------ .../seqdoop/hadoop_bam/QseqOutputFormat.java | 305 ++-- .../seqdoop/hadoop_bam/ReferenceFragment.java | 218 +-- .../org/seqdoop/hadoop_bam/SAMFormat.java | 72 +- .../seqdoop/hadoop_bam/SAMInputFormat.java | 39 +- .../seqdoop/hadoop_bam/SAMRecordReader.java | 604 ++++---- .../seqdoop/hadoop_bam/SAMRecordWritable.java | 79 +- .../seqdoop/hadoop_bam/SAMRecordWriter.java | 113 +- .../seqdoop/hadoop_bam/SequencedFragment.java | 824 ++++++----- .../seqdoop/hadoop_bam/SplittingBAMIndex.java | 248 ++-- .../hadoop_bam/SplittingBAMIndexer.java | 648 +++++---- .../org/seqdoop/hadoop_bam/VCFFormat.java | 100 +- .../seqdoop/hadoop_bam/VCFInputFormat.java | 830 +++++------ .../seqdoop/hadoop_bam/VCFOutputFormat.java | 59 +- .../seqdoop/hadoop_bam/VCFRecordReader.java | 312 ++--- .../seqdoop/hadoop_bam/VCFRecordWriter.java | 254 ++-- .../hadoop_bam/VariantContextCodec.java | 621 ++++---- .../hadoop_bam/VariantContextWithHeader.java | 17 +- .../hadoop_bam/VariantContextWritable.java | 67 +- .../hadoop_bam/util/BGZFBlockIndex.java | 182 +-- .../hadoop_bam/util/BGZFBlockIndexer.java | 373 +++-- .../seqdoop/hadoop_bam/util/BGZFCodec.java | 31 +- .../util/BGZFCompressionOutputStream.java | 14 +- .../util/BGZFEnhancedGzipCodec.java | 59 +- .../util/BGZFSplitCompressionInputStream.java | 38 +- .../util/BGZFSplitFileInputFormat.java | 235 ++-- 
.../hadoop_bam/util/BGZFSplitGuesser.java | 274 ++-- .../seqdoop/hadoop_bam/util/ConfHelper.java | 75 +- .../hadoop_bam/util/DataInputWrapper.java | 49 +- .../hadoop_bam/util/DataOutputWrapper.java | 22 +- .../hadoop_bam/util/GetSortedBAMHeader.java | 49 +- .../seqdoop/hadoop_bam/util/IntervalUtil.java | 96 +- .../seqdoop/hadoop_bam/util/MurmurHash3.java | 362 +++-- .../seqdoop/hadoop_bam/util/NIOFileUtil.java | 67 +- .../hadoop_bam/util/SAMFileMerger.java | 74 +- .../hadoop_bam/util/SAMHeaderReader.java | 108 +- .../hadoop_bam/util/SAMOutputPreparer.java | 209 ++- .../hadoop_bam/util/VCFFileMerger.java | 56 +- .../hadoop_bam/util/VCFHeaderReader.java | 63 +- .../seqdoop/hadoop_bam/util/WrapSeekable.java | 123 +- .../org/seqdoop/hadoop_bam/BAMTestUtil.java | 50 +- .../seqdoop/hadoop_bam/IntervalUtilTest.java | 109 +- .../hadoop_bam/TestAnySAMInputFormat.java | 41 +- .../hadoop_bam/TestBAMInputFormat.java | 52 +- .../hadoop_bam/TestBAMOutputFormat.java | 550 ++++---- .../hadoop_bam/TestBAMSplitGuesser.java | 4 +- .../hadoop_bam/TestBGZFSplitGuesser.java | 17 +- .../hadoop_bam/TestCRAMInputFormat.java | 25 +- .../hadoop_bam/TestCRAMInputFormatOnHDFS.java | 91 +- .../hadoop_bam/TestCRAMOutputFormat.java | 510 ++++--- .../seqdoop/hadoop_bam/TestConfHelper.java | 101 +- .../hadoop_bam/TestFastaInputFormat.java | 68 +- .../hadoop_bam/TestFastqInputFormat.java | 1070 +++++++------- .../hadoop_bam/TestFastqOutputFormat.java | 337 +++-- .../seqdoop/hadoop_bam/TestLineReader.java | 95 +- .../hadoop_bam/TestQseqInputFormat.java | 698 +++++---- .../hadoop_bam/TestQseqOutputFormat.java | 271 ++-- .../org/seqdoop/hadoop_bam/TestSAMFormat.java | 8 +- .../hadoop_bam/TestSAMHeaderReader.java | 79 +- .../hadoop_bam/TestSAMInputFormat.java | 13 +- .../hadoop_bam/TestSequencedFragment.java | 654 +++++---- .../hadoop_bam/TestSplittingBAMIndexer.java | 18 +- .../org/seqdoop/hadoop_bam/TestVCFFormat.java | 8 +- .../hadoop_bam/TestVCFInputFormat.java | 282 ++-- .../TestVCFInputFormatStringency.java | 101 +- .../hadoop_bam/TestVCFOutputFormat.java | 291 ++-- .../seqdoop/hadoop_bam/TestVCFRoundTrip.java | 365 ++--- .../hadoop_bam/util/TestVCFHeaderReader.java | 16 +- src/test/resources/log4j.properties | 10 +- 106 files changed, 11803 insertions(+), 11459 deletions(-) diff --git a/src/main/java/htsjdk/samtools/LinearBAMIndex.java b/src/main/java/htsjdk/samtools/LinearBAMIndex.java index 7e80846..dbbedda 100644 --- a/src/main/java/htsjdk/samtools/LinearBAMIndex.java +++ b/src/main/java/htsjdk/samtools/LinearBAMIndex.java @@ -19,21 +19,16 @@ // IN THE SOFTWARE. package htsjdk.samtools; -import htsjdk.samtools.CachingBAMFileIndex; -import htsjdk.samtools.LinearIndex; -import htsjdk.samtools.SAMSequenceDictionary; import htsjdk.samtools.seekablestream.SeekableStream; -/** - * The htsjdk APIs for accessing the linear BAM index are private... - */ +/** The htsjdk APIs for accessing the linear BAM index are private... 
*/ public class LinearBAMIndex extends CachingBAMFileIndex { - public LinearBAMIndex(SeekableStream stream, SAMSequenceDictionary dict) { - super(stream, dict); - } - - public LinearIndex getLinearIndex(int idx) { - return getQueryResults(idx).getLinearIndex(); - } + public LinearBAMIndex(SeekableStream stream, SAMSequenceDictionary dict) { + super(stream, dict); + } + + public LinearIndex getLinearIndex(int idx) { + return getQueryResults(idx).getLinearIndex(); + } } diff --git a/src/main/java/htsjdk/samtools/SAMRecordHelper.java b/src/main/java/htsjdk/samtools/SAMRecordHelper.java index 35184f0..ea02e94 100644 --- a/src/main/java/htsjdk/samtools/SAMRecordHelper.java +++ b/src/main/java/htsjdk/samtools/SAMRecordHelper.java @@ -1,10 +1,11 @@ package htsjdk.samtools; /** - * This class is required in order to access the protected - * {@link SAMRecord#eagerDecode()} method in HTSJDK. + * This class is required in order to access the protected {@link SAMRecord#eagerDecode()} method in + * HTSJDK. */ public class SAMRecordHelper { + public static void eagerDecode(SAMRecord record) { record.eagerDecode(); } diff --git a/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java index 8d871e6..68d38f0 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/AnySAMInputFormat.java @@ -27,7 +27,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -40,218 +39,231 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; -/** An {@link org.apache.hadoop.mapreduce.InputFormat} for SAM, BAM, and CRAM files. - * Values are the individual records; see {@link BAMRecordReader} for the - * meaning of the key. +/** + * An {@link org.apache.hadoop.mapreduce.InputFormat} for SAM, BAM, and CRAM files. Values are the + * individual records; see {@link BAMRecordReader} for the meaning of the key. * - *
<p>By default, files are recognized as SAM, BAM, or CRAM based on their file
- * extensions: see {@link #TRUST_EXTS_PROPERTY}. If that fails, or this
- * behaviour is disabled, the first byte of each file is read to determine the
- * file type.</p>
+ *

By default, files are recognized as SAM, BAM, or CRAM based on their file extensions: see + * {@link #TRUST_EXTS_PROPERTY}. If that fails, or this behaviour is disabled, the first byte of + * each file is read to determine the file type. */ -public class AnySAMInputFormat - extends FileInputFormat -{ - /** A Boolean property: are file extensions trusted? The default is - * true. - * - * @see SAMFormat#inferFromFilePath - */ - public static final String TRUST_EXTS_PROPERTY = - "hadoopbam.anysam.trust-exts"; - - private final BAMInputFormat bamIF = new BAMInputFormat(); - private final CRAMInputFormat cramIF = new CRAMInputFormat(); - private final SAMInputFormat samIF = new SAMInputFormat(); - - private final Map formatMap; - private final boolean givenMap; - - private Configuration conf; - - /** Creates a new input format, which will use the - * Configuration from the first public method called. Thus this - * will behave as though constructed with a Configuration - * directly, but only after it has received it in - * createRecordReader (via the TaskAttemptContext) - * or isSplitable or getSplits (via the - * JobContext). Until then, other methods will throw an {@link - * IllegalStateException}. - * - * This constructor exists mainly as a convenience, e.g. so that - * AnySAMInputFormat can be used directly in - * Job.setInputFormatClass. - */ - public AnySAMInputFormat() { - this(null, new HashMap<>(), false); - } - - /** Creates a new input format, reading {@link #TRUST_EXTS_PROPERTY} from - * the given Configuration. - */ - public AnySAMInputFormat(Configuration conf) { - this(conf, new HashMap<>(), false); - } - - private static boolean trustExtensions(Configuration conf) { - return conf.getBoolean(TRUST_EXTS_PROPERTY, true); - } - - /** Creates a new input format, trusting the given Map to - * define the file-to-format associations. Neither file paths nor their - * contents are looked at, only the Map is used. - * - *
<p>The Map is not copied, so it should not be modified while
- * this input format is in use!</p>
- * */ - public AnySAMInputFormat(Map formatMap) { - this(null, formatMap, true); - } - - private AnySAMInputFormat(Configuration conf, Map formatMap, boolean givenMap){ - this.formatMap = formatMap; - this.givenMap = givenMap; - this.conf = conf; - } - - /** Returns the {@link SAMFormat} corresponding to the given path. Returns - * null if it cannot be determined even based on the file - * contents (unless future SAM/BAM formats are very different, this means - * that the path does not refer to a SAM or BAM file). - * - *
<p>If this input format was constructed using a given
- * Map<Path,SAMFormat> and the path is not contained
- * within that map, throws an {@link IllegalArgumentException}.</p>
- */ - public SAMFormat getFormat(final Path path) throws PathNotFoundException { - SAMFormat fmt = formatMap.get(path); - if (fmt != null || formatMap.containsKey(path)) - return fmt; - - if (givenMap) - throw new IllegalArgumentException( - "SAM format for '"+path+"' not in given map"); - - if (this.conf == null) - throw new IllegalStateException("Don't have a Configuration yet"); - - if (trustExtensions(conf)) { - final SAMFormat f = SAMFormat.inferFromFilePath(path); - if (f != null) { - formatMap.put(path, f); - return f; - } - } - - try { - FileSystem fileSystem = path.getFileSystem(conf); - if (!fileSystem.exists(path)) { - throw new PathNotFoundException(path.toString()); - } - fmt = SAMFormat.inferFromData(fileSystem.open(path)); - } catch (IOException e) {} - - formatMap.put(path, fmt); - return fmt; - } - - /** Returns a {@link BAMRecordReader} or {@link SAMRecordReader} as - * appropriate, initialized with the given parameters. - * - *
<p>Throws {@link IllegalArgumentException} if the given input split is
- * not a {@link FileVirtualSplit} (used by {@link BAMInputFormat}) or a
- * {@link FileSplit} (used by {@link SAMInputFormat}), or if the path
- * referred to is not recognized as a SAM, BAM, or CRAM file (see {@link
- * #getFormat}).</p>
- */ - @Override public RecordReader - createRecordReader(InputSplit split, TaskAttemptContext ctx) - throws InterruptedException, IOException - { - final Path path; - if (split instanceof FileSplit) - path = ((FileSplit)split).getPath(); - else if (split instanceof FileVirtualSplit) - path = ((FileVirtualSplit)split).getPath(); - else - throw new IllegalArgumentException( - "split '"+split+"' has unknown type: cannot extract path"); - - if (this.conf == null) - this.conf = ctx.getConfiguration(); - - final SAMFormat fmt = getFormat(path); - if (fmt == null) - throw new IllegalArgumentException( - "unknown SAM format, cannot create RecordReader: "+path); - - switch (fmt) { - case SAM: return samIF.createRecordReader(split, ctx); - case BAM: return bamIF.createRecordReader(split, ctx); - case CRAM: return cramIF.createRecordReader(split, ctx); - default: assert false; return null; - } - } - - /** Defers to {@link BAMInputFormat}, {@link CRAMInputFormat}, or - * {@link SAMInputFormat} as appropriate for the given path. - */ - @Override public boolean isSplitable(JobContext job, Path path) { - if (this.conf == null) - this.conf = job.getConfiguration(); - - try { - final SAMFormat fmt = getFormat(path); - if (fmt == null) +public class AnySAMInputFormat extends FileInputFormat { + + /** + * A Boolean property: are file extensions trusted? The default is true. + * + * @see SAMFormat#inferFromFilePath + */ + public static final String TRUST_EXTS_PROPERTY = "hadoopbam.anysam.trust-exts"; + + private final BAMInputFormat bamIF = new BAMInputFormat(); + private final CRAMInputFormat cramIF = new CRAMInputFormat(); + private final SAMInputFormat samIF = new SAMInputFormat(); + + private final Map formatMap; + private final boolean givenMap; + + private Configuration conf; + + /** + * Creates a new input format, which will use the Configuration from the first public + * method called. Thus this will behave as though constructed with a Configuration + * directly, but only after it has received it in createRecordReader (via the + * TaskAttemptContext) or isSplitable or getSplits (via the + * JobContext). Until then, other methods will throw an {@link + * IllegalStateException}. + * + *

This constructor exists mainly as a convenience, e.g. so that AnySAMInputFormat + * can be used directly in Job.setInputFormatClass. + */ + public AnySAMInputFormat() { + this(null, new HashMap<>(), false); + } + + /** + * Creates a new input format, reading {@link #TRUST_EXTS_PROPERTY} from the given + * Configuration. + */ + public AnySAMInputFormat(Configuration conf) { + this(conf, new HashMap<>(), false); + } + + /** + * Creates a new input format, trusting the given Map to define the file-to-format + * associations. Neither file paths nor their contents are looked at, only the Map is + * used. + * + *

The Map is not copied, so it should not be modified while this input format is + * in use! + */ + public AnySAMInputFormat(Map formatMap) { + this(null, formatMap, true); + } + + private AnySAMInputFormat(Configuration conf, Map formatMap, boolean givenMap) { + this.formatMap = formatMap; + this.givenMap = givenMap; + this.conf = conf; + } + + private static boolean trustExtensions(Configuration conf) { + return conf.getBoolean(TRUST_EXTS_PROPERTY, true); + } + + /** + * Returns the {@link SAMFormat} corresponding to the given path. Returns null if it + * cannot be determined even based on the file contents (unless future SAM/BAM formats are very + * different, this means that the path does not refer to a SAM or BAM file). + * + *

If this input format was constructed using a given Map<Path,SAMFormat> + * and the path is not contained within that map, throws an {@link IllegalArgumentException}. + */ + public SAMFormat getFormat(final Path path) throws PathNotFoundException { + SAMFormat fmt = formatMap.get(path); + if (fmt != null || formatMap.containsKey(path)) { + return fmt; + } + + if (givenMap) { + throw new IllegalArgumentException("SAM format for '" + path + "' not in given map"); + } + + if (this.conf == null) { + throw new IllegalStateException("Don't have a Configuration yet"); + } + + if (trustExtensions(conf)) { + final SAMFormat f = SAMFormat.inferFromFilePath(path); + if (f != null) { + formatMap.put(path, f); + return f; + } + } + + try { + FileSystem fileSystem = path.getFileSystem(conf); + if (!fileSystem.exists(path)) { + throw new PathNotFoundException(path.toString()); + } + fmt = SAMFormat.inferFromData(fileSystem.open(path)); + } catch (IOException e) { + } + + formatMap.put(path, fmt); + return fmt; + } + + /** + * Returns a {@link BAMRecordReader} or {@link SAMRecordReader} as appropriate, initialized with + * the given parameters. + * + *

Throws {@link IllegalArgumentException} if the given input split is not a {@link + * FileVirtualSplit} (used by {@link BAMInputFormat}) or a {@link FileSplit} (used by {@link + * SAMInputFormat}), or if the path referred to is not recognized as a SAM, BAM, or CRAM file (see + * {@link #getFormat}). + */ + @Override + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext ctx) throws InterruptedException, IOException { + final Path path; + if (split instanceof FileSplit) { + path = ((FileSplit) split).getPath(); + } else if (split instanceof FileVirtualSplit) { + path = ((FileVirtualSplit) split).getPath(); + } else { + throw new IllegalArgumentException( + "split '" + split + "' has unknown type: cannot extract path"); + } + + if (this.conf == null) { + this.conf = ctx.getConfiguration(); + } + + final SAMFormat fmt = getFormat(path); + if (fmt == null) { + throw new IllegalArgumentException("unknown SAM format, cannot create RecordReader: " + path); + } + + switch (fmt) { + case SAM: + return samIF.createRecordReader(split, ctx); + case BAM: + return bamIF.createRecordReader(split, ctx); + case CRAM: + return cramIF.createRecordReader(split, ctx); + default: + assert false; + return null; + } + } + + /** + * Defers to {@link BAMInputFormat}, {@link CRAMInputFormat}, or {@link SAMInputFormat} as + * appropriate for the given path. + */ + @Override + public boolean isSplitable(JobContext job, Path path) { + if (this.conf == null) { + this.conf = job.getConfiguration(); + } + + try { + final SAMFormat fmt = getFormat(path); + if (fmt == null) { return super.isSplitable(job, path); + } - switch (fmt) { - case SAM: return samIF.isSplitable(job, path); - case BAM: return bamIF.isSplitable(job, path); - case CRAM: return cramIF.isSplitable(job, path); - default: assert false; return false; + switch (fmt) { + case SAM: + return samIF.isSplitable(job, path); + case BAM: + return bamIF.isSplitable(job, path); + case CRAM: + return cramIF.isSplitable(job, path); + default: + assert false; + return false; + } + } catch (PathNotFoundException e) { + return super.isSplitable(job, path); + } + } + + /** + * Defers to {@link BAMInputFormat} or {@link CRAMInputFormat} as appropriate for each individual + * path. SAM paths do not require special handling, so their splits are left unchanged. + */ + @Override + public List getSplits(JobContext job) throws IOException { + if (this.conf == null) { + this.conf = job.getConfiguration(); + } + + final List origSplits = BAMInputFormat.removeIndexFiles(super.getSplits(job)); + + // We have to partition the splits by input format and hand them over to + // the *InputFormats for any further handling. + // + // BAMInputFormat and CRAMInputFormat need to change the split boundaries, so we can + // just extract the BAM and CRAM ones and leave the rest as they are. + + final List bamOrigSplits = new ArrayList(origSplits.size()), + cramOrigSplits = new ArrayList(origSplits.size()), + newSplits = new ArrayList(origSplits.size()); + + for (final InputSplit iSplit : origSplits) { + final FileSplit split = (FileSplit) iSplit; + + if (SAMFormat.BAM.equals(getFormat(split.getPath()))) { + bamOrigSplits.add(split); + } else if (SAMFormat.CRAM.equals(getFormat(split.getPath()))) { + cramOrigSplits.add(split); + } else { + newSplits.add(split); } - } catch (PathNotFoundException e) { - return super.isSplitable(job, path); - } - } - - /** Defers to {@link BAMInputFormat} or {@link CRAMInputFormat} as appropriate for each - * individual path. 
SAM paths do not require special handling, so their splits are left - * unchanged. - */ - @Override public List getSplits(JobContext job) - throws IOException - { - if (this.conf == null) - this.conf = job.getConfiguration(); - - final List origSplits = - BAMInputFormat.removeIndexFiles(super.getSplits(job)); - - // We have to partition the splits by input format and hand them over to - // the *InputFormats for any further handling. - // - // BAMInputFormat and CRAMInputFormat need to change the split boundaries, so we can - // just extract the BAM and CRAM ones and leave the rest as they are. - - final List - bamOrigSplits = new ArrayList(origSplits.size()), - cramOrigSplits = new ArrayList(origSplits.size()), - newSplits = new ArrayList(origSplits.size()); - - for (final InputSplit iSplit : origSplits) { - final FileSplit split = (FileSplit)iSplit; - - if (SAMFormat.BAM.equals(getFormat(split.getPath()))) - bamOrigSplits.add(split); - else if (SAMFormat.CRAM.equals(getFormat(split.getPath()))) - cramOrigSplits.add(split); - else - newSplits.add(split); - } - newSplits.addAll(bamIF.getSplits(bamOrigSplits, job.getConfiguration())); - newSplits.addAll(cramIF.getSplits(cramOrigSplits, job.getConfiguration())); - return newSplits; - } + } + newSplits.addAll(bamIF.getSplits(bamOrigSplits, job.getConfiguration())); + newSplits.addAll(cramIF.getSplits(cramOrigSplits, job.getConfiguration())); + return newSplits; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/AnySAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/AnySAMOutputFormat.java index 538b842..403ca13 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/AnySAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/AnySAMOutputFormat.java @@ -25,34 +25,35 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -/** An abstract {@link org.apache.hadoop.mapreduce.OutputFormat} for SAM and - * BAM files. Only locks down the value type and stores the output format - * requested. +/** + * An abstract {@link org.apache.hadoop.mapreduce.OutputFormat} for SAM and BAM files. Only locks + * down the value type and stores the output format requested. */ -public abstract class AnySAMOutputFormat - extends FileOutputFormat -{ - /** A string property defining the output format to use. The value is read - * directly by {@link SAMFormat#valueOf}. - */ - public static final String OUTPUT_SAM_FORMAT_PROPERTY = - "hadoopbam.anysam.output-format"; - - protected SAMFormat format; - - /** Creates a new output format, reading {@link #OUTPUT_SAM_FORMAT_PROPERTY} - * from the given Configuration. - */ - protected AnySAMOutputFormat(Configuration conf) { - final String fmtStr = conf.get(OUTPUT_SAM_FORMAT_PROPERTY); - - format = fmtStr == null ? null : SAMFormat.valueOf(fmtStr); - } - - /** Creates a new output format for the given SAM format. */ - protected AnySAMOutputFormat(SAMFormat fmt) { - if (fmt == null) - throw new IllegalArgumentException("null SAMFormat"); - format = fmt; - } +public abstract class AnySAMOutputFormat extends FileOutputFormat { + + /** + * A string property defining the output format to use. The value is read directly by {@link + * SAMFormat#valueOf}. + */ + public static final String OUTPUT_SAM_FORMAT_PROPERTY = "hadoopbam.anysam.output-format"; + + protected SAMFormat format; + + /** + * Creates a new output format, reading {@link #OUTPUT_SAM_FORMAT_PROPERTY} from the given + * Configuration. 
+ */ + protected AnySAMOutputFormat(Configuration conf) { + final String fmtStr = conf.get(OUTPUT_SAM_FORMAT_PROPERTY); + + format = fmtStr == null ? null : SAMFormat.valueOf(fmtStr); + } + + /** Creates a new output format for the given SAM format. */ + protected AnySAMOutputFormat(SAMFormat fmt) { + if (fmt == null) { + throw new IllegalArgumentException("null SAMFormat"); + } + format = fmt; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BAMInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/BAMInputFormat.java index 68a7640..147f7e4 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BAMInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BAMInputFormat.java @@ -36,33 +36,24 @@ import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.Locatable; +import java.io.IOException; +import java.nio.file.ProviderNotFoundException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Comparator; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.stream.Collectors; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; -import org.seqdoop.hadoop_bam.util.IntervalUtil; -import org.seqdoop.hadoop_bam.util.NIOFileUtil; -import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -import org.seqdoop.hadoop_bam.util.WrapSeekable; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.nio.file.ProviderNotFoundException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; -import java.util.List; - -import htsjdk.samtools.seekablestream.SeekableStream; - -import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; @@ -71,615 +62,624 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.seqdoop.hadoop_bam.util.IntervalUtil; +import org.seqdoop.hadoop_bam.util.NIOFileUtil; +import org.seqdoop.hadoop_bam.util.SAMHeaderReader; +import org.seqdoop.hadoop_bam.util.WrapSeekable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; -/** An {@link org.apache.hadoop.mapreduce.InputFormat} for BAM files. Values - * are the individual records; see {@link BAMRecordReader} for the meaning of - * the key. +/** + * An {@link org.apache.hadoop.mapreduce.InputFormat} for BAM files. Values are the individual + * records; see {@link BAMRecordReader} for the meaning of the key. */ -public class BAMInputFormat - extends FileInputFormat -{ - private static final Logger logger = LoggerFactory.getLogger(BAMInputFormat.class); - - /** - * If set to true, only include reads that overlap the given intervals (if specified), - * and unplaced unmapped reads (if specified). For programmatic use - * {@link #setTraversalParameters(Configuration, List, boolean)} should be preferred. - */ - public static final String BOUNDED_TRAVERSAL_PROPERTY = "hadoopbam.bam.bounded-traversal"; - - /** - * If set to true, enables the use of BAM indices to calculate splits. 
- * For programmatic use - * {@link #setEnableBAISplitCalculator(Configuration, boolean)} should be preferred. - * By default, this split calculator is disabled in favor of the splitting-bai calculator. - */ - public static final String ENABLE_BAI_SPLIT_CALCULATOR = "hadoopbam.bam.enable-bai-splitter"; - - /** - * Filter by region, like -L in SAMtools. Takes a comma-separated - * list of intervals, e.g. chr1:1-20000,chr2:12000-20000. For - * programmatic use {@link #setIntervals(Configuration, List)} should be preferred. - */ - public static final String INTERVALS_PROPERTY = "hadoopbam.bam.intervals"; - - /** - * If set to true, include unplaced unmapped reads (that is, unmapped reads with no - * position). For programmatic use - * {@link #setTraversalParameters(Configuration, List, boolean)} should be preferred. - */ - public static final String TRAVERSE_UNPLACED_UNMAPPED_PROPERTY = "hadoopbam.bam.traverse-unplaced-unmapped"; - - /** - * Only include reads that overlap the given intervals. Unplaced unmapped reads are not - * included. - * @param conf the Hadoop configuration to set properties on - * @param intervals the intervals to filter by - * @param the {@link Locatable} type - */ - public static void setIntervals(Configuration conf, - List intervals) { - setTraversalParameters(conf, intervals, false); - } - - /** - * Enables or disables the split calculator that uses the BAM index to calculate splits. - */ - public static void setEnableBAISplitCalculator(Configuration conf, - boolean setEnabled) { - conf.setBoolean(ENABLE_BAI_SPLIT_CALCULATOR, setEnabled); +public class BAMInputFormat extends FileInputFormat { + + /** + * If set to true, only include reads that overlap the given intervals (if specified), and + * unplaced unmapped reads (if specified). For programmatic use {@link + * #setTraversalParameters(Configuration, List, boolean)} should be preferred. + */ + public static final String BOUNDED_TRAVERSAL_PROPERTY = "hadoopbam.bam.bounded-traversal"; + /** + * If set to true, enables the use of BAM indices to calculate splits. For programmatic use {@link + * #setEnableBAISplitCalculator(Configuration, boolean)} should be preferred. By default, this + * split calculator is disabled in favor of the splitting-bai calculator. + */ + public static final String ENABLE_BAI_SPLIT_CALCULATOR = "hadoopbam.bam.enable-bai-splitter"; + /** + * Filter by region, like -L in SAMtools. Takes a comma-separated list of intervals, + * e.g. chr1:1-20000,chr2:12000-20000. For programmatic use {@link + * #setIntervals(Configuration, List)} should be preferred. + */ + public static final String INTERVALS_PROPERTY = "hadoopbam.bam.intervals"; + /** + * If set to true, include unplaced unmapped reads (that is, unmapped reads with no position). For + * programmatic use {@link #setTraversalParameters(Configuration, List, boolean)} should be + * preferred. + */ + public static final String TRAVERSE_UNPLACED_UNMAPPED_PROPERTY = + "hadoopbam.bam.traverse-unplaced-unmapped"; + + private static final Logger logger = LoggerFactory.getLogger(BAMInputFormat.class); + + /** + * Only include reads that overlap the given intervals. Unplaced unmapped reads are not included. 
+ * + * @param conf the Hadoop configuration to set properties on + * @param intervals the intervals to filter by + * @param the {@link Locatable} type + */ + public static void setIntervals(Configuration conf, List intervals) { + setTraversalParameters(conf, intervals, false); + } + + /** Enables or disables the split calculator that uses the BAM index to calculate splits. */ + public static void setEnableBAISplitCalculator(Configuration conf, boolean setEnabled) { + conf.setBoolean(ENABLE_BAI_SPLIT_CALCULATOR, setEnabled); + } + + /** + * Only include reads that overlap the given intervals (if specified) and unplaced unmapped reads + * (if true). + * + * @param conf the Hadoop configuration to set properties on + * @param intervals the intervals to filter by, or null if all reads are to be + * included (in which case traverseUnplacedUnmapped must be true) + * @param traverseUnplacedUnmapped whether to included unplaced unampped reads + * @param the {@link Locatable} type + */ + public static void setTraversalParameters( + Configuration conf, List intervals, boolean traverseUnplacedUnmapped) { + if (intervals == null && !traverseUnplacedUnmapped) { + throw new IllegalArgumentException("Traversing mapped reads only is not supported."); + } + conf.setBoolean(BOUNDED_TRAVERSAL_PROPERTY, true); + if (intervals != null) { + StringBuilder sb = new StringBuilder(); + for (Iterator it = intervals.iterator(); it.hasNext(); ) { + Locatable l = it.next(); + sb.append(String.format("%s:%d-%d", l.getContig(), l.getStart(), l.getEnd())); + if (it.hasNext()) { + sb.append(","); + } + } + conf.set(INTERVALS_PROPERTY, sb.toString()); + } + conf.setBoolean(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY, traverseUnplacedUnmapped); + } + + /** + * Reset traversal parameters so that all reads are included. 
+ * + * @param conf the Hadoop configuration to set properties on + */ + public static void unsetTraversalParameters(Configuration conf) { + conf.unset(BOUNDED_TRAVERSAL_PROPERTY); + conf.unset(INTERVALS_PROPERTY); + conf.unset(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY); + } + + static boolean isBoundedTraversal(Configuration conf) { + return conf.getBoolean(BOUNDED_TRAVERSAL_PROPERTY, false) + || conf.get(INTERVALS_PROPERTY) != null; // backwards compatibility + } + + static boolean traverseUnplacedUnmapped(Configuration conf) { + return conf.getBoolean(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY, false); + } + + static List getIntervals(Configuration conf) { + return IntervalUtil.getIntervals(conf, INTERVALS_PROPERTY); + } + + static Path getIdxPath(Path path) { + return path.suffix(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); + } + + static List removeIndexFiles(List splits) { + // Remove any splitting bai files + return splits + .stream() + .filter( + split -> + !((FileSplit) split) + .getPath() + .getName() + .endsWith(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)) + .filter(split -> !((FileSplit) split).getPath().getName().endsWith(BAMIndex.BAMIndexSuffix)) + .collect(Collectors.toList()); + } + + static Path getBAIPath(Path path) { + return path.suffix(BAMIndex.BAMIndexSuffix); + } + + /** + * Converts a List of SimpleIntervals into the format required by the SamReader query API + * + * @param rawIntervals SimpleIntervals to be converted + * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API + */ + static QueryInterval[] prepareQueryIntervals( + final List rawIntervals, final SAMSequenceDictionary sequenceDictionary) { + if (rawIntervals == null || rawIntervals.isEmpty()) { + return null; + } + + // Convert each SimpleInterval to a QueryInterval + final QueryInterval[] convertedIntervals = + rawIntervals + .stream() + .map( + rawInterval -> + convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) + .toArray(QueryInterval[]::new); + + // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API + return QueryInterval.optimizeIntervals(convertedIntervals); + } + + /** + * Converts an interval in SimpleInterval format into an htsjdk QueryInterval. + * + *

In doing so, a header lookup is performed to convert from contig name to index + * + * @param interval interval to convert + * @param sequenceDictionary sequence dictionary used to perform the conversion + * @return an equivalent interval in QueryInterval format + */ + private static QueryInterval convertSimpleIntervalToQueryInterval( + final Interval interval, final SAMSequenceDictionary sequenceDictionary) { + if (interval == null) { + throw new IllegalArgumentException("interval may not be null"); + } + if (sequenceDictionary == null) { + throw new IllegalArgumentException("sequence dictionary may not be null"); + } + + final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig()); + if (contigIndex == -1) { + throw new IllegalArgumentException( + "Contig " + interval.getContig() + " not present in reads sequence " + "dictionary"); + } + + return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd()); + } + + /** Returns a {@link BAMRecordReader} initialized with the parameters. */ + @Override + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext ctx) throws InterruptedException, IOException { + final RecordReader rr = new BAMRecordReader(); + rr.initialize(split, ctx); + return rr; + } + + /** The splits returned are {@link FileVirtualSplit FileVirtualSplits}. */ + @Override + public List getSplits(JobContext job) throws IOException { + return getSplits(super.getSplits(job), job.getConfiguration()); + } + + public List getSplits(List splits, Configuration cfg) throws IOException { + + final List origSplits = removeIndexFiles(splits); + + // Align the splits so that they don't cross blocks. + + // addIndexedSplits() requires the given splits to be sorted by file + // path, so do so. Although FileInputFormat.getSplits() does, at the time + // of writing this, generate them in that order, we shouldn't rely on it. + Collections.sort( + origSplits, + new Comparator() { + public int compare(InputSplit a, InputSplit b) { + FileSplit fa = (FileSplit) a, fb = (FileSplit) b; + return fa.getPath().compareTo(fb.getPath()); + } + }); + + final List newSplits = new ArrayList(origSplits.size()); + + for (int i = 0; i < origSplits.size(); ) { + try { + i = addIndexedSplits(origSplits, i, newSplits, cfg); + } catch (IOException | ProviderNotFoundException e) { + if (cfg.getBoolean(ENABLE_BAI_SPLIT_CALCULATOR, false)) { + try { + i = addBAISplits(origSplits, i, newSplits, cfg); + } catch (IOException | ProviderNotFoundException e2) { + i = addProbabilisticSplits(origSplits, i, newSplits, cfg); + } + } else { + i = addProbabilisticSplits(origSplits, i, newSplits, cfg); + } + } + } + return filterByInterval(newSplits, cfg); + } + + // Handles all the splits that share the Path of the one at index i, + // returning the next index to be used. 
+ private int addIndexedSplits( + List splits, int i, List newSplits, Configuration cfg) + throws IOException { + final Path file = ((FileSplit) splits.get(i)).getPath(); + List potentialSplits = new ArrayList(); + + final SplittingBAMIndex idx = + new SplittingBAMIndex(file.getFileSystem(cfg).open(getIdxPath(file))); + + int splitsEnd = splits.size(); + for (int j = i; j < splitsEnd; ++j) { + if (!file.equals(((FileSplit) splits.get(j)).getPath())) { + splitsEnd = j; + } + } + + if (idx.size() == 1) { // no alignments, only the file size, so no splits to add + return splitsEnd; + } + + for (int j = i; j < splitsEnd; ++j) { + final FileSplit fileSplit = (FileSplit) splits.get(j); + + final long start = fileSplit.getStart(); + final long end = start + fileSplit.getLength(); + + final Long blockStart = idx.nextAlignment(start); + + // The last split needs to end where the last alignment ends, but the + // index doesn't store that data (whoops); we only know where the last + // alignment begins. Fortunately there's no need to change the index + // format for this: we can just set the end to the maximal length of + // the final BGZF block (0xffff), and then read until BAMRecordCodec + // hits EOF. + Long blockEnd; + if (j == splitsEnd - 1) { + blockEnd = idx.prevAlignment(end) | 0xffff; + } else { + blockEnd = idx.nextAlignment(end); + } + + if (blockStart == null || blockEnd == null) { + logger.warn("Index for {} was not good. Generating probabilistic splits.", file); + return addProbabilisticSplits(splits, i, newSplits, cfg); + } + + potentialSplits.add( + new FileVirtualSplit(file, blockStart, blockEnd, fileSplit.getLocations())); + } + + for (InputSplit s : potentialSplits) { + newSplits.add(s); + } + return splitsEnd; + } + + // Handles all the splits that share the Path of the one at index i, + // returning the next index to be used. 
+ private int addBAISplits( + List splits, int i, List newSplits, Configuration conf) + throws IOException { + final Path path = ((FileSplit) splits.get(i)).getPath(); + FileSystem fs = path.getFileSystem(conf); + int splitsEnd = i; + + try (FSDataInputStream in = fs.open(path)) { + SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); + SAMSequenceDictionary dict = header.getSequenceDictionary(); + + final SeekableStream guesserSin = WrapSeekable.openPath(fs, path); + final BAMSplitGuesser guesser = new BAMSplitGuesser(guesserSin, conf); + + final SeekableStream sin; + if (fs.exists(getBAIPath(path))) { + sin = WrapSeekable.openPath(fs, getBAIPath(path)); + } else { + sin = + WrapSeekable.openPath( + fs, new Path(path.toString().replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix))); + } + final LinearBAMIndex idx = new LinearBAMIndex(sin, dict); + + // searches for the first contig that contains linear bins + // a contig will have no linear bins if there are no reads mapped to that + // contig (e.g., reads were aligned to a whole genome, and then reads from + // only a single contig were selected) + int ctgIdx = -1; + int bin = 0; + LinearIndex linIdx; + int ctgBins; + long lastStart = 0; + do { + ctgIdx++; + linIdx = idx.getLinearIndex(ctgIdx); + ctgBins = linIdx.size(); + } while (ctgBins == 0); + long nextStart = linIdx.get(bin); + + FileVirtualSplit newSplit = null; + boolean lastWasGuessed = false; + + // loop and process all of the splits that share a single .bai + while (splitsEnd < splits.size() && ((FileSplit) (splits.get(splitsEnd))).getPath() == path) { + FileSplit fSplit = (FileSplit) splits.get(splitsEnd); + splitsEnd++; + + if (splitsEnd >= splits.size()) { + break; } - /** - * Only include reads that overlap the given intervals (if specified) and unplaced - * unmapped reads (if true). - * @param conf the Hadoop configuration to set properties on - * @param intervals the intervals to filter by, or null if all reads - * are to be included (in which case traverseUnplacedUnmapped must be - * true) - * @param traverseUnplacedUnmapped whether to included unplaced unampped reads - * @param the {@link Locatable} type - */ - public static void setTraversalParameters(Configuration conf, - List intervals, boolean traverseUnplacedUnmapped) { - if (intervals == null && !traverseUnplacedUnmapped) { - throw new IllegalArgumentException("Traversing mapped reads only is not supported."); - } - conf.setBoolean(BOUNDED_TRAVERSAL_PROPERTY, true); - if (intervals != null) { - StringBuilder sb = new StringBuilder(); - for (Iterator it = intervals.iterator(); it.hasNext(); ) { - Locatable l = it.next(); - sb.append(String.format("%s:%d-%d", l.getContig(), l.getStart(), l.getEnd())); - if (it.hasNext()) { - sb.append(","); - } - } - conf.set(INTERVALS_PROPERTY, sb.toString()); - } - conf.setBoolean(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY, traverseUnplacedUnmapped); - } - - /** - * Reset traversal parameters so that all reads are included. 
- * @param conf the Hadoop configuration to set properties on - */ - public static void unsetTraversalParameters(Configuration conf) { - conf.unset(BOUNDED_TRAVERSAL_PROPERTY); - conf.unset(INTERVALS_PROPERTY); - conf.unset(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY); - } - - static boolean isBoundedTraversal(Configuration conf) { - return conf.getBoolean(BOUNDED_TRAVERSAL_PROPERTY, false) || - conf.get(INTERVALS_PROPERTY) != null; // backwards compatibility - } - - static boolean traverseUnplacedUnmapped(Configuration conf) { - return conf.getBoolean(TRAVERSE_UNPLACED_UNMAPPED_PROPERTY, false); - } - - static List getIntervals(Configuration conf) { - return IntervalUtil.getIntervals(conf, INTERVALS_PROPERTY); - } - - static Path getIdxPath(Path path) { - return path.suffix(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); - } - - static List removeIndexFiles(List splits) { - // Remove any splitting bai files - return splits.stream() - .filter(split -> !((FileSplit) split).getPath().getName().endsWith( - SplittingBAMIndexer.OUTPUT_FILE_EXTENSION)) - .filter(split -> !((FileSplit) split).getPath().getName().endsWith( - BAMIndex.BAMIndexSuffix)) - .collect(Collectors.toList()); + long fSplitEnd = (fSplit.getStart() + fSplit.getLength()) << 16; + lastStart = nextStart; + + // we need to advance and find the first linear index bin + // that starts after the current split ends. + // this is the end of our split. + while (nextStart < fSplitEnd && ctgIdx < dict.size()) { + + // are we going off of the end of this contig? + // if so, advance to the next contig with a linear bin + if (bin + 1 >= ctgBins) { + do { + ctgIdx += 1; + bin = 0; + if (ctgIdx >= dict.size()) { + break; + } + linIdx = idx.getLinearIndex(ctgIdx); + ctgBins = linIdx.size(); + } while (ctgBins == 0); + } + if (ctgIdx < dict.size() && linIdx.size() > bin) { + nextStart = linIdx.get(bin); + bin++; + } } - - static Path getBAIPath(Path path) { - return path.suffix(BAMIndex.BAMIndexSuffix); - } - - /** Returns a {@link BAMRecordReader} initialized with the parameters. */ - @Override public RecordReader - createRecordReader(InputSplit split, TaskAttemptContext ctx) - throws InterruptedException, IOException - { - final RecordReader rr = - new BAMRecordReader(); - rr.initialize(split, ctx); - return rr; - } - - /** The splits returned are {@link FileVirtualSplit FileVirtualSplits}. */ - @Override public List getSplits(JobContext job) - throws IOException - { - return getSplits(super.getSplits(job), job.getConfiguration()); - } - - public List getSplits( - List splits, Configuration cfg) - throws IOException - { - - final List origSplits = removeIndexFiles(splits); - - // Align the splits so that they don't cross blocks. - - // addIndexedSplits() requires the given splits to be sorted by file - // path, so do so. Although FileInputFormat.getSplits() does, at the time - // of writing this, generate them in that order, we shouldn't rely on it. 
- Collections.sort(origSplits, new Comparator() { - public int compare(InputSplit a, InputSplit b) { - FileSplit fa = (FileSplit)a, fb = (FileSplit)b; - return fa.getPath().compareTo(fb.getPath()); - } - }); - - final List newSplits = - new ArrayList(origSplits.size()); - - for (int i = 0; i < origSplits.size();) { - try { - i = addIndexedSplits (origSplits, i, newSplits, cfg); - } catch (IOException | ProviderNotFoundException e) { - if (cfg.getBoolean(ENABLE_BAI_SPLIT_CALCULATOR, false)) { - try { - i = addBAISplits (origSplits, i, newSplits, cfg); - } catch (IOException | ProviderNotFoundException e2) { - i = addProbabilisticSplits (origSplits, i, newSplits, cfg); - } - } else { - i = addProbabilisticSplits (origSplits, i, newSplits, cfg); - } - } - } - return filterByInterval(newSplits, cfg); - } - - // Handles all the splits that share the Path of the one at index i, - // returning the next index to be used. - private int addIndexedSplits( - List splits, int i, List newSplits, - Configuration cfg) - throws IOException - { - final Path file = ((FileSplit)splits.get(i)).getPath(); - List potentialSplits = new ArrayList(); - - final SplittingBAMIndex idx = new SplittingBAMIndex( - file.getFileSystem(cfg).open(getIdxPath(file))); - - int splitsEnd = splits.size(); - for (int j = i; j < splitsEnd; ++j) - if (!file.equals(((FileSplit)splits.get(j)).getPath())) - splitsEnd = j; - - if (idx.size() == 1) { // no alignments, only the file size, so no splits to add - return splitsEnd; - } - - for (int j = i; j < splitsEnd; ++j) { - final FileSplit fileSplit = (FileSplit)splits.get(j); - - final long start = fileSplit.getStart(); - final long end = start + fileSplit.getLength(); - - final Long blockStart = idx.nextAlignment(start); - - // The last split needs to end where the last alignment ends, but the - // index doesn't store that data (whoops); we only know where the last - // alignment begins. Fortunately there's no need to change the index - // format for this: we can just set the end to the maximal length of - // the final BGZF block (0xffff), and then read until BAMRecordCodec - // hits EOF. - Long blockEnd; - if (j == splitsEnd - 1) { - blockEnd = idx.prevAlignment(end) | 0xffff; - } else { - blockEnd = idx.nextAlignment(end); - } - - if (blockStart == null || blockEnd == null) { - logger.warn("Index for {} was not good. Generating probabilistic splits.", file); - return addProbabilisticSplits(splits, i, newSplits, cfg); - } - - potentialSplits.add(new FileVirtualSplit( - file, blockStart, blockEnd, fileSplit.getLocations())); - } - - for (InputSplit s : potentialSplits) { - newSplits.add(s); - } - return splitsEnd; - } - - // Handles all the splits that share the Path of the one at index i, - // returning the next index to be used. 
- private int addBAISplits(List splits, - int i, - List newSplits, - Configuration conf) throws IOException { - final Path path = ((FileSplit)splits.get(i)).getPath(); - FileSystem fs = path.getFileSystem(conf); - int splitsEnd = i; - - try (FSDataInputStream in = fs.open(path)) { - SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); - SAMSequenceDictionary dict = header.getSequenceDictionary(); - - final SeekableStream guesserSin = - WrapSeekable.openPath(fs, path); - final BAMSplitGuesser guesser = new BAMSplitGuesser(guesserSin, conf); - - final SeekableStream sin; - if (fs.exists(getBAIPath(path))) { - sin = WrapSeekable.openPath(fs, getBAIPath(path)); - } else { - sin = WrapSeekable.openPath(fs, new Path(path.toString() - .replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix))); - } - final LinearBAMIndex idx = new LinearBAMIndex(sin, dict); - - // searches for the first contig that contains linear bins - // a contig will have no linear bins if there are no reads mapped to that - // contig (e.g., reads were aligned to a whole genome, and then reads from - // only a single contig were selected) - int ctgIdx = -1; - int bin = 0; - LinearIndex linIdx; - int ctgBins; - long lastStart = 0; - do { - ctgIdx++; - linIdx = idx.getLinearIndex(ctgIdx); - ctgBins = linIdx.size(); - } while(ctgBins == 0); - long nextStart = linIdx.get(bin); - - FileVirtualSplit newSplit = null; - boolean lastWasGuessed = false; - - // loop and process all of the splits that share a single .bai - while(splitsEnd < splits.size() && - ((FileSplit)(splits.get(splitsEnd))).getPath() == path) { - FileSplit fSplit = (FileSplit)splits.get(splitsEnd); - splitsEnd++; - - if (splitsEnd >= splits.size()) { - break; - } - - long fSplitEnd = (fSplit.getStart() + fSplit.getLength()) << 16; - lastStart = nextStart; - - // we need to advance and find the first linear index bin - // that starts after the current split ends. - // this is the end of our split. - while(nextStart < fSplitEnd && ctgIdx < dict.size()) { - - // are we going off of the end of this contig? - // if so, advance to the next contig with a linear bin - if (bin + 1 >= ctgBins) { - do { - ctgIdx += 1; - bin = 0; - if (ctgIdx >= dict.size()) { - break; - } - linIdx = idx.getLinearIndex(ctgIdx); - ctgBins = linIdx.size(); - } while (ctgBins == 0); - } - if (ctgIdx < dict.size() && linIdx.size() > bin) { - nextStart = linIdx.get(bin); - bin++; - } - } - - // is this the first split? - // if so, split ranges from where the reads start until the identified end - if (fSplit.getStart() == 0) { - final SeekableStream inFile = - WrapSeekable.openPath(path.getFileSystem(conf), path); - SamReader open = SamReaderFactory.makeDefault().setUseAsyncIo(false) - .open(SamInputResource.of(inFile)); - SAMFileSpan span = open.indexing().getFilePointerSpanningReads(); - long bamStart = ((BAMFileSpan) span).getFirstOffset(); - newSplit = new FileVirtualSplit(fSplit.getPath(), - bamStart, - nextStart - 1, - fSplit.getLocations()); - newSplits.add(newSplit); - } else { - - // did we find any blocks that started in the last split? 
- // if yes, then we're fine - // if no, then we need to guess a split start (in the else clause) - if (lastStart != nextStart) { - if (lastWasGuessed) { - newSplit.setEndVirtualOffset(lastStart - 1); - lastWasGuessed = false; - } - newSplit = new FileVirtualSplit(fSplit.getPath(), - lastStart, - nextStart - 1, - fSplit.getLocations()); - newSplits.add(newSplit); - } else { - // guess the start - long alignedBeg = guesser.guessNextBAMRecordStart(fSplit.getStart(), - fSplit.getStart() + fSplit.getLength()); - newSplit.setEndVirtualOffset(alignedBeg - 1); - lastStart = alignedBeg; - nextStart = alignedBeg; - newSplit = new FileVirtualSplit(fSplit.getPath(), - alignedBeg, - alignedBeg + 1, - fSplit.getLocations()); - lastWasGuessed = true; - newSplits.add(newSplit); - } - } - lastStart = nextStart; - } - // clean up the last split - if (splitsEnd == splits.size()) { - if (lastWasGuessed) { - newSplit.setEndVirtualOffset(lastStart - 1); - lastWasGuessed = false; - } - FileSplit fSplit = (FileSplit)splits.get(splitsEnd - 1); - long fSplitEnd = (fSplit.getStart() + fSplit.getLength()) << 16; - newSplit = new FileVirtualSplit(fSplit.getPath(), - lastStart, - fSplitEnd, - fSplit.getLocations()); - newSplits.add(newSplit); - } - } - return splitsEnd + 1; + + // is this the first split? + // if so, split ranges from where the reads start until the identified end + if (fSplit.getStart() == 0) { + final SeekableStream inFile = WrapSeekable.openPath(path.getFileSystem(conf), path); + SamReader open = + SamReaderFactory.makeDefault().setUseAsyncIo(false).open(SamInputResource.of(inFile)); + SAMFileSpan span = open.indexing().getFilePointerSpanningReads(); + long bamStart = ((BAMFileSpan) span).getFirstOffset(); + newSplit = + new FileVirtualSplit( + fSplit.getPath(), bamStart, nextStart - 1, fSplit.getLocations()); + newSplits.add(newSplit); + } else { + + // did we find any blocks that started in the last split? + // if yes, then we're fine + // if no, then we need to guess a split start (in the else clause) + if (lastStart != nextStart) { + if (lastWasGuessed) { + newSplit.setEndVirtualOffset(lastStart - 1); + lastWasGuessed = false; + } + newSplit = + new FileVirtualSplit( + fSplit.getPath(), lastStart, nextStart - 1, fSplit.getLocations()); + newSplits.add(newSplit); + } else { + // guess the start + long alignedBeg = + guesser.guessNextBAMRecordStart( + fSplit.getStart(), fSplit.getStart() + fSplit.getLength()); + newSplit.setEndVirtualOffset(alignedBeg - 1); + lastStart = alignedBeg; + nextStart = alignedBeg; + newSplit = + new FileVirtualSplit( + fSplit.getPath(), alignedBeg, alignedBeg + 1, fSplit.getLocations()); + lastWasGuessed = true; + newSplits.add(newSplit); + } + } + lastStart = nextStart; + } + // clean up the last split + if (splitsEnd == splits.size()) { + if (lastWasGuessed) { + newSplit.setEndVirtualOffset(lastStart - 1); + lastWasGuessed = false; + } + FileSplit fSplit = (FileSplit) splits.get(splitsEnd - 1); + long fSplitEnd = (fSplit.getStart() + fSplit.getLength()) << 16; + newSplit = + new FileVirtualSplit(fSplit.getPath(), lastStart, fSplitEnd, fSplit.getLocations()); + newSplits.add(newSplit); + } + } + return splitsEnd + 1; + } + + // Works the same way as addIndexedSplits, to avoid having to reopen the + // file repeatedly and checking addIndexedSplits for an index repeatedly. 
+ private int addProbabilisticSplits( + List splits, int i, List newSplits, Configuration cfg) + throws IOException { + final Path path = ((FileSplit) splits.get(i)).getPath(); + final SeekableStream sin = WrapSeekable.openPath(path.getFileSystem(cfg), path); + + final BAMSplitGuesser guesser = new BAMSplitGuesser(sin, cfg); + + FileVirtualSplit previousSplit = null; + + for (; i < splits.size(); ++i) { + FileSplit fspl = (FileSplit) splits.get(i); + if (!fspl.getPath().equals(path)) { + break; + } + + long beg = fspl.getStart(); + long end = beg + fspl.getLength(); + + long alignedBeg = guesser.guessNextBAMRecordStart(beg, end); + + // As the guesser goes to the next BGZF block before looking for BAM + // records, the ending BGZF blocks have to always be traversed fully. + // Hence force the length to be 0xffff, the maximum possible. + long alignedEnd = end << 16 | 0xffff; + + if (alignedBeg == end) { + // No records detected in this split: merge it to the previous one. + // This could legitimately happen e.g. if we have a split that is + // so small that it only contains the middle part of a BGZF block. + // + // Of course, if it's the first split, then this is simply not a + // valid BAM file. + // + // FIXME: In theory, any number of splits could only contain parts + // of the BAM header before we start to see splits that contain BAM + // records. For now, we require that the split size is at least as + // big as the header and don't handle that case. + if (previousSplit == null) { + throw new IOException( + "'" + path + "': " + "no reads in first split: bad BAM file or tiny split size?"); + } + + previousSplit.setEndVirtualOffset(alignedEnd); + } else { + previousSplit = new FileVirtualSplit(path, alignedBeg, alignedEnd, fspl.getLocations()); + if (logger.isDebugEnabled()) { + final long byteOffset = alignedBeg >>> 16; + final long recordOffset = alignedBeg & 0xffff; + logger.debug( + "Split {}: byte offset: {} record offset: {}, virtual offset: {}", + i, + byteOffset, + recordOffset, + alignedBeg); + } + newSplits.add(previousSplit); + } + } + + sin.close(); + return i; + } + + private List filterByInterval(List splits, Configuration conf) + throws IOException { + if (!isBoundedTraversal(conf)) { + return splits; + } + + // Get the chunk lists (BAMFileSpans) in the intervals we want (chunks give start + // and end file pointers into a BAM file) by looking in all the indexes for the BAM + // files + Set bamFiles = new LinkedHashSet<>(); + for (InputSplit split : splits) { + bamFiles.add(((FileVirtualSplit) split).getPath()); + } + Map fileToSpan = new LinkedHashMap<>(); + SamReaderFactory readerFactory = + SamReaderFactory.makeDefault() + .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) + .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) + .setUseAsyncIo(false); + + List intervals = getIntervals(conf); + + Map fileToUnmapped = new LinkedHashMap<>(); + boolean traverseUnplacedUnmapped = traverseUnplacedUnmapped(conf); + + for (Path bamFile : bamFiles) { + FileSystem fs = bamFile.getFileSystem(conf); + + try (SamReader samReader = + readerFactory.open(NIOFileUtil.asPath(fs.makeQualified(bamFile).toUri()))) { + if (!samReader.hasIndex()) { + throw new IllegalArgumentException( + "Intervals set but no BAM index file found for " + bamFile); } - - // Works the same way as addIndexedSplits, to avoid having to reopen the - // file repeatedly and checking addIndexedSplits for an index repeatedly. 
- private int addProbabilisticSplits( - List splits, int i, List newSplits, - Configuration cfg) - throws IOException - { - final Path path = ((FileSplit)splits.get(i)).getPath(); - final SeekableStream sin = - WrapSeekable.openPath(path.getFileSystem(cfg), path); - - final BAMSplitGuesser guesser = new BAMSplitGuesser(sin, cfg); - - FileVirtualSplit previousSplit = null; - - for (; i < splits.size(); ++i) { - FileSplit fspl = (FileSplit)splits.get(i); - if (!fspl.getPath().equals(path)) - break; - - long beg = fspl.getStart(); - long end = beg + fspl.getLength(); - - long alignedBeg = guesser.guessNextBAMRecordStart(beg, end); - - // As the guesser goes to the next BGZF block before looking for BAM - // records, the ending BGZF blocks have to always be traversed fully. - // Hence force the length to be 0xffff, the maximum possible. - long alignedEnd = end << 16 | 0xffff; - - if (alignedBeg == end) { - // No records detected in this split: merge it to the previous one. - // This could legitimately happen e.g. if we have a split that is - // so small that it only contains the middle part of a BGZF block. - // - // Of course, if it's the first split, then this is simply not a - // valid BAM file. - // - // FIXME: In theory, any number of splits could only contain parts - // of the BAM header before we start to see splits that contain BAM - // records. For now, we require that the split size is at least as - // big as the header and don't handle that case. - if (previousSplit == null) - throw new IOException("'" + path + "': "+ - "no reads in first split: bad BAM file or tiny split size?"); - - previousSplit.setEndVirtualOffset(alignedEnd); - } else { - previousSplit = new FileVirtualSplit( - path, alignedBeg, alignedEnd, fspl.getLocations()); - if (logger.isDebugEnabled()) { - final long byteOffset = alignedBeg >>> 16; - final long recordOffset = alignedBeg & 0xffff; - logger.debug( - "Split {}: byte offset: {} record offset: {}, virtual offset: {}", - i, byteOffset, recordOffset, alignedBeg); - } - newSplits.add(previousSplit); - } - } - - sin.close(); - return i; - } - - private List filterByInterval(List splits, Configuration conf) - throws IOException { - if (!isBoundedTraversal(conf)) { - return splits; - } - - // Get the chunk lists (BAMFileSpans) in the intervals we want (chunks give start - // and end file pointers into a BAM file) by looking in all the indexes for the BAM - // files - Set bamFiles = new LinkedHashSet<>(); - for (InputSplit split : splits) { - bamFiles.add(((FileVirtualSplit) split).getPath()); - } - Map fileToSpan = new LinkedHashMap<>(); - SamReaderFactory readerFactory = SamReaderFactory.makeDefault() - .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) - .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) - .setUseAsyncIo(false); - - List intervals = getIntervals(conf); - - Map fileToUnmapped = new LinkedHashMap<>(); - boolean traverseUnplacedUnmapped = traverseUnplacedUnmapped(conf); - - for (Path bamFile : bamFiles) { - FileSystem fs = bamFile.getFileSystem(conf); - - try (SamReader samReader = - readerFactory.open(NIOFileUtil.asPath(fs.makeQualified(bamFile).toUri()))) { - if (!samReader.hasIndex()) { - throw new IllegalArgumentException("Intervals set but no BAM index file found for " + bamFile); - - } - - try (FSDataInputStream in = fs.open(bamFile)) { - SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); - SAMSequenceDictionary dict = header.getSequenceDictionary(); - BAMIndex idx = samReader.indexing().getIndex(); - - 
if (intervals != null && !intervals.isEmpty()) { - QueryInterval[] queryIntervals = prepareQueryIntervals(intervals, dict); - fileToSpan.put(bamFile, BAMFileReader.getFileSpan(queryIntervals, idx)); - } - - if (traverseUnplacedUnmapped) { - long startOfLastLinearBin = idx.getStartOfLastLinearBin(); - long noCoordinateCount = ((AbstractBAMFileIndex) idx).getNoCoordinateCount(); - if (startOfLastLinearBin != -1 && noCoordinateCount > 0) { - // add FileVirtualSplit (with no intervals) from startOfLastLinearBin to - // end of file - fileToUnmapped.put(bamFile, startOfLastLinearBin); - } - } - } - - } - } - - // Use the chunks to filter the splits - List filteredSplits = new ArrayList<>(); - for (InputSplit split : splits) { - FileVirtualSplit virtualSplit = (FileVirtualSplit) split; - long splitStart = virtualSplit.getStartVirtualOffset(); - long splitEnd = virtualSplit.getEndVirtualOffset(); - BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(splitStart, splitEnd)); - BAMFileSpan span = fileToSpan.get(virtualSplit.getPath()); - if (span == null) { - continue; - } - span = (BAMFileSpan) span.removeContentsBefore(splitSpan); - span = (BAMFileSpan) span.removeContentsAfter(splitSpan); - if (!span.getChunks().isEmpty()) { - filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), splitStart, splitEnd, - virtualSplit.getLocations(), span.toCoordinateArray())); - } - } - - if (traverseUnplacedUnmapped) { - // add extra splits that contain only unmapped reads - for (Map.Entry e : fileToUnmapped.entrySet()) { - Path file = e.getKey(); - long unmappedStart = e.getValue(); - boolean foundFirstSplit = false; - for (InputSplit split : splits) { // TODO: are splits in order of start position? - FileVirtualSplit virtualSplit = (FileVirtualSplit) split; - if (virtualSplit.getPath().equals(file)) { - long splitStart = virtualSplit.getStartVirtualOffset(); - long splitEnd = virtualSplit.getEndVirtualOffset(); - if (foundFirstSplit) { - filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), splitStart, splitEnd, - virtualSplit.getLocations())); - } else if (splitStart <= unmappedStart && unmappedStart <= splitEnd) { - filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), unmappedStart, splitEnd, - virtualSplit.getLocations())); - foundFirstSplit = true; - } - } - } - } - } - - return filteredSplits; - } - - /** - * Converts a List of SimpleIntervals into the format required by the SamReader query API - * @param rawIntervals SimpleIntervals to be converted - * @return A sorted, merged list of QueryIntervals suitable for passing to the SamReader query API - */ - static QueryInterval[] prepareQueryIntervals( final List - rawIntervals, final SAMSequenceDictionary sequenceDictionary ) { - if ( rawIntervals == null || rawIntervals.isEmpty() ) { - return null; - } - - // Convert each SimpleInterval to a QueryInterval - final QueryInterval[] convertedIntervals = - rawIntervals.stream() - .map(rawInterval -> convertSimpleIntervalToQueryInterval(rawInterval, sequenceDictionary)) - .toArray(QueryInterval[]::new); - - // Intervals must be optimized (sorted and merged) in order to use the htsjdk query API - return QueryInterval.optimizeIntervals(convertedIntervals); - } - /** - * Converts an interval in SimpleInterval format into an htsjdk QueryInterval. 
- * - * In doing so, a header lookup is performed to convert from contig name to index - * - * @param interval interval to convert - * @param sequenceDictionary sequence dictionary used to perform the conversion - * @return an equivalent interval in QueryInterval format - */ - private static QueryInterval convertSimpleIntervalToQueryInterval( final Interval interval, final SAMSequenceDictionary sequenceDictionary ) { - if (interval == null) { - throw new IllegalArgumentException("interval may not be null"); - } - if (sequenceDictionary == null) { - throw new IllegalArgumentException("sequence dictionary may not be null"); - } - - final int contigIndex = sequenceDictionary.getSequenceIndex(interval.getContig()); - if ( contigIndex == -1 ) { - throw new IllegalArgumentException("Contig " + interval.getContig() + " not present in reads sequence " + - "dictionary"); - } - - return new QueryInterval(contigIndex, interval.getStart(), interval.getEnd()); - } - - @Override public boolean isSplitable(JobContext job, Path path) { - return true; - } + + try (FSDataInputStream in = fs.open(bamFile)) { + SAMFileHeader header = SAMHeaderReader.readSAMHeaderFrom(in, conf); + SAMSequenceDictionary dict = header.getSequenceDictionary(); + BAMIndex idx = samReader.indexing().getIndex(); + + if (intervals != null && !intervals.isEmpty()) { + QueryInterval[] queryIntervals = prepareQueryIntervals(intervals, dict); + fileToSpan.put(bamFile, BAMFileReader.getFileSpan(queryIntervals, idx)); + } + + if (traverseUnplacedUnmapped) { + long startOfLastLinearBin = idx.getStartOfLastLinearBin(); + long noCoordinateCount = ((AbstractBAMFileIndex) idx).getNoCoordinateCount(); + if (startOfLastLinearBin != -1 && noCoordinateCount > 0) { + // add FileVirtualSplit (with no intervals) from startOfLastLinearBin to + // end of file + fileToUnmapped.put(bamFile, startOfLastLinearBin); + } + } + } + } + } + + // Use the chunks to filter the splits + List filteredSplits = new ArrayList<>(); + for (InputSplit split : splits) { + FileVirtualSplit virtualSplit = (FileVirtualSplit) split; + long splitStart = virtualSplit.getStartVirtualOffset(); + long splitEnd = virtualSplit.getEndVirtualOffset(); + BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(splitStart, splitEnd)); + BAMFileSpan span = fileToSpan.get(virtualSplit.getPath()); + if (span == null) { + continue; + } + span = (BAMFileSpan) span.removeContentsBefore(splitSpan); + span = (BAMFileSpan) span.removeContentsAfter(splitSpan); + if (!span.getChunks().isEmpty()) { + filteredSplits.add( + new FileVirtualSplit( + virtualSplit.getPath(), + splitStart, + splitEnd, + virtualSplit.getLocations(), + span.toCoordinateArray())); + } + } + + if (traverseUnplacedUnmapped) { + // add extra splits that contain only unmapped reads + for (Map.Entry e : fileToUnmapped.entrySet()) { + Path file = e.getKey(); + long unmappedStart = e.getValue(); + boolean foundFirstSplit = false; + for (InputSplit split : splits) { // TODO: are splits in order of start position? 
+ FileVirtualSplit virtualSplit = (FileVirtualSplit) split; + if (virtualSplit.getPath().equals(file)) { + long splitStart = virtualSplit.getStartVirtualOffset(); + long splitEnd = virtualSplit.getEndVirtualOffset(); + if (foundFirstSplit) { + filteredSplits.add( + new FileVirtualSplit( + virtualSplit.getPath(), splitStart, splitEnd, virtualSplit.getLocations())); + } else if (splitStart <= unmappedStart && unmappedStart <= splitEnd) { + filteredSplits.add( + new FileVirtualSplit( + virtualSplit.getPath(), + unmappedStart, + splitEnd, + virtualSplit.getLocations())); + foundFirstSplit = true; + } + } + } + } + } + + return filteredSplits; + } + + @Override + public boolean isSplitable(JobContext job, Path path) { + return true; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/BAMOutputFormat.java index 951f910..4002dae 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BAMOutputFormat.java @@ -24,18 +24,17 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -/** Currently this only locks down the value type of the {@link +/** + * Currently this only locks down the value type of the {@link * org.apache.hadoop.mapreduce.OutputFormat}: contains no functionality. */ -public abstract class BAMOutputFormat - extends FileOutputFormat { - /** - * If set to true, write .splitting-bai files for every BAM file - * (defaults to false). - * A splitting BAI file (not to be confused with a regular BAI file) contains an - * index of offsets that the BAM file can be read from; they are used by - * {@link BAMInputFormat} to construct splits. - */ - public static final String WRITE_SPLITTING_BAI = - "hadoopbam.bam.write-splitting-bai"; +public abstract class BAMOutputFormat extends FileOutputFormat { + + /** + * If set to true, write .splitting-bai files for every BAM file (defaults to + * false). A splitting BAI file (not to be confused with a regular BAI file) contains + * an index of offsets that the BAM file can be read from; they are used by {@link BAMInputFormat} + * to construct splits. 
+ */ + public static final String WRITE_SPLITTING_BAI = "hadoopbam.bam.write-splitting-bai"; } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BAMRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/BAMRecordReader.java index 8767a4c..4c1ab7c 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BAMRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BAMRecordReader.java @@ -26,17 +26,18 @@ import htsjdk.samtools.BAMFileSpan; import htsjdk.samtools.Chunk; import htsjdk.samtools.QueryInterval; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamFiles; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.ValidationStringency; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.Interval; import java.io.IOException; -import java.nio.file.Paths; import java.util.List; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -45,11 +46,6 @@ import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; - -import htsjdk.samtools.ValidationStringency; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMRecord; - import org.seqdoop.hadoop_bam.util.MurmurHash3; import org.seqdoop.hadoop_bam.util.NIOFileUtil; import org.seqdoop.hadoop_bam.util.SAMHeaderReader; @@ -57,177 +53,195 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** The key is the bitwise OR of the reference sequence ID in the upper 32 bits - * and the 0-based leftmost coordinate in the lower. +/** + * The key is the bitwise OR of the reference sequence ID in the upper 32 bits and the 0-based + * leftmost coordinate in the lower. */ -public class BAMRecordReader - extends RecordReader -{ - private static final Logger logger = LoggerFactory.getLogger(BAMRecordReader.class); - private final LongWritable key = new LongWritable(); - private final SAMRecordWritable record = new SAMRecordWritable(); - private BAMFileReader bamFileReader; - - private CloseableIterator iterator; - private boolean reachedEnd; - private WrapSeekable in; - private long fileStart; - private long virtualEnd; - private boolean isInitialized = false; - - /** Note: this is the only getKey function that handles unmapped reads - * specially! - */ - public static long getKey(final SAMRecord rec) { - final int refIdx = rec.getReferenceIndex(); - final int start = rec.getAlignmentStart(); - - if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) - return getKey(refIdx, start); - - // Put unmapped reads at the end, but don't give them all the exact same - // key so that they can be distributed to different reducers. - // - // A random number would probably be best, but to ensure that the same - // record always gets the same key we use a fast hash instead. - // - // We avoid using hashCode(), because it's not guaranteed to have the - // same value across different processes. - - int hash = 0; - byte[] var; - if ((var = rec.getVariableBinaryRepresentation()) != null) { - // Undecoded BAM record: just hash its raw data. - hash = (int)MurmurHash3.murmurhash3(var, hash); - } else { - // Decoded BAM record or any SAM record: hash a few representative - // fields together. 
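To make the comment above concrete: an unmapped read's key is derived only from the record's own fields, so every process computes the same key for the same read. A minimal sketch of the MurmurHash3 chaining used just below (the literal field values are invented for illustration):

// Illustrative only: chaining MurmurHash3 over representative fields gives a
// value that is stable across JVMs, unlike Object.hashCode().
int hash = 0;
hash = (int) MurmurHash3.murmurhash3("read0001", hash);                      // read name
hash = (int) MurmurHash3.murmurhash3(new byte[] {'A', 'C', 'G', 'T'}, hash); // read bases
// getKey0(Integer.MAX_VALUE, hash) then packs the hash into the low half of
// the key, placing unmapped reads after every mapped position.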
- hash = (int)MurmurHash3.murmurhash3(rec.getReadName(), hash); - hash = (int)MurmurHash3.murmurhash3(rec.getReadBases(), hash); - hash = (int)MurmurHash3.murmurhash3(rec.getBaseQualities(), hash); - hash = (int)MurmurHash3.murmurhash3(rec.getCigarString(), hash); - } - return getKey0(Integer.MAX_VALUE, hash); - } - - /** @param alignmentStart 1-based leftmost coordinate. */ - public static long getKey(int refIdx, int alignmentStart) { - return getKey0(refIdx, alignmentStart-1); - } - - /** @param alignmentStart0 0-based leftmost coordinate. */ - public static long getKey0(int refIdx, int alignmentStart0) { - return (long)refIdx << 32 | alignmentStart0; - } - - @Override public void initialize(InputSplit spl, TaskAttemptContext ctx) - throws IOException - { - // This method should only be called once (see Hadoop API). However, - // there seems to be disagreement between implementations that call - // initialize() and Hadoop-BAM's own code that relies on - // {@link BAMInputFormat} to call initialize() when the reader is - // created. Therefore we add this check for the time being. - if(isInitialized) - close(); - isInitialized = true; - reachedEnd = false; - - final Configuration conf = ctx.getConfiguration(); - - final FileVirtualSplit split = (FileVirtualSplit)spl; - final Path file = split.getPath(); - final FileSystem fs = file.getFileSystem(conf); - - ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf); - - java.nio.file.Path index = SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri())); - Path fileIndex = index == null ? null : new Path(index.toUri()); - SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex); - in = WrapSeekable.openPath(fs, file); - SamReader samReader = createSamReader(in, indexStream, stringency); - final SAMFileHeader header = samReader.getFileHeader(); - - long virtualStart = split.getStartVirtualOffset(); - - fileStart = virtualStart >>> 16; - virtualEnd = split.getEndVirtualOffset(); - - SamReader.PrimitiveSamReader primitiveSamReader = - ((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader(); - bamFileReader = (BAMFileReader) primitiveSamReader; - - if (logger.isDebugEnabled()) { - final long recordStart = virtualStart & 0xffff; - logger.debug("Initialized BAMRecordReader; byte offset: {}, record offset: {}", - fileStart, recordStart); - } - - if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) { - throw new IllegalArgumentException("Property hadoopbam.bam.keep-paired-reads-together is no longer honored."); - } - - boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf); - if (boundedTraversal && split.getIntervalFilePointers() != null) { - // return reads for intervals - List intervals = BAMInputFormat.getIntervals(conf); - QueryInterval[] queryIntervals = BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary()); - iterator = bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers()); - } else if (boundedTraversal && split.getIntervalFilePointers() == null) { - // return unmapped reads - iterator = bamFileReader.queryUnmapped(); - } else { - // return everything - BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd)); - iterator = bamFileReader.getIterator(splitSpan); - } - } - - private SamReader createSamReader(SeekableStream in, SeekableStream inIndex, - ValidationStringency stringency) { - SamReaderFactory readerFactory = 
SamReaderFactory.makeDefault() - .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) - .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) - .setUseAsyncIo(false); - if (stringency != null) { - readerFactory.validationStringency(stringency); - } - SamInputResource resource = SamInputResource.of(in); - if (inIndex != null) { - resource.index(inIndex); - } - return readerFactory.open(resource); - } - - @Override public void close() throws IOException { - bamFileReader.close(); - } - - /** Unless the end has been reached, this only takes file position into - * account, not the position within the block. - */ - @Override public float getProgress() throws IOException { - if (reachedEnd) - return 1; - else { - final long filePos = in.position(); - final long fileEnd = virtualEnd >>> 16; - // Add 1 to the denominator to make sure it doesn't reach 1 here when - // filePos == fileEnd. - return (float)(filePos - fileStart) / (fileEnd - fileStart + 1); - } - } - @Override public LongWritable getCurrentKey () { return key; } - @Override public SAMRecordWritable getCurrentValue() { return record; } - - @Override public boolean nextKeyValue() { - if (!iterator.hasNext()) { - reachedEnd = true; - return false; - } - final SAMRecord r = iterator.next(); - key.set(getKey(r)); - record.set(r); - return true; - } +public class BAMRecordReader extends RecordReader { + + private static final Logger logger = LoggerFactory.getLogger(BAMRecordReader.class); + private final LongWritable key = new LongWritable(); + private final SAMRecordWritable record = new SAMRecordWritable(); + private BAMFileReader bamFileReader; + + private CloseableIterator iterator; + private boolean reachedEnd; + private WrapSeekable in; + private long fileStart; + private long virtualEnd; + private boolean isInitialized = false; + + /** Note: this is the only getKey function that handles unmapped reads specially! */ + public static long getKey(final SAMRecord rec) { + final int refIdx = rec.getReferenceIndex(); + final int start = rec.getAlignmentStart(); + + if (!(rec.getReadUnmappedFlag() || refIdx < 0 || start < 0)) { + return getKey(refIdx, start); + } + + // Put unmapped reads at the end, but don't give them all the exact same + // key so that they can be distributed to different reducers. + // + // A random number would probably be best, but to ensure that the same + // record always gets the same key we use a fast hash instead. + // + // We avoid using hashCode(), because it's not guaranteed to have the + // same value across different processes. + + int hash = 0; + byte[] var; + if ((var = rec.getVariableBinaryRepresentation()) != null) { + // Undecoded BAM record: just hash its raw data. + hash = (int) MurmurHash3.murmurhash3(var, hash); + } else { + // Decoded BAM record or any SAM record: hash a few representative + // fields together. + hash = (int) MurmurHash3.murmurhash3(rec.getReadName(), hash); + hash = (int) MurmurHash3.murmurhash3(rec.getReadBases(), hash); + hash = (int) MurmurHash3.murmurhash3(rec.getBaseQualities(), hash); + hash = (int) MurmurHash3.murmurhash3(rec.getCigarString(), hash); + } + return getKey0(Integer.MAX_VALUE, hash); + } + + /** @param alignmentStart 1-based leftmost coordinate. */ + public static long getKey(int refIdx, int alignmentStart) { + return getKey0(refIdx, alignmentStart - 1); + } + + /** @param alignmentStart0 0-based leftmost coordinate. 
*/ + public static long getKey0(int refIdx, int alignmentStart0) { + return (long) refIdx << 32 | alignmentStart0; + } + + @Override + public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { + // This method should only be called once (see Hadoop API). However, + // there seems to be disagreement between implementations that call + // initialize() and Hadoop-BAM's own code that relies on + // {@link BAMInputFormat} to call initialize() when the reader is + // created. Therefore we add this check for the time being. + if (isInitialized) { + close(); + } + isInitialized = true; + reachedEnd = false; + + final Configuration conf = ctx.getConfiguration(); + + final FileVirtualSplit split = (FileVirtualSplit) spl; + final Path file = split.getPath(); + final FileSystem fs = file.getFileSystem(conf); + + ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf); + + java.nio.file.Path index = + SamFiles.findIndex(NIOFileUtil.asPath(fs.makeQualified(file).toUri())); + Path fileIndex = index == null ? null : new Path(index.toUri()); + SeekableStream indexStream = fileIndex == null ? null : WrapSeekable.openPath(fs, fileIndex); + in = WrapSeekable.openPath(fs, file); + SamReader samReader = createSamReader(in, indexStream, stringency); + final SAMFileHeader header = samReader.getFileHeader(); + + long virtualStart = split.getStartVirtualOffset(); + + fileStart = virtualStart >>> 16; + virtualEnd = split.getEndVirtualOffset(); + + SamReader.PrimitiveSamReader primitiveSamReader = + ((SamReader.PrimitiveSamReaderToSamReaderAdapter) samReader).underlyingReader(); + bamFileReader = (BAMFileReader) primitiveSamReader; + + if (logger.isDebugEnabled()) { + final long recordStart = virtualStart & 0xffff; + logger.debug( + "Initialized BAMRecordReader; byte offset: {}, record offset: {}", + fileStart, + recordStart); + } + + if (conf.getBoolean("hadoopbam.bam.keep-paired-reads-together", false)) { + throw new IllegalArgumentException( + "Property hadoopbam.bam.keep-paired-reads-together is no longer honored."); + } + + boolean boundedTraversal = BAMInputFormat.isBoundedTraversal(conf); + if (boundedTraversal && split.getIntervalFilePointers() != null) { + // return reads for intervals + List intervals = BAMInputFormat.getIntervals(conf); + QueryInterval[] queryIntervals = + BAMInputFormat.prepareQueryIntervals(intervals, header.getSequenceDictionary()); + iterator = + bamFileReader.createIndexIterator(queryIntervals, false, split.getIntervalFilePointers()); + } else if (boundedTraversal && split.getIntervalFilePointers() == null) { + // return unmapped reads + iterator = bamFileReader.queryUnmapped(); + } else { + // return everything + BAMFileSpan splitSpan = new BAMFileSpan(new Chunk(virtualStart, virtualEnd)); + iterator = bamFileReader.getIterator(splitSpan); + } + } + + private SamReader createSamReader( + SeekableStream in, SeekableStream inIndex, ValidationStringency stringency) { + SamReaderFactory readerFactory = + SamReaderFactory.makeDefault() + .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, true) + .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) + .setUseAsyncIo(false); + if (stringency != null) { + readerFactory.validationStringency(stringency); + } + SamInputResource resource = SamInputResource.of(in); + if (inIndex != null) { + resource.index(inIndex); + } + return readerFactory.open(resource); + } + + @Override + public void close() throws IOException { + bamFileReader.close(); + } + + /** + * Unless the end has been 
reached, this only takes file position into account, not the position + * within the block. + */ + @Override + public float getProgress() throws IOException { + if (reachedEnd) { + return 1; + } else { + final long filePos = in.position(); + final long fileEnd = virtualEnd >>> 16; + // Add 1 to the denominator to make sure it doesn't reach 1 here when + // filePos == fileEnd. + return (float) (filePos - fileStart) / (fileEnd - fileStart + 1); + } + } + + @Override + public LongWritable getCurrentKey() { + return key; + } + + @Override + public SAMRecordWritable getCurrentValue() { + return record; + } + + @Override + public boolean nextKeyValue() { + if (!iterator.hasNext()) { + reachedEnd = true; + return false; + } + final SAMRecord r = iterator.next(); + key.set(getKey(r)); + record.set(r); + return true; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BAMRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/BAMRecordWriter.java index 5a8f3da..9085d01 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BAMRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BAMRecordWriter.java @@ -22,11 +22,6 @@ package org.seqdoop.hadoop_bam; -import java.io.IOException; -import java.io.OutputStream; -import java.io.StringWriter; -import java.io.Writer; - import htsjdk.samtools.BAMRecordCodec; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; @@ -35,122 +30,113 @@ import htsjdk.samtools.SAMTextHeaderCodec; import htsjdk.samtools.util.BinaryCodec; import htsjdk.samtools.util.BlockCompressedOutputStream; - +import java.io.IOException; +import java.io.OutputStream; +import java.io.StringWriter; +import java.io.Writer; import java.nio.charset.Charset; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -/** A base {@link RecordWriter} for BAM records. +/** + * A base {@link RecordWriter} for BAM records. * - *

<p>Handles the output stream, writing the header if requested, and provides - * the {@link #writeAlignment} function for subclasses.</p>
+ *
+ * <p>
Handles the output stream, writing the header if requested, and provides the {@link + * #writeAlignment} function for subclasses. */ -public abstract class BAMRecordWriter - extends RecordWriter -{ - private OutputStream origOutput; - private BinaryCodec binaryCodec; - private BAMRecordCodec recordCodec; - private BlockCompressedOutputStream compressedOut; - private SplittingBAMIndexer splittingBAMIndexer; - - /** A SAMFileHeader is read from the input Path. */ - public BAMRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - init( - output, - SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), - writeHeader, ctx); - if (ctx.getConfiguration().getBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false)) { - Path splittingIndex = BAMInputFormat.getIdxPath(output); - OutputStream splittingIndexOutput = - output.getFileSystem(ctx.getConfiguration()).create(splittingIndex); - splittingBAMIndexer = new SplittingBAMIndexer(splittingIndexOutput); - } - } - public BAMRecordWriter( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - if (ctx.getConfiguration().getBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false)) { - Path splittingIndex = BAMInputFormat.getIdxPath(output); - OutputStream splittingIndexOutput = - output.getFileSystem(ctx.getConfiguration()).create(splittingIndex); - splittingBAMIndexer = new SplittingBAMIndexer(splittingIndexOutput); - } - } - - // Working around not being able to call a constructor other than as the - // first statement... - private void init( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - } - private void init( - OutputStream output, SAMFileHeader header, boolean writeHeader) - throws IOException - { - origOutput = output; - - compressedOut = new BlockCompressedOutputStream(origOutput, null); - - binaryCodec = new BinaryCodec(compressedOut); - recordCodec = new BAMRecordCodec(header); - recordCodec.setOutputStream(compressedOut); - - if (writeHeader) - this.writeHeader(header); - } - - @Override public void close(TaskAttemptContext ctx) throws IOException { - // Don't close the codec, we don't want BlockCompressedOutputStream's - // file terminator to be output. But do flush the stream. - binaryCodec.getOutputStream().flush(); - - // Finish indexer with file length - if (splittingBAMIndexer != null) { - splittingBAMIndexer.finish(compressedOut.getFilePointer() >> 16); - } - - // And close the original output. 
- origOutput.close(); - } - - protected void writeAlignment(final SAMRecord rec) throws IOException { - if (splittingBAMIndexer != null) { - splittingBAMIndexer.processAlignment(compressedOut.getFilePointer()); - } - recordCodec.encode(rec); - } - - private void writeHeader(final SAMFileHeader header) { - binaryCodec.writeBytes("BAM\001".getBytes(Charset.forName("UTF8"))); - - final Writer sw = new StringWriter(); - new SAMTextHeaderCodec().encode(sw, header); - - binaryCodec.writeString(sw.toString(), true, false); - - final SAMSequenceDictionary dict = header.getSequenceDictionary(); - - binaryCodec.writeInt(dict.size()); - for (final SAMSequenceRecord rec : dict.getSequences()) { - binaryCodec.writeString(rec.getSequenceName(), true, true); - binaryCodec.writeInt (rec.getSequenceLength()); - } - } +public abstract class BAMRecordWriter extends RecordWriter { + + private OutputStream origOutput; + private BinaryCodec binaryCodec; + private BAMRecordCodec recordCodec; + private BlockCompressedOutputStream compressedOut; + private SplittingBAMIndexer splittingBAMIndexer; + + /** A SAMFileHeader is read from the input Path. */ + public BAMRecordWriter(Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init( + output, SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), writeHeader, ctx); + if (ctx.getConfiguration().getBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false)) { + Path splittingIndex = BAMInputFormat.getIdxPath(output); + OutputStream splittingIndexOutput = + output.getFileSystem(ctx.getConfiguration()).create(splittingIndex); + splittingBAMIndexer = new SplittingBAMIndexer(splittingIndexOutput); + } + } + + public BAMRecordWriter( + Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + if (ctx.getConfiguration().getBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, false)) { + Path splittingIndex = BAMInputFormat.getIdxPath(output); + OutputStream splittingIndexOutput = + output.getFileSystem(ctx.getConfiguration()).create(splittingIndex); + splittingBAMIndexer = new SplittingBAMIndexer(splittingIndexOutput); + } + } + + // Working around not being able to call a constructor other than as the + // first statement... + private void init(Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + } + + private void init(OutputStream output, SAMFileHeader header, boolean writeHeader) + throws IOException { + origOutput = output; + + compressedOut = new BlockCompressedOutputStream(origOutput, null); + + binaryCodec = new BinaryCodec(compressedOut); + recordCodec = new BAMRecordCodec(header); + recordCodec.setOutputStream(compressedOut); + + if (writeHeader) { + this.writeHeader(header); + } + } + + @Override + public void close(TaskAttemptContext ctx) throws IOException { + // Don't close the codec, we don't want BlockCompressedOutputStream's + // file terminator to be output. But do flush the stream. + binaryCodec.getOutputStream().flush(); + + // Finish indexer with file length + if (splittingBAMIndexer != null) { + splittingBAMIndexer.finish(compressedOut.getFilePointer() >> 16); + } + + // And close the original output. 
+ origOutput.close(); + } + + protected void writeAlignment(final SAMRecord rec) throws IOException { + if (splittingBAMIndexer != null) { + splittingBAMIndexer.processAlignment(compressedOut.getFilePointer()); + } + recordCodec.encode(rec); + } + + private void writeHeader(final SAMFileHeader header) { + binaryCodec.writeBytes("BAM\001".getBytes(Charset.forName("UTF8"))); + + final Writer sw = new StringWriter(); + new SAMTextHeaderCodec().encode(sw, header); + + binaryCodec.writeString(sw.toString(), true, false); + + final SAMSequenceDictionary dict = header.getSequenceDictionary(); + + binaryCodec.writeInt(dict.size()); + for (final SAMSequenceRecord rec : dict.getSequences()) { + binaryCodec.writeString(rec.getSequenceName(), true, true); + binaryCodec.writeInt(rec.getSequenceLength()); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BAMSplitGuesser.java b/src/main/java/org/seqdoop/hadoop_bam/BAMSplitGuesser.java index 7e1bd7f..91fecc7 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BAMSplitGuesser.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BAMSplitGuesser.java @@ -23,379 +23,373 @@ package org.seqdoop.hadoop_bam; import htsjdk.samtools.BAMFileSpan; +import htsjdk.samtools.BAMRecordCodec; +import htsjdk.samtools.FileTruncatedException; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMFileSpan; +import htsjdk.samtools.SAMFormatException; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMRecordHelper; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.seekablestream.ByteArraySeekableStream; -import java.io.InputStream; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.samtools.util.RuntimeEOFException; +import htsjdk.samtools.util.RuntimeIOException; import java.io.IOException; +import java.io.InputStream; import java.util.Arrays; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.util.GenericOptionsParser; - -import htsjdk.samtools.BAMRecordCodec; -import htsjdk.samtools.FileTruncatedException; -import htsjdk.samtools.SAMFormatException; -import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.samtools.util.BlockCompressedInputStream; -import htsjdk.samtools.util.RuntimeEOFException; -import htsjdk.samtools.util.RuntimeIOException; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; import org.seqdoop.hadoop_bam.util.WrapSeekable; -/** A class for heuristically finding BAM record positions inside an area of - * a BAM file. - */ +/** A class for heuristically finding BAM record positions inside an area of a BAM file. */ public class BAMSplitGuesser extends BaseSplitGuesser { - private SeekableStream inFile; - private BlockCompressedInputStream bgzf; - private final BAMRecordCodec bamCodec; - private final int referenceSequenceCount; - private final SAMFileHeader header; - - // We want to go through this many BGZF blocks fully, checking that they - // contain valid BAM records, when guessing a BAM record position. - private final static byte BLOCKS_NEEDED_FOR_GUESS = 3; - - // Since the max size of a BGZF block is 0xffff (64K), and we might be just - // one byte off from the start of the previous one, we need 0xfffe bytes for - // the start, and then 0xffff times the number of blocks we want to go - // through. 
- private final static int MAX_BYTES_READ = - BLOCKS_NEEDED_FOR_GUESS * 0xffff + 0xfffe; - - private final static int SHORTEST_POSSIBLE_BAM_RECORD = 4*9 + 1 + 1 + 1; - - /** The stream must point to a valid BAM file, because the header is read - * from it. - */ - public BAMSplitGuesser( - SeekableStream ss, Configuration conf) - throws IOException - { - this(ss, ss, conf); - - // Secondary check that the header points to a BAM file: Picard can get - // things wrong due to its autodetection. - ss.seek(0); - if (ss.read(buf.array(), 0, 4) != 4 || buf.getInt(0) != BGZF_MAGIC) - throw new SAMFormatException("Does not seem like a BAM file"); - } - - public BAMSplitGuesser( - SeekableStream ss, InputStream headerStream, Configuration conf) - throws IOException - { - inFile = ss; - - header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf); - referenceSequenceCount = header.getSequenceDictionary().size(); - - bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory()); - } - - /** Finds a virtual BAM record position in the physical position range - * [beg,end). Returns end if no BAM record was found. - */ - public long guessNextBAMRecordStart(long beg, long end) - throws IOException - { - // Use a reader to skip through the headers at the beginning of a BAM file, since - // the headers may exceed MAX_BYTES_READ in length. Don't close the reader - // otherwise it will close the underlying stream, which we continue to read from - // on subsequent calls to this method. - if (beg == 0) { - this.inFile.seek(beg); - SamReader open = SamReaderFactory.makeDefault().setUseAsyncIo(false) - .open(SamInputResource.of(inFile)); - SAMFileSpan span = open.indexing().getFilePointerSpanningReads(); - if (span instanceof BAMFileSpan) { - return ((BAMFileSpan) span).getFirstOffset(); - } - } - - // Buffer what we need to go through. - - byte[] arr = new byte[MAX_BYTES_READ]; - - this.inFile.seek(beg); - int totalRead = 0; - for (int left = Math.min((int)(end - beg), arr.length); left > 0;) { - final int r = inFile.read(arr, totalRead, left); - if (r < 0) - break; - totalRead += r; - left -= r; - } - arr = Arrays.copyOf(arr, totalRead); - - this.in = new ByteArraySeekableStream(arr); - - this.bgzf = new BlockCompressedInputStream(this.in); - this.bgzf.setCheckCrcs(true); - - this.bamCodec.setInputStream(bgzf); - - final int firstBGZFEnd = Math.min((int)(end - beg), 0xffff); - - // cp: Compressed Position, indexes the entire BGZF input. - for (int cp = 0;; ++cp) { - final PosSize psz = guessNextBGZFPos(cp, firstBGZFEnd); - if (psz == null) - return end; - - final int cp0 = cp = psz.pos; - final long cp0Virt = (long)cp0 << 16; - try { - bgzf.seek(cp0Virt); - - // This has to catch Throwable, because it's possible to get an - // OutOfMemoryError due to an overly large size. - } catch (Throwable e) { - // Guessed BGZF position incorrectly: try the next guess. - continue; - } - - // up: Uncompressed Position, indexes the data inside the BGZF block. - for (int up = 0;; ++up) { - final int up0 = up = guessNextBAMPos(cp0Virt, up, psz.size); - - if (up0 < 0) { - // No BAM records found in the BGZF block: try the next BGZF - // block. - break; - } - - // Verify that we can actually decode BLOCKS_NEEDED_FOR_GUESS worth - // of records starting at (cp0,up0). 
- bgzf.seek(cp0Virt | up0); - boolean decodedAny = false; - try { - byte b = 0; - int prevCP = cp0; - while (b < BLOCKS_NEEDED_FOR_GUESS) - { - SAMRecord record = bamCodec.decode(); - if (record == null) { - break; - } - record.setHeaderStrict(header); - SAMRecordHelper.eagerDecode(record); // force decoding of fields - decodedAny = true; - - final int cp2 = (int)(bgzf.getFilePointer() >>> 16); - if (cp2 != prevCP) { - // The compressed position changed so we must be in a new - // block. - assert cp2 > prevCP; - prevCP = cp2; - ++b; - } - } - - // Running out of records to verify is fine as long as we - // verified at least something. It should only happen if we - // couldn't fill the array. - if (b < BLOCKS_NEEDED_FOR_GUESS) { - assert arr.length < MAX_BYTES_READ; - if (!decodedAny) - continue; - } - } - catch (SAMFormatException e) { continue; } - catch (OutOfMemoryError e) { continue; } - catch (IllegalArgumentException e) { continue; } - catch (IndexOutOfBoundsException e) { continue; } - catch (RuntimeIOException e) { continue; } - // EOF can happen legitimately if the [beg,end) range is too - // small to accommodate BLOCKS_NEEDED_FOR_GUESS and we get cut - // off in the middle of a record. In that case, our stream - // should have hit EOF as well. If we've then verified at least - // something, go ahead with it and hope for the best. - catch (FileTruncatedException e) { - if (!decodedAny && this.in.eof()) - continue; - } - catch (RuntimeEOFException e) { - if (!decodedAny && this.in.eof()) - continue; - } - - return beg+cp0 << 16 | up0; - } - } - } - - private int guessNextBAMPos(long cpVirt, int up, int cSize) { - // What we're actually searching for is what's at offset [4], not [0]. So - // skip ahead by 4, thus ensuring that whenever we find a valid [0] it's - // at position up or greater. - up += 4; - - try { - while (up + SHORTEST_POSSIBLE_BAM_RECORD - 4 < cSize) { - bgzf.seek(cpVirt | up); - IOUtils.readFully(bgzf, buf.array(), 0, 8); - - // If the first two checks fail we have what looks like a valid - // reference sequence ID. Assume we're at offset [4] or [24], i.e. - // the ID of either this read or its mate, respectively. So check - // the next integer ([8] or [28]) to make sure it's a 0-based - // leftmost coordinate. - final int id = buf.getInt(0); - final int pos = buf.getInt(4); - if (id < -1 || id > referenceSequenceCount || pos < -1) { - ++up; - continue; - } - - // Okay, we could be at [4] or [24]. Assuming we're at [4], check - // that [24] is valid. Assume [4] because we should hit it first: - // the only time we expect to hit [24] is at the beginning of the - // split, as part of the first read we should skip. - - bgzf.seek(cpVirt | up+20); - IOUtils.readFully(bgzf, buf.array(), 0, 8); - - final int nid = buf.getInt(0); - final int npos = buf.getInt(4); - if (nid < -1 || nid > referenceSequenceCount || npos < -1) { - ++up; - continue; - } - - // So far so good: [4] and [24] seem okay. Now do something a bit - // more involved: make sure that [36 + [12]&0xff - 1] == 0: that - // is, the name of the read should be null terminated. - - // Move up to 0 just to make it less likely that we get confused - // with offsets. Remember where we should continue from if we - // reject this up. 
- final int nextUP = up + 1; - up -= 4; - - bgzf.seek(cpVirt | up+12); - IOUtils.readFully(bgzf, buf.array(), 0, 4); - - final int nameLength = buf.getInt(0) & 0xff; - if (nameLength < 1) { - // Names are null-terminated so length must be at least one - up = nextUP; - continue; - } - - final int nullTerminator = up + 36 + nameLength-1; - - if (nullTerminator >= cSize) { - // This BAM record can't fit here. But maybe there's another in - // the remaining space, so try again. - up = nextUP; - continue; - } - - bgzf.seek(cpVirt | nullTerminator); - IOUtils.readFully(bgzf, buf.array(), 0, 1); - - if (buf.get(0) != 0) { - up = nextUP; - continue; - } - - // All of [4], [24], and [36 + [12]&0xff] look good. If [0] is also - // sensible, that's good enough for us. "Sensible" to us means the - // following: - // - // [0] >= 4*([16]&0xffff) + [20] + ([20]+1)/2 + 4*8 + ([12]&0xff) - - // Note that [0] is "length of the _remainder_ of the alignment - // record", which is why this uses 4*8 instead of 4*9. - int zeroMin = 4*8 + nameLength; - - bgzf.seek(cpVirt | up+16); - IOUtils.readFully(bgzf, buf.array(), 0, 8); - - zeroMin += (buf.getInt(0) & 0xffff) * 4; - zeroMin += buf.getInt(4) + (buf.getInt(4)+1)/2; - - bgzf.seek(cpVirt | up); - IOUtils.readFully(bgzf, buf.array(), 0, 4); - - if (buf.getInt(0) < zeroMin) { - up = nextUP; - continue; - } - return up; - } - } catch (IOException e) {} - return -1; - } - - public static void main(String[] args) throws IOException { - final GenericOptionsParser parser; - try { - parser = new GenericOptionsParser(args); - - // This should be IOException but Hadoop 0.20.2 doesn't throw it... - } catch (Exception e) { - System.err.printf("Error in Hadoop arguments: %s\n", e.getMessage()); - System.exit(1); - - // Hooray for javac - return; - } - - args = parser.getRemainingArgs(); - final Configuration conf = parser.getConfiguration(); - - long beg = 0; - - if (args.length < 2 || args.length > 3) { - System.err.println( - "Usage: BAMSplitGuesser path-or-uri header-path-or-uri [beg]"); - System.exit(2); - } - - try { - if (args.length > 2) beg = Long.decode(args[2]); - } catch (NumberFormatException e) { - System.err.println("Invalid beg offset."); - if (e.getMessage() != null) - System.err.println(e.getMessage()); - System.exit(2); - } - - SeekableStream ss = WrapSeekable.openPath(conf, new Path(args[0])); - SeekableStream hs = WrapSeekable.openPath(conf, new Path(args[1])); - - final long end = beg + MAX_BYTES_READ; - - System.out.printf( - "Will look for a BGZF block within: [%1$#x,%2$#x) = [%1$d,%2$d)\n"+ - "Will then verify BAM data within: [%1$#x,%3$#x) = [%1$d,%3$d)\n", - beg, beg + 0xffff, end); - - final long g = - new BAMSplitGuesser(ss, hs, conf).guessNextBAMRecordStart(beg, end); - - ss.close(); - - if (g == end) { - System.out.println( - "Didn't find any acceptable BAM record in any BGZF block."); - System.exit(1); - } - - System.out.printf( - "Accepted BGZF block at offset %1$#x (%1$d).\n"+ - "Accepted BAM record at offset %2$#x (%2$d) therein.\n", - g >> 16, g & 0xffff); - } + + // We want to go through this many BGZF blocks fully, checking that they + // contain valid BAM records, when guessing a BAM record position. + private static final byte BLOCKS_NEEDED_FOR_GUESS = 3; + // Since the max size of a BGZF block is 0xffff (64K), and we might be just + // one byte off from the start of the previous one, we need 0xfffe bytes for + // the start, and then 0xffff times the number of blocks we want to go + // through. 
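For scale, the constant declared on the next line is small and fixed; evaluating the expression is plain arithmetic, shown here only for orientation:

// MAX_BYTES_READ = BLOCKS_NEEDED_FOR_GUESS * 0xffff + 0xfffe
//                = 3 * 65_535 + 65_534
//                = 262_139 bytes, i.e. just under 256 KiB buffered per guess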
+ private static final int MAX_BYTES_READ = BLOCKS_NEEDED_FOR_GUESS * 0xffff + 0xfffe; + private static final int SHORTEST_POSSIBLE_BAM_RECORD = 4 * 9 + 1 + 1 + 1; + private final BAMRecordCodec bamCodec; + private final int referenceSequenceCount; + private final SAMFileHeader header; + private SeekableStream inFile; + private BlockCompressedInputStream bgzf; + + /** The stream must point to a valid BAM file, because the header is read from it. */ + public BAMSplitGuesser(SeekableStream ss, Configuration conf) throws IOException { + this(ss, ss, conf); + + // Secondary check that the header points to a BAM file: Picard can get + // things wrong due to its autodetection. + ss.seek(0); + if (ss.read(buf.array(), 0, 4) != 4 || buf.getInt(0) != BGZF_MAGIC) { + throw new SAMFormatException("Does not seem like a BAM file"); + } + } + + public BAMSplitGuesser(SeekableStream ss, InputStream headerStream, Configuration conf) + throws IOException { + inFile = ss; + + header = SAMHeaderReader.readSAMHeaderFrom(headerStream, conf); + referenceSequenceCount = header.getSequenceDictionary().size(); + + bamCodec = new BAMRecordCodec(null, new LazyBAMRecordFactory()); + } + + public static void main(String[] args) throws IOException { + final GenericOptionsParser parser; + try { + parser = new GenericOptionsParser(args); + + // This should be IOException but Hadoop 0.20.2 doesn't throw it... + } catch (Exception e) { + System.err.printf("Error in Hadoop arguments: %s\n", e.getMessage()); + System.exit(1); + + // Hooray for javac + return; + } + + args = parser.getRemainingArgs(); + final Configuration conf = parser.getConfiguration(); + + long beg = 0; + + if (args.length < 2 || args.length > 3) { + System.err.println("Usage: BAMSplitGuesser path-or-uri header-path-or-uri [beg]"); + System.exit(2); + } + + try { + if (args.length > 2) { + beg = Long.decode(args[2]); + } + } catch (NumberFormatException e) { + System.err.println("Invalid beg offset."); + if (e.getMessage() != null) { + System.err.println(e.getMessage()); + } + System.exit(2); + } + + SeekableStream ss = WrapSeekable.openPath(conf, new Path(args[0])); + SeekableStream hs = WrapSeekable.openPath(conf, new Path(args[1])); + + final long end = beg + MAX_BYTES_READ; + + System.out.printf( + "Will look for a BGZF block within: [%1$#x,%2$#x) = [%1$d,%2$d)\n" + + "Will then verify BAM data within: [%1$#x,%3$#x) = [%1$d,%3$d)\n", + beg, beg + 0xffff, end); + + final long g = new BAMSplitGuesser(ss, hs, conf).guessNextBAMRecordStart(beg, end); + + ss.close(); + + if (g == end) { + System.out.println("Didn't find any acceptable BAM record in any BGZF block."); + System.exit(1); + } + + System.out.printf( + "Accepted BGZF block at offset %1$#x (%1$d).\n" + + "Accepted BAM record at offset %2$#x (%2$d) therein.\n", + g >> 16, g & 0xffff); + } + + /** + * Finds a virtual BAM record position in the physical position range [beg,end). Returns end if no + * BAM record was found. + */ + public long guessNextBAMRecordStart(long beg, long end) throws IOException { + // Use a reader to skip through the headers at the beginning of a BAM file, since + // the headers may exceed MAX_BYTES_READ in length. Don't close the reader + // otherwise it will close the underlying stream, which we continue to read from + // on subsequent calls to this method. 
+ if (beg == 0) { + this.inFile.seek(beg); + SamReader open = + SamReaderFactory.makeDefault().setUseAsyncIo(false).open(SamInputResource.of(inFile)); + SAMFileSpan span = open.indexing().getFilePointerSpanningReads(); + if (span instanceof BAMFileSpan) { + return ((BAMFileSpan) span).getFirstOffset(); + } + } + + // Buffer what we need to go through. + + byte[] arr = new byte[MAX_BYTES_READ]; + + this.inFile.seek(beg); + int totalRead = 0; + for (int left = Math.min((int) (end - beg), arr.length); left > 0; ) { + final int r = inFile.read(arr, totalRead, left); + if (r < 0) { + break; + } + totalRead += r; + left -= r; + } + arr = Arrays.copyOf(arr, totalRead); + + this.in = new ByteArraySeekableStream(arr); + + this.bgzf = new BlockCompressedInputStream(this.in); + this.bgzf.setCheckCrcs(true); + + this.bamCodec.setInputStream(bgzf); + + final int firstBGZFEnd = Math.min((int) (end - beg), 0xffff); + + // cp: Compressed Position, indexes the entire BGZF input. + for (int cp = 0; ; ++cp) { + final PosSize psz = guessNextBGZFPos(cp, firstBGZFEnd); + if (psz == null) { + return end; + } + + final int cp0 = cp = psz.pos; + final long cp0Virt = (long) cp0 << 16; + try { + bgzf.seek(cp0Virt); + + // This has to catch Throwable, because it's possible to get an + // OutOfMemoryError due to an overly large size. + } catch (Throwable e) { + // Guessed BGZF position incorrectly: try the next guess. + continue; + } + + // up: Uncompressed Position, indexes the data inside the BGZF block. + for (int up = 0; ; ++up) { + final int up0 = up = guessNextBAMPos(cp0Virt, up, psz.size); + + if (up0 < 0) { + // No BAM records found in the BGZF block: try the next BGZF + // block. + break; + } + + // Verify that we can actually decode BLOCKS_NEEDED_FOR_GUESS worth + // of records starting at (cp0,up0). + bgzf.seek(cp0Virt | up0); + boolean decodedAny = false; + try { + byte b = 0; + int prevCP = cp0; + while (b < BLOCKS_NEEDED_FOR_GUESS) { + SAMRecord record = bamCodec.decode(); + if (record == null) { + break; + } + record.setHeaderStrict(header); + SAMRecordHelper.eagerDecode(record); // force decoding of fields + decodedAny = true; + + final int cp2 = (int) (bgzf.getFilePointer() >>> 16); + if (cp2 != prevCP) { + // The compressed position changed so we must be in a new + // block. + assert cp2 > prevCP; + prevCP = cp2; + ++b; + } + } + + // Running out of records to verify is fine as long as we + // verified at least something. It should only happen if we + // couldn't fill the array. + if (b < BLOCKS_NEEDED_FOR_GUESS) { + assert arr.length < MAX_BYTES_READ; + if (!decodedAny) { + continue; + } + } + } catch (SAMFormatException e) { + continue; + } catch (OutOfMemoryError e) { + continue; + } catch (IllegalArgumentException e) { + continue; + } catch (IndexOutOfBoundsException e) { + continue; + } catch (RuntimeIOException e) { + continue; + } + // EOF can happen legitimately if the [beg,end) range is too + // small to accommodate BLOCKS_NEEDED_FOR_GUESS and we get cut + // off in the middle of a record. In that case, our stream + // should have hit EOF as well. If we've then verified at least + // something, go ahead with it and hope for the best. 
+ catch (FileTruncatedException e) { + if (!decodedAny && this.in.eof()) { + continue; + } + } catch (RuntimeEOFException e) { + if (!decodedAny && this.in.eof()) { + continue; + } + } + + return beg + cp0 << 16 | up0; + } + } + } + + private int guessNextBAMPos(long cpVirt, int up, int cSize) { + // What we're actually searching for is what's at offset [4], not [0]. So + // skip ahead by 4, thus ensuring that whenever we find a valid [0] it's + // at position up or greater. + up += 4; + + try { + while (up + SHORTEST_POSSIBLE_BAM_RECORD - 4 < cSize) { + bgzf.seek(cpVirt | up); + IOUtils.readFully(bgzf, buf.array(), 0, 8); + + // If the first two checks fail we have what looks like a valid + // reference sequence ID. Assume we're at offset [4] or [24], i.e. + // the ID of either this read or its mate, respectively. So check + // the next integer ([8] or [28]) to make sure it's a 0-based + // leftmost coordinate. + final int id = buf.getInt(0); + final int pos = buf.getInt(4); + if (id < -1 || id > referenceSequenceCount || pos < -1) { + ++up; + continue; + } + + // Okay, we could be at [4] or [24]. Assuming we're at [4], check + // that [24] is valid. Assume [4] because we should hit it first: + // the only time we expect to hit [24] is at the beginning of the + // split, as part of the first read we should skip. + + bgzf.seek(cpVirt | up + 20); + IOUtils.readFully(bgzf, buf.array(), 0, 8); + + final int nid = buf.getInt(0); + final int npos = buf.getInt(4); + if (nid < -1 || nid > referenceSequenceCount || npos < -1) { + ++up; + continue; + } + + // So far so good: [4] and [24] seem okay. Now do something a bit + // more involved: make sure that [36 + [12]&0xff - 1] == 0: that + // is, the name of the read should be null terminated. + + // Move up to 0 just to make it less likely that we get confused + // with offsets. Remember where we should continue from if we + // reject this up. + final int nextUP = up + 1; + up -= 4; + + bgzf.seek(cpVirt | up + 12); + IOUtils.readFully(bgzf, buf.array(), 0, 4); + + final int nameLength = buf.getInt(0) & 0xff; + if (nameLength < 1) { + // Names are null-terminated so length must be at least one + up = nextUP; + continue; + } + + final int nullTerminator = up + 36 + nameLength - 1; + + if (nullTerminator >= cSize) { + // This BAM record can't fit here. But maybe there's another in + // the remaining space, so try again. + up = nextUP; + continue; + } + + bgzf.seek(cpVirt | nullTerminator); + IOUtils.readFully(bgzf, buf.array(), 0, 1); + + if (buf.get(0) != 0) { + up = nextUP; + continue; + } + + // All of [4], [24], and [36 + [12]&0xff] look good. If [0] is also + // sensible, that's good enough for us. "Sensible" to us means the + // following: + // + // [0] >= 4*([16]&0xffff) + [20] + ([20]+1)/2 + 4*8 + ([12]&0xff) + + // Note that [0] is "length of the _remainder_ of the alignment + // record", which is why this uses 4*8 instead of 4*9. 
+ int zeroMin = 4 * 8 + nameLength; + + bgzf.seek(cpVirt | up + 16); + IOUtils.readFully(bgzf, buf.array(), 0, 8); + + zeroMin += (buf.getInt(0) & 0xffff) * 4; + zeroMin += buf.getInt(4) + (buf.getInt(4) + 1) / 2; + + bgzf.seek(cpVirt | up); + IOUtils.readFully(bgzf, buf.array(), 0, 4); + + if (buf.getInt(0) < zeroMin) { + up = nextUP; + continue; + } + return up; + } + } catch (IOException e) { + } + return -1; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java index 5ed4a7c..6b6b626 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java @@ -22,13 +22,18 @@ package org.seqdoop.hadoop_bam; -import java.io.InputStream; +import htsjdk.samtools.util.BlockCompressedInputStream; +import htsjdk.tribble.readers.PositionalBufferedStream; +import htsjdk.variant.bcf2.BCF2Codec; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFContigHeaderLine; +import htsjdk.variant.vcf.VCFHeader; import java.io.IOException; +import java.io.InputStream; import java.util.HashMap; import java.util.Map; - -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.io.LongWritable; @@ -36,201 +41,213 @@ import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; - -import htsjdk.samtools.util.BlockCompressedInputStream; -import htsjdk.tribble.readers.PositionalBufferedStream; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.bcf2.BCF2Codec; -import htsjdk.variant.vcf.VCFContigHeaderLine; -import htsjdk.variant.vcf.VCFHeader; - import org.seqdoop.hadoop_bam.util.MurmurHash3; import org.seqdoop.hadoop_bam.util.WrapSeekable; /** See {@link VCFRecordReader} for the meaning of the key. */ -public class BCFRecordReader - extends RecordReader -{ - private final LongWritable key = new LongWritable(); - private final VariantContextWritable vc = new VariantContextWritable(); - - private BCF2Codec codec = new BCF2Codec(); - private PositionalBufferedStream in; - - private final Map contigDict = - new HashMap(); - - private boolean isBGZF; - private BlockCompressedInputStream bci; - - // If isBGZF, length refers only to the distance of the last BGZF block from - // the first. 
- private long fileStart, length; - - @Override public void initialize(InputSplit spl, TaskAttemptContext ctx) - throws IOException - { - isBGZF = spl instanceof FileVirtualSplit; - if (isBGZF) { - final FileVirtualSplit split = (FileVirtualSplit)spl; - - final Path file = split.getPath(); - final FileSystem fs = file.getFileSystem(ctx.getConfiguration()); - - final FSDataInputStream inFile = fs.open(file); - - bci = new BlockCompressedInputStream(inFile); - in = new PositionalBufferedStream(bci); - initContigDict(); - - inFile.seek(0); - bci = - new BlockCompressedInputStream( - new WrapSeekable( - inFile, fs.getFileStatus(file).getLen(), file)); - - final long virtualStart = split.getStartVirtualOffset(), - virtualEnd = split.getEndVirtualOffset(); - - this.fileStart = virtualStart >>> 16; - this.length = (virtualEnd >>> 16) - fileStart; - - bci.seek(virtualStart); - - // Since PositionalBufferedStream does its own buffering, we have to - // prevent it from going too far by using a BGZFLimitingStream. It - // also allows nextKeyValue() to simply check for EOF instead of - // looking at virtualEnd. - in = new PositionalBufferedStream( - new BGZFLimitingStream(bci, virtualEnd)); - } else { - final FileSplit split = (FileSplit)spl; - - this.fileStart = split.getStart(); - this.length = split.getLength(); - - final Path file = split.getPath(); - - in = new PositionalBufferedStream( - file.getFileSystem(ctx.getConfiguration()).open(file)); - - initContigDict(); - - IOUtils.skipFully(in, fileStart - in.getPosition()); - } - } - @Override public void close() throws IOException { in.close(); } - - private void initContigDict() { - final VCFHeader header = - (VCFHeader)codec.readHeader(in).getHeaderValue(); - - contigDict.clear(); - int i = 0; - for (final VCFContigHeaderLine contig : header.getContigLines()) - contigDict.put(contig.getID(), i++); - } - - /** For compressed BCF, unless the end has been reached, this is quite - * inaccurate. - */ - @Override public float getProgress() { - if (length == 0) - return 1; - - if (!isBGZF) - return (float)(in.getPosition() - fileStart) / length; - - try { - if (in.peek() == -1) - return 1; - } catch (IOException e) { - return 1; - } - - // Add 1 to the denominator to make sure that we never report 1 here. - return (float)((bci.getFilePointer() >>> 16) - fileStart) / (length + 1); - } - @Override public LongWritable getCurrentKey () { return key; } - @Override public VariantContextWritable getCurrentValue() { return vc; } - - @Override public boolean nextKeyValue() throws IOException { - if (in.peek() == -1) - return false; - - if (!isBGZF && in.getPosition() >= fileStart + length) - return false; - - final VariantContext v = codec.decode(in); - - Integer chromIdx = contigDict.get(v.getContig()); - if (chromIdx == null) - chromIdx = (int)MurmurHash3.murmurhash3(v.getContig(), 0); - - key.set((long)chromIdx << 32 | (long)(v.getStart() - 1)); - vc.set(v); - return true; - } +public class BCFRecordReader extends RecordReader { + + private final LongWritable key = new LongWritable(); + private final VariantContextWritable vc = new VariantContextWritable(); + private final Map contigDict = new HashMap(); + private BCF2Codec codec = new BCF2Codec(); + private PositionalBufferedStream in; + private boolean isBGZF; + private BlockCompressedInputStream bci; + + // If isBGZF, length refers only to the distance of the last BGZF block from + // the first. 
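  // Editor's note: an illustrative sketch of the BGZF virtual-offset convention used by the
  // fileStart and length fields declared just below (and by
  // BlockCompressedInputStream.getFilePointer()): the upper 48 bits hold the compressed offset
  // of a BGZF block within the file, the lower 16 bits hold the offset within that block's
  // uncompressed data. The helper names are assumptions for illustration only, not part of
  // Hadoop-BAM or htsjdk.
  static long packVirtualOffset(long compressedBlockStart, int withinBlockOffset) {
    return (compressedBlockStart << 16) | (withinBlockOffset & 0xffffL);
  }

  static long compressedOffset(long virtualOffset) {
    return virtualOffset >>> 16; // the same shift used for fileStart and length
  }

  static int withinBlockOffset(long virtualOffset) {
    return (int) (virtualOffset & 0xffff);
  }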
+ private long fileStart, length; + + @Override + public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { + isBGZF = spl instanceof FileVirtualSplit; + if (isBGZF) { + final FileVirtualSplit split = (FileVirtualSplit) spl; + + final Path file = split.getPath(); + final FileSystem fs = file.getFileSystem(ctx.getConfiguration()); + + final FSDataInputStream inFile = fs.open(file); + + bci = new BlockCompressedInputStream(inFile); + in = new PositionalBufferedStream(bci); + initContigDict(); + + inFile.seek(0); + bci = + new BlockCompressedInputStream( + new WrapSeekable(inFile, fs.getFileStatus(file).getLen(), file)); + + final long virtualStart = split.getStartVirtualOffset(), + virtualEnd = split.getEndVirtualOffset(); + + this.fileStart = virtualStart >>> 16; + this.length = (virtualEnd >>> 16) - fileStart; + + bci.seek(virtualStart); + + // Since PositionalBufferedStream does its own buffering, we have to + // prevent it from going too far by using a BGZFLimitingStream. It + // also allows nextKeyValue() to simply check for EOF instead of + // looking at virtualEnd. + in = new PositionalBufferedStream(new BGZFLimitingStream(bci, virtualEnd)); + } else { + final FileSplit split = (FileSplit) spl; + + this.fileStart = split.getStart(); + this.length = split.getLength(); + + final Path file = split.getPath(); + + in = new PositionalBufferedStream(file.getFileSystem(ctx.getConfiguration()).open(file)); + + initContigDict(); + + IOUtils.skipFully(in, fileStart - in.getPosition()); + } + } + + @Override + public void close() throws IOException { + in.close(); + } + + private void initContigDict() { + final VCFHeader header = (VCFHeader) codec.readHeader(in).getHeaderValue(); + + contigDict.clear(); + int i = 0; + for (final VCFContigHeaderLine contig : header.getContigLines()) { + contigDict.put(contig.getID(), i++); + } + } + + /** For compressed BCF, unless the end has been reached, this is quite inaccurate. */ + @Override + public float getProgress() { + if (length == 0) { + return 1; + } + + if (!isBGZF) { + return (float) (in.getPosition() - fileStart) / length; + } + + try { + if (in.peek() == -1) { + return 1; + } + } catch (IOException e) { + return 1; + } + + // Add 1 to the denominator to make sure that we never report 1 here. 
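    // Editor's worked example (illustrative numbers only): for a split covering compressed
    // bytes [1_000_000, 3_000_000) with the reader currently in the block starting at
    // 2_000_000, the expression below yields (2_000_000 - 1_000_000) / (2_000_000 + 1)
    // ~= 0.4999998, and the "+ 1" in the denominator keeps the estimate strictly below 1.0
    // until EOF is actually observed.
    final float exampleProgress = (float) (2_000_000L - 1_000_000L) / (2_000_000L + 1);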
+ return (float) ((bci.getFilePointer() >>> 16) - fileStart) / (length + 1); + } + + @Override + public LongWritable getCurrentKey() { + return key; + } + + @Override + public VariantContextWritable getCurrentValue() { + return vc; + } + + @Override + public boolean nextKeyValue() throws IOException { + if (in.peek() == -1) { + return false; + } + + if (!isBGZF && in.getPosition() >= fileStart + length) { + return false; + } + + final VariantContext v = codec.decode(in); + + Integer chromIdx = contigDict.get(v.getContig()); + if (chromIdx == null) { + chromIdx = (int) MurmurHash3.murmurhash3(v.getContig(), 0); + } + + key.set((long) chromIdx << 32 | (long) (v.getStart() - 1)); + vc.set(v); + return true; + } } class BGZFLimitingStream extends InputStream { - private final BlockCompressedInputStream bgzf; - private final long virtEnd; - - public BGZFLimitingStream( - BlockCompressedInputStream stream, long virtualEnd) - { - bgzf = stream; - virtEnd = virtualEnd; - } - - @Override public void close() throws IOException { bgzf.close(); } - - private byte[] readBuf = new byte[1]; - @Override public int read() throws IOException { - switch (read(readBuf)) { - case 1: return readBuf[0]; - case -1: return -1; - default: assert false; return -1; - } - } - - @Override public int read(byte[] buf, int off, int len) throws IOException { - - int totalRead = 0; - long virt; - - final int lastLen = (int)virtEnd & 0xffff; - - while ((virt = bgzf.getFilePointer()) >>> 16 != virtEnd >>> 16) { - // We're not in the last BGZF block yet. Unfortunately - // BlockCompressedInputStream doesn't expose the length of the current - // block, so we can't simply (possibly repeatedly) read the current - // block to the end. Instead, we read at most virtEnd & 0xffff at a - // time, which ensures that we can't overshoot virtEnd even if the - // next block starts immediately. - final int r = bgzf.read(buf, off, Math.min(len, lastLen)); - if (r == -1) - return totalRead == 0 ? -1 : totalRead; - - totalRead += r; - len -= r; - if (len == 0) - return totalRead; - off += r; - } - - // We're in the last BGZF block: read only up to lastLen. - len = Math.min(len, ((int)virt & 0xffff) - lastLen); - while (len > 0) { - final int r = bgzf.read(buf, off, len); - if (r == -1) - return totalRead == 0 ? -1 : totalRead; - - totalRead += r; - len -= r; - off += r; - } - return totalRead == 0 ? -1 : totalRead; - } + + private final BlockCompressedInputStream bgzf; + private final long virtEnd; + private byte[] readBuf = new byte[1]; + + public BGZFLimitingStream(BlockCompressedInputStream stream, long virtualEnd) { + bgzf = stream; + virtEnd = virtualEnd; + } + + @Override + public void close() throws IOException { + bgzf.close(); + } + + @Override + public int read() throws IOException { + switch (read(readBuf)) { + case 1: + return readBuf[0]; + case -1: + return -1; + default: + assert false; + return -1; + } + } + + @Override + public int read(byte[] buf, int off, int len) throws IOException { + + int totalRead = 0; + long virt; + + final int lastLen = (int) virtEnd & 0xffff; + + while ((virt = bgzf.getFilePointer()) >>> 16 != virtEnd >>> 16) { + // We're not in the last BGZF block yet. Unfortunately + // BlockCompressedInputStream doesn't expose the length of the current + // block, so we can't simply (possibly repeatedly) read the current + // block to the end. Instead, we read at most virtEnd & 0xffff at a + // time, which ensures that we can't overshoot virtEnd even if the + // next block starts immediately. 
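      // Editor's worked example (illustrative value only): for virtEnd = 0x2D000ABCL the final
      // block starts at compressed offset 0x2D00 and lastLen = 0x0ABC = 2748, so every read
      // issued before reaching that block is capped at 2748 bytes and cannot carry the stream
      // past virtEnd even if the final block starts at the very next byte.
      final int exampleLastLen = (int) 0x2D000ABCL & 0xffff; // 2748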
+ final int r = bgzf.read(buf, off, Math.min(len, lastLen)); + if (r == -1) { + return totalRead == 0 ? -1 : totalRead; + } + + totalRead += r; + len -= r; + if (len == 0) { + return totalRead; + } + off += r; + } + + // We're in the last BGZF block: read only up to lastLen. + len = Math.min(len, ((int) virt & 0xffff) - lastLen); + while (len > 0) { + final int r = bgzf.read(buf, off, len); + if (r == -1) { + return totalRead == 0 ? -1 : totalRead; + } + + totalRead += r; + len -= r; + off += r; + } + return totalRead == 0 ? -1 : totalRead; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java index 72733ee..0ec2664 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java @@ -22,119 +22,106 @@ package org.seqdoop.hadoop_bam; -import java.io.FilterOutputStream; -import java.io.IOException; -import java.io.OutputStream; - import htsjdk.samtools.util.BlockCompressedOutputStream; - -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.mapreduce.RecordWriter; -import org.apache.hadoop.mapreduce.TaskAttemptContext; import htsjdk.variant.variantcontext.GenotypesContext; import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.variantcontext.writer.Options; import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; import htsjdk.variant.vcf.VCFHeader; - +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapreduce.RecordWriter; +import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.seqdoop.hadoop_bam.util.VCFHeaderReader; import org.seqdoop.hadoop_bam.util.WrapSeekable; -/** A base {@link RecordWriter} for compressed BCF. +/** + * A base {@link RecordWriter} for compressed BCF. * - *

<p>Handles the output stream, writing the header if requested, and provides - * the {@link #writeRecord} function for subclasses.

+ *

Handles the output stream, writing the header if requested, and provides the {@link + * #writeRecord} function for subclasses. */ -public abstract class BCFRecordWriter - extends RecordWriter -{ - private VariantContextWriter writer; - private LazyVCFGenotypesContext.HeaderDataCache vcfHeaderDataCache = - new LazyVCFGenotypesContext.HeaderDataCache(); - private LazyBCFGenotypesContext.HeaderDataCache bcfHeaderDataCache = - new LazyBCFGenotypesContext.HeaderDataCache(); - - /** A VCF header is read from the input Path, which should refer to a VCF or - * BCF file. - */ - public BCFRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - final WrapSeekable in = - WrapSeekable.openPath(ctx.getConfiguration(), input); - final VCFHeader header = VCFHeaderReader.readHeaderFrom(in); - in.close(); - - init(output, header, writeHeader, ctx); - } - public BCFRecordWriter( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - } - public BCFRecordWriter( - OutputStream output, VCFHeader header, boolean writeHeader) - throws IOException - { - init(output, header, writeHeader); - } - - // Working around not being able to call a constructor other than as the - // first statement... - private void init( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - } - private void init( - OutputStream output, VCFHeader header, final boolean writeHeader) - throws IOException - { - final BCFStoppableOutputStream stopOut = - new BCFStoppableOutputStream(!writeHeader, output); - - writer = new VariantContextWriterBuilder().clearOptions() - .setOption(Options.FORCE_BCF) - .setOutputBCFStream(stopOut).build(); - - writer.writeHeader(header); - stopOut.stopped = false; - - setInputHeader(header); - } - - @Override public void close(TaskAttemptContext ctx) throws IOException { - writer.close(); - } - - /** Used for lazy decoding of genotype data. Of course, each input record - * may have a different header, but we currently only support one header - * here... This is in part due to the fact that it's not clear what the best - * solution is. */ - public void setInputHeader(VCFHeader header) { - vcfHeaderDataCache.setHeader(header); - bcfHeaderDataCache.setHeader(header); - } - - protected void writeRecord(VariantContext vc) { - final GenotypesContext gc = vc.getGenotypes(); - if (gc instanceof LazyParsingGenotypesContext) - ((LazyParsingGenotypesContext)gc).getParser().setHeaderDataCache( - gc instanceof LazyVCFGenotypesContext ? vcfHeaderDataCache - : bcfHeaderDataCache); - - writer.add(vc); - } +public abstract class BCFRecordWriter extends RecordWriter { + + private VariantContextWriter writer; + private LazyVCFGenotypesContext.HeaderDataCache vcfHeaderDataCache = + new LazyVCFGenotypesContext.HeaderDataCache(); + private LazyBCFGenotypesContext.HeaderDataCache bcfHeaderDataCache = + new LazyBCFGenotypesContext.HeaderDataCache(); + + /** A VCF header is read from the input Path, which should refer to a VCF or BCF file. 
*/ + public BCFRecordWriter(Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + final WrapSeekable in = WrapSeekable.openPath(ctx.getConfiguration(), input); + final VCFHeader header = VCFHeaderReader.readHeaderFrom(in); + in.close(); + + init(output, header, writeHeader, ctx); + } + + public BCFRecordWriter(Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + } + + public BCFRecordWriter(OutputStream output, VCFHeader header, boolean writeHeader) + throws IOException { + init(output, header, writeHeader); + } + + // Working around not being able to call a constructor other than as the + // first statement... + private void init(Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + } + + private void init(OutputStream output, VCFHeader header, final boolean writeHeader) + throws IOException { + final BCFStoppableOutputStream stopOut = new BCFStoppableOutputStream(!writeHeader, output); + + writer = + new VariantContextWriterBuilder() + .clearOptions() + .setOption(Options.FORCE_BCF) + .setOutputBCFStream(stopOut) + .build(); + + writer.writeHeader(header); + stopOut.stopped = false; + + setInputHeader(header); + } + + @Override + public void close(TaskAttemptContext ctx) throws IOException { + writer.close(); + } + + /** + * Used for lazy decoding of genotype data. Of course, each input record may have a different + * header, but we currently only support one header here... This is in part due to the fact that + * it's not clear what the best solution is. + */ + public void setInputHeader(VCFHeader header) { + vcfHeaderDataCache.setHeader(header); + bcfHeaderDataCache.setHeader(header); + } + + protected void writeRecord(VariantContext vc) { + final GenotypesContext gc = vc.getGenotypes(); + if (gc instanceof LazyParsingGenotypesContext) { + ((LazyParsingGenotypesContext) gc) + .getParser() + .setHeaderDataCache( + gc instanceof LazyVCFGenotypesContext ? vcfHeaderDataCache : bcfHeaderDataCache); + } + + writer.add(vc); + } } // We must always call writer.writeHeader() because the writer requires @@ -148,31 +135,44 @@ protected void writeRecord(VariantContext vc) { // // In addition we do BGZF compression here, to simplify things. final class BCFStoppableOutputStream extends FilterOutputStream { - public boolean stopped; - private final OutputStream origOut; - - public BCFStoppableOutputStream(boolean startStopped, OutputStream out) { - super(new BlockCompressedOutputStream(out, null)); - origOut = out; - stopped = startStopped; - } - - @Override public void write(int b) throws IOException { - if (!stopped) super.write(b); - } - @Override public void write(byte[] b) throws IOException { - if (!stopped) super.write(b); - } - @Override public void write(byte[] b, int off, int len) throws IOException { - if (!stopped) super.write(b, off, len); - } - - @Override public void close() throws IOException { - // Don't close the BlockCompressedOutputStream, as we don't want - // the BGZF terminator. - this.out.flush(); - - // Instead, close the lower-level output stream directly. 
- origOut.close(); - } + + private final OutputStream origOut; + public boolean stopped; + + public BCFStoppableOutputStream(boolean startStopped, OutputStream out) { + super(new BlockCompressedOutputStream(out, null)); + origOut = out; + stopped = startStopped; + } + + @Override + public void write(int b) throws IOException { + if (!stopped) { + super.write(b); + } + } + + @Override + public void write(byte[] b) throws IOException { + if (!stopped) { + super.write(b); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (!stopped) { + super.write(b, off, len); + } + } + + @Override + public void close() throws IOException { + // Don't close the BlockCompressedOutputStream, as we don't want + // the BGZF terminator. + this.out.flush(); + + // Instead, close the lower-level output stream directly. + origOut.close(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BCFSplitGuesser.java b/src/main/java/org/seqdoop/hadoop_bam/BCFSplitGuesser.java index 70cb533..5071da8 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BCFSplitGuesser.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BCFSplitGuesser.java @@ -22,18 +22,8 @@ package org.seqdoop.hadoop_bam; -import htsjdk.samtools.seekablestream.ByteArraySeekableStream; -import java.io.BufferedInputStream; -import java.io.InputStream; -import java.io.IOException; -import java.util.Arrays; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IOUtils; -import org.apache.hadoop.util.GenericOptionsParser; - import htsjdk.samtools.FileTruncatedException; +import htsjdk.samtools.seekablestream.ByteArraySeekableStream; import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.RuntimeEOFException; @@ -41,402 +31,421 @@ import htsjdk.tribble.readers.PositionalBufferedStream; import htsjdk.variant.bcf2.BCF2Codec; import htsjdk.variant.vcf.VCFHeader; - +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.GenericOptionsParser; import org.seqdoop.hadoop_bam.util.WrapSeekable; -/** A class for heuristically finding BCF record positions inside an area of - * a BCF file. Handles both compressed and uncompressed BCF. +/** + * A class for heuristically finding BCF record positions inside an area of a BCF file. Handles both + * compressed and uncompressed BCF. */ public class BCFSplitGuesser extends BaseSplitGuesser { - // cin is the compressed input: a BlockCompressedInputStream for compressed - // BCF, otherwise equal to in. Unfortunately the closest common type is then - // InputStream, which is why we have the cinSeek() method. - private InputStream cin; - private SeekableStream inFile; - private final boolean bgzf; - private final BCF2Codec bcfCodec = new BCF2Codec(); - private final int contigDictionaryLength, genotypeSampleCount; - - // The amount of data we verify for uncompressed BCF. - private final static int UNCOMPRESSED_BYTES_NEEDED = 0x80000; - - // We want to go through this many BGZF blocks fully, checking that they - // contain valid BCF records, when guessing a BCF record position. 
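  // Editor's note: a minimal, self-contained sketch of the idea behind the
  // BCFStoppableOutputStream shown earlier in this patch. The class and field names here are
  // assumptions for illustration only, not part of Hadoop-BAM: while the flag is set, written
  // bytes are silently dropped, which lets a writer that insists on emitting a header be given
  // one without the header ever reaching the file.
  class DiscardingOutputStream extends java.io.FilterOutputStream {

    boolean discarding;

    DiscardingOutputStream(java.io.OutputStream out, boolean startDiscarding) {
      super(out);
      discarding = startDiscarding;
    }

    @Override
    public void write(int b) throws java.io.IOException {
      if (!discarding) {
        out.write(b);
      }
    }

    @Override
    public void write(byte[] b, int off, int len) throws java.io.IOException {
      if (!discarding) {
        out.write(b, off, len);
      }
    }
  }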
- private final static byte BGZF_BLOCKS_NEEDED_FOR_GUESS = 2; - - // Since the max size of a BGZF block is 0xffff (64K), and we might be just - // one byte off from the start of the previous one, we need 0xfffe bytes for - // the start, and then 0xffff times the number of blocks we want to go - // through. - private final static int BGZF_MAX_BYTES_READ = - BGZF_BLOCKS_NEEDED_FOR_GUESS * 0xffff + 0xfffe; - - // This is probably too conservative. - private final static int SHORTEST_POSSIBLE_BCF_RECORD = 4*8 + 1; - - /** The stream must point to a valid BCF file, because the header is read - * from it. - */ - public BCFSplitGuesser(SeekableStream ss) throws IOException { - this(ss, ss); - } - - public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) - throws IOException - { - inFile = ss; - - InputStream bInFile = new BufferedInputStream(inFile); - - bgzf = BlockCompressedInputStream.isValidFile(bInFile); - if (bgzf) - bInFile = new BlockCompressedInputStream(bInFile); - - // Excess buffering here but it can't be helped that BCF2Codec only takes - // PositionalBufferedStream. - final VCFHeader header = - (VCFHeader)bcfCodec.readHeader( - new PositionalBufferedStream(bInFile)).getHeaderValue(); - - contigDictionaryLength = header.getContigLines().size(); - genotypeSampleCount = header.getNGenotypeSamples(); - } - - public boolean isBGZF() { return bgzf; } - - private void cinSeek(long virt) throws IOException { - if (bgzf) - ((BlockCompressedInputStream)cin).seek(virt); - else - ((SeekableStream)cin).seek(virt); - } - - /** Finds a (virtual in the case of BGZF) BCF record position in the - * physical position range [beg,end). Returns end if no BCF record was - * found. - */ - public long guessNextBCFRecordStart(long beg, long end) - throws IOException - { - // Buffer what we need to go through. - - byte[] arr = new byte[ - bgzf ? BGZF_MAX_BYTES_READ : UNCOMPRESSED_BYTES_NEEDED]; - - this.inFile.seek(beg); - int totalRead = 0; - for (int left = Math.min((int)(end - beg), arr.length); left > 0;) { - final int r = inFile.read(arr, totalRead, left); - if (r < 0) - break; - totalRead += r; - left -= r; - } - arr = Arrays.copyOf(arr, totalRead); - - this.in = new ByteArraySeekableStream(arr); - - final int firstBGZFEnd; - - if (this.bgzf) { - firstBGZFEnd = Math.min((int)(end - beg), 0xffff); - - BlockCompressedInputStream bgzfStream = - new BlockCompressedInputStream(this.in); - bgzfStream.setCheckCrcs(true); - this.cin = bgzfStream; - } else { - this.cin = this.in; - - firstBGZFEnd = 0; // Actually unused - } - - // cp: Compressed Position, indexes the entire BGZF input. If - // we have uncompressed BCF, this loop does nothing. - for (int cp = 0;; ++cp) { - - final int cp0; - final long cp0Virt; - final int blockLen; - - if (this.bgzf) { - final PosSize psz = guessNextBGZFPos(cp, firstBGZFEnd); - if (psz == null) - break; - - cp0 = cp = psz.pos; - cp0Virt = (long)cp0 << 16; - try { - cinSeek(cp0Virt); - - // This has to catch Throwable, because it's possible to get an - // OutOfMemoryError due to an overly large size. - } catch (Throwable e) { - // Guessed BGZF position incorrectly: try the next guess. - continue; - } - blockLen = psz.size; - } else { - cp0 = 0; // Actually unused - cp0Virt = 0; - blockLen = Math.max(arr.length, UNCOMPRESSED_BYTES_NEEDED); - } - - // up: Uncompressed Position, indexes the data inside the BGZF block. 
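  // Editor's note on the virtual offsets returned by guessNextBCFRecordStart below
  // ("beg+cp0 << 16 | up0" in the BGZF case): Java operator precedence parses it as
  // ((beg + cp0) << 16) | up0, i.e. the upper 48 bits carry the absolute compressed offset of
  // the guessed block and the lower 16 bits carry the record's offset inside that block.
  // Illustrative values only:
  final long exampleVirtualOffset = (1_000_000L + 37L) << 16 | 129L;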
- for (int up = 0;; ++up) { - final int up0 = up = guessNextBCFPos(cp0Virt, up, blockLen); - - if (up0 < 0) { - // No BCF records found in the BGZF block: try the next BGZF - // block. - break; - } - - // Verification time. - - cinSeek(cp0Virt | up0); - - final PositionalBufferedStream pbIn = - new PositionalBufferedStream(cin); - - boolean decodedAny = false; - try { - if (bgzf) { - byte b = 0; - int prevCP = cp0; - while (b < BGZF_BLOCKS_NEEDED_FOR_GUESS && pbIn.peek() != -1) - { - bcfCodec.decode(pbIn); - decodedAny = true; - - final int cp2 = (int) - (((BlockCompressedInputStream)cin).getFilePointer() - >>> 16); - if (cp2 != prevCP) { - // The compressed position changed so we must be in a - // new block. - assert cp2 > prevCP; - cp = cp2; - ++b; - } - } - - // Running out of records to verify is fine as long as we - // verified at least something. It should only happen if we - // couldn't fill the array. - if (b < BGZF_BLOCKS_NEEDED_FOR_GUESS) { - assert arr.length < BGZF_MAX_BYTES_READ; - if (!decodedAny) - continue; - } - } else { - while (pbIn.getPosition() - up0 < UNCOMPRESSED_BYTES_NEEDED - && pbIn.peek() != -1) - { - bcfCodec.decode(pbIn); - decodedAny = true; - } - - // As in the BGZF case. - if (pbIn.getPosition() - up0 < UNCOMPRESSED_BYTES_NEEDED) { - assert arr.length < UNCOMPRESSED_BYTES_NEEDED; - if (!decodedAny) - continue; - } - } - - } catch (FileTruncatedException e) { continue; } - catch (OutOfMemoryError e) { continue; } - catch (RuntimeEOFException e) { continue; } - catch (TribbleException e) { - // This is the way in which BCF2Codec reports unexpected EOF. - // Unfortunately, it also reports every other kind of error with - // the same exception. It even wraps IOException in - // TribbleException! - // - // We need to catch EOF in the middle of a record, which can - // happen legitimately if the [beg,end) range is too small and - // cuts off a record. First, require decodedAny, and then, assume - // that this exception means EOF if the stream has hit EOF. - if (!(decodedAny && pbIn.peek() == -1)) - continue; - } - - return this.bgzf ? beg+cp0 << 16 | up0 : beg + up0; - } - if (!this.bgzf) - break; - } - return end; - } - - private int guessNextBCFPos(long cpVirt, int up, int cSize) { - try { - for (; up + SHORTEST_POSSIBLE_BCF_RECORD < cSize; ++up) { - // Note! The BCF2 spec has a table listing the fields and their - // types, but QUAL is misplaced there! It should be before - // n_allele_info, not after n_fmt_sample! The "Putting it all - // together" section shows the correct field order. - - // Check that [0] and [4] are big enough to make sense. - - cinSeek(cpVirt | up); - IOUtils.readFully(cin, buf.array(), 0, 8); - - final long sharedLen = getUInt(0); - final long indivLen = getUInt(4); - if (sharedLen + indivLen < (long)SHORTEST_POSSIBLE_BCF_RECORD) - continue; - - // Check that [8] looks like a valid CHROM field and that [12] is a - // 0-based leftmost coordinate. - - cinSeek(cpVirt | up+8); - IOUtils.readFully(cin, buf.array(), 0, 8); - - final int chrom = buf.getInt(0); - final int pos = buf.getInt(4); - if (chrom < 0 || chrom >= contigDictionaryLength || pos < 0) - continue; - - // [24] and [26] are lengths and should thus be nonnegative. 
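        // Editor's worked example (illustrative value only): the 32-bit field read just below
        // packs n_allele in its upper 16 bits and n_info in its lower 16 bits, so 0x00020005
        // decodes to 2 alleles and 5 INFO entries.
        final int exampleAlleleCount = 0x00020005 >> 16; // 2
        final int exampleInfoCount = 0x00020005 & 0xffff; // 5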
- - cinSeek(cpVirt | up+24); - IOUtils.readFully(cin, buf.array(), 0, 4); - final int alleleInfo = buf.getInt(0); - - final int alleleCount = alleleInfo >> 16; - final int infoCount = alleleInfo & 0xffff; - if (alleleCount < 0) // don't check infoCount since it is always nonnegative - continue; - - // Make sure that [28] matches to the same value in the header. - - cinSeek(cpVirt | up+28); - IOUtils.readFully(cin, buf.array(), 0, 1); - - final short nSamples = getUByte(0); - if ((int)nSamples != genotypeSampleCount) - continue; - - // Check that the ID string has a sensible type encoding. That is, - // it should claim to be a character string: [32] & 0x0f == 0x07. - // Further, if it has length 15 or more, i.e. [32] & 0xf0 == 0xf0, - // then it should be followed by an integer, i.e. [33] & 0x0f - // should be in the range [1, 3], and the value of that integer - // should be in the range [15, [0] - x) where x is the guaranteed - // number of bytes in the first part of this record (before the - // genotype block). - - cinSeek(cpVirt | up+32); - IOUtils.readFully(cin, buf.array(), 0, 6); - - final byte idType = buf.get(0); - if ((idType & 0x0f) != 0x07) - continue; - - if ((idType & 0xf0) == 0xf0) { - final byte idLenType = buf.get(1); - final long idLen; - switch (idLenType & 0x0f) { - case 0x01: idLen = getUByte (2); break; - case 0x02: idLen = getUShort(2); break; - case 0x03: idLen = getUInt (2); break; - default: continue; - } - - if (idLen < 15 - || idLen > sharedLen - (4*8 + alleleCount + infoCount*2)) - continue; - } - - // Good enough. - return up; - } - } catch (IOException e) { - // fall through - } - return -1; - } - private long getUInt(final int idx) { - return (long)buf.getInt(idx) & 0xffffffff; - } - private short getUByte(final int idx) { - return (short)((short)buf.get(idx) & 0xff); - } - - public static void main(String[] args) throws IOException { - final GenericOptionsParser parser; - try { - parser = new GenericOptionsParser(args); - - // This should be IOException but Hadoop 0.20.2 doesn't throw it... 
- } catch (Exception e) { - System.err.printf("Error in Hadoop arguments: %s\n", e.getMessage()); - System.exit(1); - return; - } - - args = parser.getRemainingArgs(); - final Configuration conf = parser.getConfiguration(); - - long beg = 0; - - if (args.length < 2 || args.length > 3) { - System.err.println( - "Usage: BCFSplitGuesser path-or-uri header-path-or-uri [beg]"); - System.exit(2); - } - - try { - if (args.length > 2) beg = Long.decode(args[2]); - } catch (NumberFormatException e) { - System.err.println("Invalid beg offset."); - if (e.getMessage() != null) - System.err.println(e.getMessage()); - System.exit(2); - } - - SeekableStream ss = WrapSeekable.openPath(conf, new Path(args[0])); - SeekableStream hs = WrapSeekable.openPath(conf, new Path(args[1])); - - final BCFSplitGuesser guesser = new BCFSplitGuesser(ss, hs); - final long end; - - if (guesser.isBGZF()) { - end = beg + BGZF_MAX_BYTES_READ; - - System.out.printf( - "This looks like a BGZF-compressed BCF file.\n"+ - "Will look for a BGZF block within: [%1$#x,%2$#x) = [%1$d,%2$d)\n"+ - "Will then verify BCF data within: [%1$#x,%3$#x) = [%1$d,%3$d)\n", - beg, beg + 0xffff, end); - } else { - end = beg + UNCOMPRESSED_BYTES_NEEDED; - - System.out.printf( - "This looks like an uncompressed BCF file.\n"+ - "Will look for a BCF record within: [%1$#x,%2$#x) = [%1$d,%2$d)\n"+ - "And then will verify all following data in that range.\n", - beg, end); - } - - final long g = guesser.guessNextBCFRecordStart(beg, end); - - ss.close(); - - if (g == end) { - System.out.println( - "Didn't find any acceptable BCF record in any BGZF block."); - System.exit(1); - } - - if (guesser.isBGZF()) - System.out.printf( - "Accepted BGZF block at offset %1$#x (%1$d).\n"+ - "Accepted BCF record at offset %2$#x (%2$d) therein.\n", - g >> 16, g & 0xffff); - else - System.out.printf("Accepted BCF record at offset %1$#x (%1$d).\n", g); - } + + // The amount of data we verify for uncompressed BCF. + private static final int UNCOMPRESSED_BYTES_NEEDED = 0x80000; + // We want to go through this many BGZF blocks fully, checking that they + // contain valid BCF records, when guessing a BCF record position. + private static final byte BGZF_BLOCKS_NEEDED_FOR_GUESS = 2; + // Since the max size of a BGZF block is 0xffff (64K), and we might be just + // one byte off from the start of the previous one, we need 0xfffe bytes for + // the start, and then 0xffff times the number of blocks we want to go + // through. + private static final int BGZF_MAX_BYTES_READ = BGZF_BLOCKS_NEEDED_FOR_GUESS * 0xffff + 0xfffe; + // This is probably too conservative. + private static final int SHORTEST_POSSIBLE_BCF_RECORD = 4 * 8 + 1; + private final boolean bgzf; + private final BCF2Codec bcfCodec = new BCF2Codec(); + private final int contigDictionaryLength, genotypeSampleCount; + // cin is the compressed input: a BlockCompressedInputStream for compressed + // BCF, otherwise equal to in. Unfortunately the closest common type is then + // InputStream, which is why we have the cinSeek() method. + private InputStream cin; + private SeekableStream inFile; + + /** The stream must point to a valid BCF file, because the header is read from it. 
*/ + public BCFSplitGuesser(SeekableStream ss) throws IOException { + this(ss, ss); + } + + public BCFSplitGuesser(SeekableStream ss, InputStream headerStream) throws IOException { + inFile = ss; + + InputStream bInFile = new BufferedInputStream(inFile); + + bgzf = BlockCompressedInputStream.isValidFile(bInFile); + if (bgzf) { + bInFile = new BlockCompressedInputStream(bInFile); + } + + // Excess buffering here but it can't be helped that BCF2Codec only takes + // PositionalBufferedStream. + final VCFHeader header = + (VCFHeader) bcfCodec.readHeader(new PositionalBufferedStream(bInFile)).getHeaderValue(); + + contigDictionaryLength = header.getContigLines().size(); + genotypeSampleCount = header.getNGenotypeSamples(); + } + + public static void main(String[] args) throws IOException { + final GenericOptionsParser parser; + try { + parser = new GenericOptionsParser(args); + + // This should be IOException but Hadoop 0.20.2 doesn't throw it... + } catch (Exception e) { + System.err.printf("Error in Hadoop arguments: %s\n", e.getMessage()); + System.exit(1); + return; + } + + args = parser.getRemainingArgs(); + final Configuration conf = parser.getConfiguration(); + + long beg = 0; + + if (args.length < 2 || args.length > 3) { + System.err.println("Usage: BCFSplitGuesser path-or-uri header-path-or-uri [beg]"); + System.exit(2); + } + + try { + if (args.length > 2) { + beg = Long.decode(args[2]); + } + } catch (NumberFormatException e) { + System.err.println("Invalid beg offset."); + if (e.getMessage() != null) { + System.err.println(e.getMessage()); + } + System.exit(2); + } + + SeekableStream ss = WrapSeekable.openPath(conf, new Path(args[0])); + SeekableStream hs = WrapSeekable.openPath(conf, new Path(args[1])); + + final BCFSplitGuesser guesser = new BCFSplitGuesser(ss, hs); + final long end; + + if (guesser.isBGZF()) { + end = beg + BGZF_MAX_BYTES_READ; + + System.out.printf( + "This looks like a BGZF-compressed BCF file.\n" + + "Will look for a BGZF block within: [%1$#x,%2$#x) = [%1$d,%2$d)\n" + + "Will then verify BCF data within: [%1$#x,%3$#x) = [%1$d,%3$d)\n", + beg, beg + 0xffff, end); + } else { + end = beg + UNCOMPRESSED_BYTES_NEEDED; + + System.out.printf( + "This looks like an uncompressed BCF file.\n" + + "Will look for a BCF record within: [%1$#x,%2$#x) = [%1$d,%2$d)\n" + + "And then will verify all following data in that range.\n", + beg, end); + } + + final long g = guesser.guessNextBCFRecordStart(beg, end); + + ss.close(); + + if (g == end) { + System.out.println("Didn't find any acceptable BCF record in any BGZF block."); + System.exit(1); + } + + if (guesser.isBGZF()) { + System.out.printf( + "Accepted BGZF block at offset %1$#x (%1$d).\n" + + "Accepted BCF record at offset %2$#x (%2$d) therein.\n", + g >> 16, g & 0xffff); + } else { + System.out.printf("Accepted BCF record at offset %1$#x (%1$d).\n", g); + } + } + + public boolean isBGZF() { + return bgzf; + } + + private void cinSeek(long virt) throws IOException { + if (bgzf) { + ((BlockCompressedInputStream) cin).seek(virt); + } else { + ((SeekableStream) cin).seek(virt); + } + } + + /** + * Finds a (virtual in the case of BGZF) BCF record position in the physical position range + * [beg,end). Returns end if no BCF record was found. + */ + public long guessNextBCFRecordStart(long beg, long end) throws IOException { + // Buffer what we need to go through. + + byte[] arr = new byte[bgzf ? 
BGZF_MAX_BYTES_READ : UNCOMPRESSED_BYTES_NEEDED]; + + this.inFile.seek(beg); + int totalRead = 0; + for (int left = Math.min((int) (end - beg), arr.length); left > 0; ) { + final int r = inFile.read(arr, totalRead, left); + if (r < 0) { + break; + } + totalRead += r; + left -= r; + } + arr = Arrays.copyOf(arr, totalRead); + + this.in = new ByteArraySeekableStream(arr); + + final int firstBGZFEnd; + + if (this.bgzf) { + firstBGZFEnd = Math.min((int) (end - beg), 0xffff); + + BlockCompressedInputStream bgzfStream = new BlockCompressedInputStream(this.in); + bgzfStream.setCheckCrcs(true); + this.cin = bgzfStream; + } else { + this.cin = this.in; + + firstBGZFEnd = 0; // Actually unused + } + + // cp: Compressed Position, indexes the entire BGZF input. If + // we have uncompressed BCF, this loop does nothing. + for (int cp = 0; ; ++cp) { + + final int cp0; + final long cp0Virt; + final int blockLen; + + if (this.bgzf) { + final PosSize psz = guessNextBGZFPos(cp, firstBGZFEnd); + if (psz == null) { + break; + } + + cp0 = cp = psz.pos; + cp0Virt = (long) cp0 << 16; + try { + cinSeek(cp0Virt); + + // This has to catch Throwable, because it's possible to get an + // OutOfMemoryError due to an overly large size. + } catch (Throwable e) { + // Guessed BGZF position incorrectly: try the next guess. + continue; + } + blockLen = psz.size; + } else { + cp0 = 0; // Actually unused + cp0Virt = 0; + blockLen = Math.max(arr.length, UNCOMPRESSED_BYTES_NEEDED); + } + + // up: Uncompressed Position, indexes the data inside the BGZF block. + for (int up = 0; ; ++up) { + final int up0 = up = guessNextBCFPos(cp0Virt, up, blockLen); + + if (up0 < 0) { + // No BCF records found in the BGZF block: try the next BGZF + // block. + break; + } + + // Verification time. + + cinSeek(cp0Virt | up0); + + final PositionalBufferedStream pbIn = new PositionalBufferedStream(cin); + + boolean decodedAny = false; + try { + if (bgzf) { + byte b = 0; + int prevCP = cp0; + while (b < BGZF_BLOCKS_NEEDED_FOR_GUESS && pbIn.peek() != -1) { + bcfCodec.decode(pbIn); + decodedAny = true; + + final int cp2 = (int) (((BlockCompressedInputStream) cin).getFilePointer() >>> 16); + if (cp2 != prevCP) { + // The compressed position changed so we must be in a + // new block. + assert cp2 > prevCP; + cp = cp2; + ++b; + } + } + + // Running out of records to verify is fine as long as we + // verified at least something. It should only happen if we + // couldn't fill the array. + if (b < BGZF_BLOCKS_NEEDED_FOR_GUESS) { + assert arr.length < BGZF_MAX_BYTES_READ; + if (!decodedAny) { + continue; + } + } + } else { + while (pbIn.getPosition() - up0 < UNCOMPRESSED_BYTES_NEEDED && pbIn.peek() != -1) { + bcfCodec.decode(pbIn); + decodedAny = true; + } + + // As in the BGZF case. + if (pbIn.getPosition() - up0 < UNCOMPRESSED_BYTES_NEEDED) { + assert arr.length < UNCOMPRESSED_BYTES_NEEDED; + if (!decodedAny) { + continue; + } + } + } + + } catch (FileTruncatedException e) { + continue; + } catch (OutOfMemoryError e) { + continue; + } catch (RuntimeEOFException e) { + continue; + } catch (TribbleException e) { + // This is the way in which BCF2Codec reports unexpected EOF. + // Unfortunately, it also reports every other kind of error with + // the same exception. It even wraps IOException in + // TribbleException! + // + // We need to catch EOF in the middle of a record, which can + // happen legitimately if the [beg,end) range is too small and + // cuts off a record. 
First, require decodedAny, and then, assume + // that this exception means EOF if the stream has hit EOF. + if (!(decodedAny && pbIn.peek() == -1)) { + continue; + } + } + + return this.bgzf ? beg + cp0 << 16 | up0 : beg + up0; + } + if (!this.bgzf) { + break; + } + } + return end; + } + + private int guessNextBCFPos(long cpVirt, int up, int cSize) { + try { + for (; up + SHORTEST_POSSIBLE_BCF_RECORD < cSize; ++up) { + // Note! The BCF2 spec has a table listing the fields and their + // types, but QUAL is misplaced there! It should be before + // n_allele_info, not after n_fmt_sample! The "Putting it all + // together" section shows the correct field order. + + // Check that [0] and [4] are big enough to make sense. + + cinSeek(cpVirt | up); + IOUtils.readFully(cin, buf.array(), 0, 8); + + final long sharedLen = getUInt(0); + final long indivLen = getUInt(4); + if (sharedLen + indivLen < (long) SHORTEST_POSSIBLE_BCF_RECORD) { + continue; + } + + // Check that [8] looks like a valid CHROM field and that [12] is a + // 0-based leftmost coordinate. + + cinSeek(cpVirt | up + 8); + IOUtils.readFully(cin, buf.array(), 0, 8); + + final int chrom = buf.getInt(0); + final int pos = buf.getInt(4); + if (chrom < 0 || chrom >= contigDictionaryLength || pos < 0) { + continue; + } + + // [24] and [26] are lengths and should thus be nonnegative. + + cinSeek(cpVirt | up + 24); + IOUtils.readFully(cin, buf.array(), 0, 4); + final int alleleInfo = buf.getInt(0); + + final int alleleCount = alleleInfo >> 16; + final int infoCount = alleleInfo & 0xffff; + if (alleleCount < 0) // don't check infoCount since it is always nonnegative + { + continue; + } + + // Make sure that [28] matches to the same value in the header. + + cinSeek(cpVirt | up + 28); + IOUtils.readFully(cin, buf.array(), 0, 1); + + final short nSamples = getUByte(0); + if ((int) nSamples != genotypeSampleCount) { + continue; + } + + // Check that the ID string has a sensible type encoding. That is, + // it should claim to be a character string: [32] & 0x0f == 0x07. + // Further, if it has length 15 or more, i.e. [32] & 0xf0 == 0xf0, + // then it should be followed by an integer, i.e. [33] & 0x0f + // should be in the range [1, 3], and the value of that integer + // should be in the range [15, [0] - x) where x is the guaranteed + // number of bytes in the first part of this record (before the + // genotype block). + + cinSeek(cpVirt | up + 32); + IOUtils.readFully(cin, buf.array(), 0, 6); + + final byte idType = buf.get(0); + if ((idType & 0x0f) != 0x07) { + continue; + } + + if ((idType & 0xf0) == 0xf0) { + final byte idLenType = buf.get(1); + final long idLen; + switch (idLenType & 0x0f) { + case 0x01: + idLen = getUByte(2); + break; + case 0x02: + idLen = getUShort(2); + break; + case 0x03: + idLen = getUInt(2); + break; + default: + continue; + } + + if (idLen < 15 || idLen > sharedLen - (4 * 8 + alleleCount + infoCount * 2)) { + continue; + } + } + + // Good enough. 
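        // Editor's note, before the return below: in BCF2 a typing byte packs the element count
        // in its upper 4 bits and the atomic type in its lower 4 bits; type 7 is a character
        // string, and a count of 0xf means the true count follows as a typed integer, which is
        // the case handled by the (idType & 0xf0) == 0xf0 branch above. Worked example with an
        // illustrative byte value:
        final int exampleIdType = 0x37 & 0x0f; // 7: character string
        final int exampleIdCount = (0x37 >> 4) & 0x0f; // 3 characters inline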
+ return up; + } + } catch (IOException e) { + // fall through + } + return -1; + } + + private long getUInt(final int idx) { + return (long) buf.getInt(idx) & 0xffffffff; + } + + private short getUByte(final int idx) { + return (short) ((short) buf.get(idx) & 0xff); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BaseSplitGuesser.java b/src/main/java/org/seqdoop/hadoop_bam/BaseSplitGuesser.java index 6f96b39..99cad61 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BaseSplitGuesser.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BaseSplitGuesser.java @@ -8,106 +8,114 @@ class BaseSplitGuesser { - protected final static int BGZF_MAGIC = 0x04088b1f; - protected final static int BGZF_MAGIC_SUB = 0x00024342; - protected final static int BGZF_SUB_SIZE = 4 + 2; - - protected SeekableStream in; + protected static final int BGZF_MAGIC = 0x04088b1f; + protected static final int BGZF_MAGIC_SUB = 0x00024342; + protected static final int BGZF_SUB_SIZE = 4 + 2; protected final ByteBuffer buf; + protected SeekableStream in; public BaseSplitGuesser() { buf = ByteBuffer.allocate(8); buf.order(ByteOrder.LITTLE_ENDIAN); } - protected static class PosSize { - public int pos; - public int size; - public PosSize(int p, int s) { pos = p; size = s; } - } - // Gives the compressed size on the side. Returns null if it doesn't find // anything. protected PosSize guessNextBGZFPos(int p, int end) { - try { for (;;) { - for (;;) { - in.seek(p); - IOUtils.readFully(in, buf.array(), 0, 4); - int n = buf.getInt(0); - - if (n == BGZF_MAGIC) - break; - - // Skip ahead a bit more than 1 byte if you can. - if (n >>> 8 == BGZF_MAGIC << 8 >>> 8) - ++p; - else if (n >>> 16 == BGZF_MAGIC << 16 >>> 16) - p += 2; - else - p += 3; - - if (p >= end) - return null; - } - // Found what looks like a gzip block header: now get XLEN and - // search for the BGZF subfield. - final int p0 = p; - p += 10; - in.seek(p); - IOUtils.readFully(in, buf.array(), 0, 2); - p += 2; - final int xlen = getUShort(0); - final int subEnd = p + xlen; - - while (p < subEnd) { - IOUtils.readFully(in, buf.array(), 0, 4); - - if (buf.getInt(0) != BGZF_MAGIC_SUB) { - p += 4 + getUShort(2); + try { + for (; ; ) { + for (; ; ) { in.seek(p); - continue; + IOUtils.readFully(in, buf.array(), 0, 4); + int n = buf.getInt(0); + + if (n == BGZF_MAGIC) { + break; + } + + // Skip ahead a bit more than 1 byte if you can. + if (n >>> 8 == BGZF_MAGIC << 8 >>> 8) { + ++p; + } else if (n >>> 16 == BGZF_MAGIC << 16 >>> 16) { + p += 2; + } else { + p += 3; + } + + if (p >= end) { + return null; + } } - - // Found it: this is close enough to a BGZF block, make it - // our guess. - - // But find out the size before returning. First, grab bsize: - // we'll need it later. + // Found what looks like a gzip block header: now get XLEN and + // search for the BGZF subfield. + final int p0 = p; + p += 10; + in.seek(p); IOUtils.readFully(in, buf.array(), 0, 2); - int bsize = getUShort(0); + p += 2; + final int xlen = getUShort(0); + final int subEnd = p + xlen; - // Then skip the rest of the subfields. - p += BGZF_SUB_SIZE; while (p < subEnd) { + IOUtils.readFully(in, buf.array(), 0, 4); + + if (buf.getInt(0) != BGZF_MAGIC_SUB) { + p += 4 + getUShort(2); + in.seek(p); + continue; + } + + // Found it: this is close enough to a BGZF block, make it + // our guess. + + // But find out the size before returning. First, grab bsize: + // we'll need it later. + IOUtils.readFully(in, buf.array(), 0, 2); + int bsize = getUShort(0); + + // Then skip the rest of the subfields. 
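          // Editor's worked example (illustrative numbers, not from any real file): for a block
          // whose gzip header starts at p0 with XLEN = 6 and BSIZE = 4660, the compressed
          // payload occupies BSIZE - XLEN - 19 = 4635 bytes, the 4-byte uncompressed-size field
          // read at the end of this method starts at p0 + BSIZE - 3, and the next BGZF block
          // (if any) begins at p0 + BSIZE + 1.
          final int exampleNextBlockStart = 0 /* p0 */ + 4660 /* BSIZE */ + 1; // 4661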
+ p += BGZF_SUB_SIZE; + while (p < subEnd) { + in.seek(p); + IOUtils.readFully(in, buf.array(), 0, 4); + p += 4 + getUShort(2); + } + if (p != subEnd) { + // Cancel our guess because the xlen field didn't match the + // data. + break; + } + + // Now skip past the compressed data and the CRC-32. + p += bsize - xlen - 19 + 4; in.seek(p); IOUtils.readFully(in, buf.array(), 0, 4); - p += 4 + getUShort(2); + return new PosSize(p0, buf.getInt(0)); } - if (p != subEnd) { - // Cancel our guess because the xlen field didn't match the - // data. - break; - } - - // Now skip past the compressed data and the CRC-32. - p += bsize - xlen - 19 + 4; - in.seek(p); - IOUtils.readFully(in, buf.array(), 0, 4); - return new PosSize(p0, buf.getInt(0)); + // No luck: look for the next gzip block header. Start right after + // where we last saw the identifiers, although we could probably + // safely skip further ahead. (If we find the correct one right + // now, the previous block contained 0x1f8b0804 bytes of data: that + // seems... unlikely.) + p = p0 + 4; } - // No luck: look for the next gzip block header. Start right after - // where we last saw the identifiers, although we could probably - // safely skip further ahead. (If we find the correct one right - // now, the previous block contained 0x1f8b0804 bytes of data: that - // seems... unlikely.) - p = p0 + 4; - - }} catch (IOException e) { + } catch (IOException e) { return null; } } protected int getUShort(final int idx) { - return (int)buf.getShort(idx) & 0xffff; + return (int) buf.getShort(idx) & 0xffff; + } + + protected static class PosSize { + + public int pos; + public int size; + + public PosSize(int p, int s) { + pos = p; + size = s; + } } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/CRAMInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/CRAMInputFormat.java index 7909310..5e1b8d8 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/CRAMInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/CRAMInputFormat.java @@ -23,6 +23,30 @@ public class CRAMInputFormat extends FileInputFormat getContainerOffsets(Configuration conf, Path cramFile) + throws IOException { + SeekableStream seekableStream = WrapSeekable.openPath(conf, cramFile); + CramContainerIterator cci = new CramContainerIterator(seekableStream); + List containerOffsets = new ArrayList(); + containerOffsets.add(seekableStream.position()); + while (cci.hasNext()) { + cci.next(); + containerOffsets.add(seekableStream.position()); + } + containerOffsets.add(seekableStream.length()); + return containerOffsets; + } + + private static long nextContainerOffset(List containerOffsets, long position) { + for (long offset : containerOffsets) { + if (offset >= position) { + return offset; + } + } + throw new IllegalStateException( + "Could not find position " + position + " in " + "container offsets: " + containerOffsets); + } + @Override public List getSplits(JobContext job) throws IOException { return getSplits(super.getSplits(job), job.getConfiguration()); @@ -42,45 +66,22 @@ public List getSplits(List splits, Configuration conf) fileToOffsets.put(path, containerOffsets); } long newStart = nextContainerOffset(containerOffsets, fileSplit.getStart()); - long newEnd = nextContainerOffset(containerOffsets, fileSplit.getStart() + - fileSplit.getLength()); + long newEnd = + nextContainerOffset(containerOffsets, fileSplit.getStart() + fileSplit.getLength()); long newLength = newEnd - newStart; if (newLength == 0) { // split is wholly within a container continue; } - FileSplit newSplit = new 
FileSplit(fileSplit.getPath(), newStart, newLength, - fileSplit.getLocations()); + FileSplit newSplit = + new FileSplit(fileSplit.getPath(), newStart, newLength, fileSplit.getLocations()); newSplits.add(newSplit); } return newSplits; } - private static List getContainerOffsets(Configuration conf, Path cramFile) - throws IOException { - SeekableStream seekableStream = WrapSeekable.openPath(conf, cramFile); - CramContainerIterator cci = new CramContainerIterator(seekableStream); - List containerOffsets = new ArrayList(); - containerOffsets.add(seekableStream.position()); - while (cci.hasNext()) { - cci.next(); - containerOffsets.add(seekableStream.position()); - } - containerOffsets.add(seekableStream.length()); - return containerOffsets; - } - - private static long nextContainerOffset(List containerOffsets, long position) { - for (long offset : containerOffsets) { - if (offset >= position) { - return offset; - } - } - throw new IllegalStateException("Could not find position " + position + " in " + - "container offsets: " + containerOffsets); - } - @Override - public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException { RecordReader rr = new CRAMRecordReader(); rr.initialize(split, context); return rr; diff --git a/src/main/java/org/seqdoop/hadoop_bam/CRAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/CRAMOutputFormat.java index abaf734..4b0e841 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/CRAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/CRAMOutputFormat.java @@ -2,9 +2,8 @@ import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -/** Currently this only locks down the value type of the {@link +/** + * Currently this only locks down the value type of the {@link * org.apache.hadoop.mapreduce.OutputFormat}: contains no functionality. */ -public abstract class CRAMOutputFormat - extends FileOutputFormat -{} +public abstract class CRAMOutputFormat extends FileOutputFormat {} diff --git a/src/main/java/org/seqdoop/hadoop_bam/CRAMRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/CRAMRecordReader.java index 577e97d..49ca269 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/CRAMRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/CRAMRecordReader.java @@ -6,8 +6,6 @@ import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.SeekableStream; import java.io.IOException; -import java.net.URI; -import java.nio.file.Paths; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; @@ -31,18 +29,18 @@ public class CRAMRecordReader extends RecordReaderHandles the output stream, writing the header if requested, and provides the {@link + * #writeAlignment} function for subclasses. * - *

<p>Handles the output stream, writing the header if requested, and provides - * the {@link #writeAlignment} function for subclasses.

- *

<p>Note that each file created by this class consists of a fragment of a - * complete CRAM file containing only one or more CRAM containers that do not - * include a CRAM file header, a SAMFileHeader, or a CRAM EOF container.

+ *

Note that each file created by this class consists of a fragment of a complete CRAM file + * containing only one or more CRAM containers that do not include a CRAM file header, a + * SAMFileHeader, or a CRAM EOF container. */ -public abstract class CRAMRecordWriter - extends RecordWriter -{ - // generic ID passed to CRAM code for internal error reporting - private static final String HADOOP_BAM_PART_ID= "Hadoop-BAM-Part"; - private OutputStream origOutput; - private CRAMContainerStreamWriter cramContainerStream = null; - private ReferenceSource refSource = null; - private boolean writeHeader = true; +public abstract class CRAMRecordWriter extends RecordWriter { - /** A SAMFileHeader is read from the input Path. */ - public CRAMRecordWriter( - final Path output, - final Path input, - final boolean writeHeader, - final TaskAttemptContext ctx) throws IOException - { - init( - output, - SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), - writeHeader, ctx); - } + // generic ID passed to CRAM code for internal error reporting + private static final String HADOOP_BAM_PART_ID = "Hadoop-BAM-Part"; + private OutputStream origOutput; + private CRAMContainerStreamWriter cramContainerStream = null; + private ReferenceSource refSource = null; + private boolean writeHeader = true; - public CRAMRecordWriter( - final Path output, final SAMFileHeader header, final boolean writeHeader, - final TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader, ctx); - } + /** A SAMFileHeader is read from the input Path. */ + public CRAMRecordWriter( + final Path output, final Path input, final boolean writeHeader, final TaskAttemptContext ctx) + throws IOException { + init( + output, SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), writeHeader, ctx); + } - // Working around not being able to call a constructor other than as the - // first statement... - private void init( - final Path output, final SAMFileHeader header, final boolean writeHeader, - final TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader, ctx); - } + public CRAMRecordWriter( + final Path output, + final SAMFileHeader header, + final boolean writeHeader, + final TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader, ctx); + } - private void init( - final OutputStream output, final SAMFileHeader header, final boolean writeHeader, - final TaskAttemptContext ctx) - throws IOException - { - origOutput = output; - this.writeHeader = writeHeader; + // Working around not being able to call a constructor other than as the + // first statement... + private void init( + final Path output, + final SAMFileHeader header, + final boolean writeHeader, + final TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader, ctx); + } - final String referenceURI = - ctx.getConfiguration().get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY); - refSource = new ReferenceSource(referenceURI == null ? 
null : - NIOFileUtil.asPath(referenceURI)); + private void init( + final OutputStream output, + final SAMFileHeader header, + final boolean writeHeader, + final TaskAttemptContext ctx) + throws IOException { + origOutput = output; + this.writeHeader = writeHeader; - // A SAMFileHeader must be supplied at CRAMContainerStreamWriter creation time; if - // we don't have one then delay creation until we do - if (header != null) { - cramContainerStream = new CRAMContainerStreamWriter( - origOutput, null, refSource, header, HADOOP_BAM_PART_ID); - if (writeHeader) { - this.writeHeader(header); - } - } - } + final String referenceURI = + ctx.getConfiguration().get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY); + refSource = new ReferenceSource(referenceURI == null ? null : NIOFileUtil.asPath(referenceURI)); - @Override public void close(TaskAttemptContext ctx) throws IOException { - cramContainerStream.finish(false); // Close, but suppress CRAM EOF container - origOutput.close(); // And close the original output. + // A SAMFileHeader must be supplied at CRAMContainerStreamWriter creation time; if + // we don't have one then delay creation until we do + if (header != null) { + cramContainerStream = + new CRAMContainerStreamWriter(origOutput, null, refSource, header, HADOOP_BAM_PART_ID); + if (writeHeader) { + this.writeHeader(header); + } } + } - protected void writeAlignment(final SAMRecord rec) { - if (null == cramContainerStream) { - final SAMFileHeader header = rec.getHeader(); - if (header == null) { - throw new RuntimeException("Cannot write record to CRAM: null header in SAM record"); - } - if (writeHeader) { - this.writeHeader(header); - } - cramContainerStream = new CRAMContainerStreamWriter( - origOutput, null, refSource, header, HADOOP_BAM_PART_ID); - } - cramContainerStream.writeAlignment(rec); - } + @Override + public void close(TaskAttemptContext ctx) throws IOException { + cramContainerStream.finish(false); // Close, but suppress CRAM EOF container + origOutput.close(); // And close the original output. + } - private void writeHeader(final SAMFileHeader header) { - cramContainerStream.writeHeader(header); + protected void writeAlignment(final SAMRecord rec) { + if (null == cramContainerStream) { + final SAMFileHeader header = rec.getHeader(); + if (header == null) { + throw new RuntimeException("Cannot write record to CRAM: null header in SAM record"); + } + if (writeHeader) { + this.writeHeader(header); + } + cramContainerStream = + new CRAMContainerStreamWriter(origOutput, null, refSource, header, HADOOP_BAM_PART_ID); } + cramContainerStream.writeAlignment(rec); + } + + private void writeHeader(final SAMFileHeader header) { + cramContainerStream.writeHeader(header); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FastaInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/FastaInputFormat.java index 19768eb..6037c32 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FastaInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FastaInputFormat.java @@ -29,7 +29,6 @@ import java.util.Collections; import java.util.Comparator; import java.util.List; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -47,343 +46,335 @@ import org.slf4j.LoggerFactory; /** - * Reads the FASTA reference sequence format. - * Key: sequence description and position offset, delimited by ':' characters. - * Value: a ReferenceFragment object representing the entry. + * Reads the FASTA reference sequence format. 
Key: sequence description and position offset, + * delimited by ':' characters. Value: a ReferenceFragment object representing the entry. * - * Note: here sections in the input file are assumed to be delimited by single - * line descriptions that start with '>'. + *

Note: here sections in the input file are assumed to be delimited by single line descriptions + * that start with '>'. */ -public class FastaInputFormat extends FileInputFormat -{ - private static final Logger logger = LoggerFactory.getLogger(FastaInputFormat.class); - public static final Charset UTF8 = Charset.forName("UTF8"); - - @Override public List getSplits(JobContext job) throws IOException - { - - // Note: We generate splits that correspond to different sections in the FASTA - // input (which here are called "chromosomes", delimited by '>' and - // followed by a single line description. - // Some locality is preserved since the locations are formed from the input - // splits, although no special attention is given to this issues (FASTA files - // are assumed to be smallish). - // The splits are generated on the client. In the future the split generation - // should be only performed once and an index file stored inside HDFS for - // peformance reasons. Currently this is not attempted (again: FASTA files - // aren't all that big). - - // we first make sure we are given only a single file - - List splits = super.getSplits(job); - - // first sort by input path - Collections.sort(splits, new Comparator() - { - public int compare(InputSplit a, InputSplit b) { - FileSplit fa = (FileSplit)a, fb = (FileSplit)b; - return fa.getPath().compareTo(fb.getPath()); - } - }); - - for (int i = 0; i < splits.size()-1; i++) { - FileSplit fa = (FileSplit)splits.get(i); - FileSplit fb = (FileSplit)splits.get(i+1); - - if(fa.getPath().compareTo(fb.getPath()) != 0) - throw new IOException("FastaInputFormat assumes single FASTA input file!"); +public class FastaInputFormat extends FileInputFormat { + + public static final Charset UTF8 = Charset.forName("UTF8"); + private static final Logger logger = LoggerFactory.getLogger(FastaInputFormat.class); + + @Override + public List getSplits(JobContext job) throws IOException { + + // Note: We generate splits that correspond to different sections in the FASTA + // input (which here are called "chromosomes", delimited by '>' and + // followed by a single line description. + // Some locality is preserved since the locations are formed from the input + // splits, although no special attention is given to this issues (FASTA files + // are assumed to be smallish). + // The splits are generated on the client. In the future the split generation + // should be only performed once and an index file stored inside HDFS for + // peformance reasons. Currently this is not attempted (again: FASTA files + // aren't all that big). 
+ + // we first make sure we are given only a single file + + List splits = super.getSplits(job); + + // first sort by input path + Collections.sort( + splits, + new Comparator() { + public int compare(InputSplit a, InputSplit b) { + FileSplit fa = (FileSplit) a, fb = (FileSplit) b; + return fa.getPath().compareTo(fb.getPath()); + } + }); + + for (int i = 0; i < splits.size() - 1; i++) { + FileSplit fa = (FileSplit) splits.get(i); + FileSplit fb = (FileSplit) splits.get(i + 1); + + if (fa.getPath().compareTo(fb.getPath()) != 0) { + throw new IOException("FastaInputFormat assumes single FASTA input file!"); + } + } + + // now we are sure we only have one FASTA input file + + final List newSplits = new ArrayList(splits.size()); + FileSplit fileSplit = (FileSplit) splits.get(0); + Path path = fileSplit.getPath(); + + FileSystem fs = path.getFileSystem(job.getConfiguration()); + FSDataInputStream fis = fs.open(path); + byte[] buffer = new byte[1024]; + + long byte_counter = 0; + long prev_chromosome_byte_offset = 0; + boolean first_chromosome = true; + + for (int j = 0; j < splits.size(); j++) { + FileSplit origsplit = (FileSplit) splits.get(j); + + while (byte_counter < origsplit.getStart() + origsplit.getLength()) { + long bytes_read = + fis.read( + byte_counter, + buffer, + 0, + (int) + Math.min( + buffer.length, + origsplit.getStart() + origsplit.getLength() - byte_counter)); + if (logger.isDebugEnabled()) { + logger.debug("bytes_read: {} of {} splits", bytes_read, splits.size()); + } + if (bytes_read > 0) { + for (int i = 0; i < bytes_read; i++) { + if (buffer[i] == (byte) '>') { + if (logger.isDebugEnabled()) { + logger.debug("found chromosome at position {}", byte_counter + i); + } + + if (!first_chromosome) { + FileSplit fsplit = + new FileSplit( + path, + prev_chromosome_byte_offset, + byte_counter + i - 1 - prev_chromosome_byte_offset, + origsplit.getLocations()); + + if (logger.isDebugEnabled()) { + logger.debug( + "adding split: start: {}, length: {}", fsplit.getStart(), fsplit.getLength()); + } + newSplits.add(fsplit); + } + first_chromosome = false; + prev_chromosome_byte_offset = byte_counter + i; } - - // now we are sure we only have one FASTA input file - - final List newSplits = new ArrayList(splits.size()); - FileSplit fileSplit = (FileSplit)splits.get(0); - Path path = fileSplit.getPath(); - - FileSystem fs = path.getFileSystem(job.getConfiguration()); - FSDataInputStream fis = fs.open(path); - byte[] buffer = new byte[1024]; - - long byte_counter = 0; - long prev_chromosome_byte_offset = 0; - boolean first_chromosome = true; - - for(int j = 0; j < splits.size(); j++) { - FileSplit origsplit = (FileSplit)splits.get(j); - - while(byte_counter < origsplit.getStart()+origsplit.getLength()) { - long bytes_read = fis.read(byte_counter, buffer, 0, (int)Math.min(buffer.length, - origsplit.getStart()+origsplit.getLength()- byte_counter)); - if (logger.isDebugEnabled()) { - logger.debug("bytes_read: {} of {} splits", bytes_read, splits.size()); - } - if(bytes_read > 0) { - for(int i=0;i') { - if (logger.isDebugEnabled()) { - logger.debug("found chromosome at position {}", byte_counter + i); - } - - if(!first_chromosome) { - FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset, byte_counter + i-1 - prev_chromosome_byte_offset, origsplit.getLocations()); - - if (logger.isDebugEnabled()) { - logger.debug("adding split: start: {}, length: {}", fsplit.getStart(), fsplit.getLength()); - } - newSplits.add(fsplit); - } - first_chromosome = false; - prev_chromosome_byte_offset 
= byte_counter + i; - } - } - byte_counter += bytes_read; - } - } - - if(j == splits.size()-1) { - FileSplit fsplit = new FileSplit(path, prev_chromosome_byte_offset, byte_counter - prev_chromosome_byte_offset, origsplit.getLocations()); - newSplits.add(fsplit); - if (logger.isDebugEnabled()) { - logger.debug("adding split: {}", fsplit); - } - break; - } - } - - return newSplits; - } - - public static class FastaRecordReader extends RecordReader - { - - // start: first valid data index - private long start; - // end: first index value beyond the slice, i.e. slice is in range [start,end) - private long end; - // pos: current position in file - private long pos; - // file: the file being read - private Path file; - - // current_split_pos: the current (chromosome) position within the split - private int current_split_pos; - // current_split_indexseq: the description/chromosome name - private String current_split_indexseq = null; - - private LineReader lineReader; - private InputStream inputStream; - private Text currentKey = new Text(); - private ReferenceFragment currentValue = new ReferenceFragment(); - - private Text buffer = new Text(); - - // How long can a FASTA line get? - public static final int MAX_LINE_LENGTH = 20000; - - public FastaRecordReader(Configuration conf, FileSplit split) throws IOException - { - setConf(conf); - file = split.getPath(); - start = split.getStart(); - end = start + split.getLength(); - current_split_pos = 1; - - FileSystem fs = file.getFileSystem(conf); - FSDataInputStream fileIn = fs.open(file); - - CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); - CompressionCodec codec = codecFactory.getCodec(file); - - if (codec == null) // no codec. Uncompressed file. - { - positionAtFirstRecord(fileIn); - inputStream = fileIn; - } - else - { // compressed file - if (start != 0) - throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")"); - - inputStream = codec.createInputStream(fileIn); - end = Long.MAX_VALUE; // read until the end of the file - } - - lineReader = new LineReader(inputStream); - } - - /* - * Position the input stream at the start of the first record. - */ - private void positionAtFirstRecord(FSDataInputStream stream) throws IOException - { - if (start > 0) - { - stream.seek(start); - } - - // we are now in a new chromosome/fragment, so read its name/index sequence - // and reset position counter - - // index sequence - LineReader reader = new LineReader(stream); - int bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - - current_split_indexseq = buffer.toString(); - // now get rid of '>' character - current_split_indexseq = current_split_indexseq.substring(1,current_split_indexseq.length()); - - // initialize position counter - current_split_pos = 1; - - if (logger.isDebugEnabled()) { - logger.debug("read index sequence: {}", current_split_indexseq); - } - start = start + bytesRead; - stream.seek(start); - pos = start; - } - - protected void setConf(Configuration conf) - { - } - - /** - * Added to use mapreduce API. - */ - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException - { - } - - /** - * Added to use mapreduce API. - */ - public Text getCurrentKey() - { - return currentKey; - } - - /** - * Added to use mapreduce API. - */ - public ReferenceFragment getCurrentValue() - { - return currentValue; - } - - /** - * Added to use mapreduce API. 
- */ - public boolean nextKeyValue() throws IOException, InterruptedException - { - return next(currentKey, currentValue); - } - - /** - * Close this RecordReader to future operations. - */ - public void close() throws IOException - { - inputStream.close(); - } - - /** - * Create an object of the appropriate type to be used as a key. - */ - public Text createKey() - { - return new Text(); - } - - /** - * Create an object of the appropriate type to be used as a value. - */ - public ReferenceFragment createValue() - { - return new ReferenceFragment(); - } - - /** - * Returns the current position in the input. - */ - public long getPos() { return pos; } - - /** - * How much of the input has the RecordReader consumed i.e. - */ - public float getProgress() - { - if (start == end) - return 1.0f; - else - return Math.min(1.0f, (pos - start) / (float)(end - start)); - } - - public String makePositionMessage(long pos) - { - return file.toString() + ":" + pos; - } - - public String makePositionMessage() - { - return file.toString() + ":" + pos; - } - - /** - * Reads the next key/value pair from the input for processing. - */ - public boolean next(Text key, ReferenceFragment value) throws IOException - { - if (pos >= end) - return false; // past end of slice - - int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH); - pos += bytesRead; - if (bytesRead >= MAX_LINE_LENGTH) - throw new RuntimeException("found abnormally large line (length " + bytesRead + ") at " + makePositionMessage(pos - bytesRead) + ": " + Text.decode(buffer.getBytes(), 0, 500)); - else if (bytesRead <= 0) - return false; // EOF - else - { - scanFastaLine(buffer, key, value); - current_split_pos += bytesRead; - return true; - } - } - - private void scanFastaLine(Text line, Text key, ReferenceFragment fragment) - { - // Build the key. We concatenate the chromosome/fragment descripion and - // the start position of the FASTA sequence line, replacing the tabs with colons. 
- key.clear(); - - key.append(current_split_indexseq.getBytes(UTF8), 0, current_split_indexseq.getBytes(UTF8).length); - key.append(Integer.toString(current_split_pos).getBytes(UTF8), 0, Integer.toString(current_split_pos).getBytes(UTF8).length); - // replace tabs with : - byte[] bytes = key.getBytes(); - int temporaryEnd = key.getLength(); - for (int i = 0; i < temporaryEnd; ++i) - if (bytes[i] == '\t') - bytes[i] = ':'; - - fragment.clear(); - fragment.setPosition(current_split_pos); - fragment.setIndexSequence(current_split_indexseq); - fragment.getSequence().append(line.getBytes(), 0, line.getBytes().length); - } - } - - @Override - public boolean isSplitable(JobContext context, Path path) - { - CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); - return codec == null; - } - - public RecordReader createRecordReader( - InputSplit genericSplit, - TaskAttemptContext context) throws IOException, InterruptedException - { - context.setStatus(genericSplit.toString()); - return new FastaRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat - } + } + byte_counter += bytes_read; + } + } + + if (j == splits.size() - 1) { + FileSplit fsplit = + new FileSplit( + path, + prev_chromosome_byte_offset, + byte_counter - prev_chromosome_byte_offset, + origsplit.getLocations()); + newSplits.add(fsplit); + if (logger.isDebugEnabled()) { + logger.debug("adding split: {}", fsplit); + } + break; + } + } + + return newSplits; + } + + @Override + public boolean isSplitable(JobContext context, Path path) { + CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); + return codec == null; + } + + public RecordReader createRecordReader( + InputSplit genericSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + context.setStatus(genericSplit.toString()); + return new FastaRecordReader( + context.getConfiguration(), + (FileSplit) genericSplit); // cast as per example in TextInputFormat + } + + public static class FastaRecordReader extends RecordReader { + + // How long can a FASTA line get? + public static final int MAX_LINE_LENGTH = 20000; + // start: first valid data index + private long start; + // end: first index value beyond the slice, i.e. slice is in range [start,end) + private long end; + // pos: current position in file + private long pos; + // file: the file being read + private Path file; + // current_split_pos: the current (chromosome) position within the split + private int current_split_pos; + // current_split_indexseq: the description/chromosome name + private String current_split_indexseq = null; + private LineReader lineReader; + private InputStream inputStream; + private Text currentKey = new Text(); + private ReferenceFragment currentValue = new ReferenceFragment(); + private Text buffer = new Text(); + + public FastaRecordReader(Configuration conf, FileSplit split) throws IOException { + setConf(conf); + file = split.getPath(); + start = split.getStart(); + end = start + split.getLength(); + current_split_pos = 1; + + FileSystem fs = file.getFileSystem(conf); + FSDataInputStream fileIn = fs.open(file); + + CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); + CompressionCodec codec = codecFactory.getCodec(file); + + if (codec == null) // no codec. Uncompressed file. 
+ { + positionAtFirstRecord(fileIn); + inputStream = fileIn; + } else { // compressed file + if (start != 0) { + throw new RuntimeException( + "Start position for compressed file is not 0! (found " + start + ")"); + } + + inputStream = codec.createInputStream(fileIn); + end = Long.MAX_VALUE; // read until the end of the file + } + + lineReader = new LineReader(inputStream); + } + + /* + * Position the input stream at the start of the first record. + */ + private void positionAtFirstRecord(FSDataInputStream stream) throws IOException { + if (start > 0) { + stream.seek(start); + } + + // we are now in a new chromosome/fragment, so read its name/index sequence + // and reset position counter + + // index sequence + LineReader reader = new LineReader(stream); + int bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + + current_split_indexseq = buffer.toString(); + // now get rid of '>' character + current_split_indexseq = current_split_indexseq.substring(1, current_split_indexseq.length()); + + // initialize position counter + current_split_pos = 1; + + if (logger.isDebugEnabled()) { + logger.debug("read index sequence: {}", current_split_indexseq); + } + start = start + bytesRead; + stream.seek(start); + pos = start; + } + + protected void setConf(Configuration conf) {} + + /** Added to use mapreduce API. */ + public void initialize(InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException {} + + /** Added to use mapreduce API. */ + public Text getCurrentKey() { + return currentKey; + } + + /** Added to use mapreduce API. */ + public ReferenceFragment getCurrentValue() { + return currentValue; + } + + /** Added to use mapreduce API. */ + public boolean nextKeyValue() throws IOException, InterruptedException { + return next(currentKey, currentValue); + } + + /** Close this RecordReader to future operations. */ + public void close() throws IOException { + inputStream.close(); + } + + /** Create an object of the appropriate type to be used as a key. */ + public Text createKey() { + return new Text(); + } + + /** Create an object of the appropriate type to be used as a value. */ + public ReferenceFragment createValue() { + return new ReferenceFragment(); + } + + /** Returns the current position in the input. */ + public long getPos() { + return pos; + } + + /** How much of the input has the RecordReader consumed i.e. */ + public float getProgress() { + if (start == end) { + return 1.0f; + } else { + return Math.min(1.0f, (pos - start) / (float) (end - start)); + } + } + + public String makePositionMessage(long pos) { + return file.toString() + ":" + pos; + } + + public String makePositionMessage() { + return file.toString() + ":" + pos; + } + + /** Reads the next key/value pair from the input for processing. */ + public boolean next(Text key, ReferenceFragment value) throws IOException { + if (pos >= end) { + return false; // past end of slice + } + + int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH); + pos += bytesRead; + if (bytesRead >= MAX_LINE_LENGTH) { + throw new RuntimeException( + "found abnormally large line (length " + + bytesRead + + ") at " + + makePositionMessage(pos - bytesRead) + + ": " + + Text.decode(buffer.getBytes(), 0, 500)); + } else if (bytesRead <= 0) { + return false; // EOF + } else { + scanFastaLine(buffer, key, value); + current_split_pos += bytesRead; + return true; + } + } + + private void scanFastaLine(Text line, Text key, ReferenceFragment fragment) { + // Build the key. 
We concatenate the chromosome/fragment descripion and + // the start position of the FASTA sequence line, replacing the tabs with colons. + key.clear(); + + key.append( + current_split_indexseq.getBytes(UTF8), 0, current_split_indexseq.getBytes(UTF8).length); + key.append( + Integer.toString(current_split_pos).getBytes(UTF8), + 0, + Integer.toString(current_split_pos).getBytes(UTF8).length); + // replace tabs with : + byte[] bytes = key.getBytes(); + int temporaryEnd = key.getLength(); + for (int i = 0; i < temporaryEnd; ++i) { + if (bytes[i] == '\t') { + bytes[i] = ':'; + } + } + + fragment.clear(); + fragment.setPosition(current_split_pos); + fragment.setIndexSequence(current_split_indexseq); + fragment.getSequence().append(line.getBytes(), 0, line.getBytes().length); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java index 7c008c7..f9131ed 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FastqInputFormat.java @@ -22,386 +22,367 @@ package org.seqdoop.hadoop_bam; +import java.io.EOFException; +import java.io.IOException; +import java.io.InputStream; +import java.util.regex.*; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.*; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.JobContext; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.apache.hadoop.io.compress.*; import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; - -import java.io.InputStream; -import java.io.IOException; -import java.io.EOFException; - -import java.util.regex.*; - +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.input.FileSplit; import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding; import org.seqdoop.hadoop_bam.util.ConfHelper; -public class FastqInputFormat extends FileInputFormat -{ - public static final String CONF_BASE_QUALITY_ENCODING = "hbam.fastq-input.base-quality-encoding"; - public static final String CONF_FILTER_FAILED_QC = "hbam.fastq-input.filter-failed-qc"; - public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "sanger"; - - public static class FastqRecordReader extends RecordReader - { - /* - * fastq format: - * := + - * := @\n\n+[]\n\n - * := [A-Za-z0-9_.:-]+ - * := [A-Za-z\n\.~]+ - * := [!-~\n]+ - * - * LP: this format is broken, no? You can have multi-line sequence and quality strings, - * and the quality encoding includes '@' in its valid character range. So how should one - * distinguish between \n@ as a record delimiter and and \n@ as part of a multi-line - * quality string? - * - * For now I'm going to assume single-line sequences. This works for our sequencing - * application. We'll see if someone complains in other applications. - */ - - // start: first valid data index - private long start; - // end: first index value beyond the slice, i.e. 
slice is in range [start,end) - private long end; - // pos: current position in file - private long pos; - // file: the file being read - private Path file; - - private LineReader lineReader; - private InputStream inputStream; - private Text currentKey = new Text(); - private SequencedFragment currentValue = new SequencedFragment(); - - /* If true, will scan the identifier for read data as specified in the Casava - * users' guide v1.8: - * @:::::: ::: - * After the first name that doesn't match lookForIlluminaIdentifier will be - * set to false and no further scanning will be done. - */ - private boolean lookForIlluminaIdentifier = true; - private static final Pattern ILLUMINA_PATTERN = Pattern.compile("([^:]+):(\\d+):([^:]*):(\\d+):(\\d+):(-?\\d+):(-?\\d+)\\s+([123]):([YN]):(\\d+):(.*)"); - - private Text buffer = new Text(); - - private BaseQualityEncoding qualityEncoding; - private boolean filterFailedQC = false; - - // How long can a read get? - private static final int MAX_LINE_LENGTH = 10000; - - public FastqRecordReader(Configuration conf, FileSplit split) throws IOException - { - setConf(conf); - file = split.getPath(); - start = split.getStart(); - end = start + split.getLength(); - - FileSystem fs = file.getFileSystem(conf); - FSDataInputStream fileIn = fs.open(file); - - CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); - CompressionCodec codec = codecFactory.getCodec(file); - - if (codec == null) // no codec. Uncompressed file. - { - positionAtFirstRecord(fileIn); - inputStream = fileIn; - } - else - { // compressed file - if (start != 0) - throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")"); - - inputStream = codec.createInputStream(fileIn); - end = Long.MAX_VALUE; // read until the end of the file - } - - lineReader = new LineReader(inputStream); - } - - protected void setConf(Configuration conf) - { - String encoding = - conf.get(FastqInputFormat.CONF_BASE_QUALITY_ENCODING, - conf.get(FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, - FastqInputFormat.CONF_BASE_QUALITY_ENCODING_DEFAULT)); - - if ("illumina".equals(encoding)) - qualityEncoding = BaseQualityEncoding.Illumina; - else if ("sanger".equals(encoding)) - qualityEncoding = BaseQualityEncoding.Sanger; - else - throw new RuntimeException("Unknown input base quality encoding value " + encoding); - - filterFailedQC = ConfHelper.parseBoolean( - conf.get(FastqInputFormat.CONF_FILTER_FAILED_QC, - conf.get(FormatConstants.CONF_INPUT_FILTER_FAILED_QC)), - false); - } - - /* - * Position the input stream at the start of the first record. - */ - private void positionAtFirstRecord(FSDataInputStream stream) throws IOException - { - if (start > 0) - { - // Advance to the start of the first record - // We use a temporary LineReader to read lines until we find the - // position of the right one. We then seek the file to that position. - stream.seek(start); - LineReader reader = new LineReader(stream); - - int bytesRead = 0; - do - { - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - if (bytesRead > 0 && (buffer.getLength() <= 0 || buffer.getBytes()[0] != '@')) - start += bytesRead; - else - { - // line starts with @. 
Read two more and verify that it starts with a + - // - // If this isn't the start of a record, we want to backtrack to its end - long backtrackPosition = start + bytesRead; - - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') - break; // all good! - else - { - // backtrack to the end of the record we thought was the start. - start = backtrackPosition; - stream.seek(start); - reader = new LineReader(stream); - } - } - } while (bytesRead > 0); - - stream.seek(start); - } - // else - // if start == 0 we presume it starts with a valid fastq record - pos = start; - } - - /** - * Added to use mapreduce API. - */ - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException - { - } - - /** - * Added to use mapreduce API. - */ - public Text getCurrentKey() - { - return currentKey; - } - - /** - * Added to use mapreduce API. - */ - public SequencedFragment getCurrentValue() - { - return currentValue; - } - - /** - * Added to use mapreduce API. - */ - public boolean nextKeyValue() throws IOException, InterruptedException - { - return next(currentKey, currentValue); - } - - /** - * Close this RecordReader to future operations. - */ - public void close() throws IOException - { - inputStream.close(); - } - - /** - * Create an object of the appropriate type to be used as a key. - */ - public Text createKey() - { - return new Text(); - } - - /** - * Create an object of the appropriate type to be used as a value. - */ - public SequencedFragment createValue() - { - return new SequencedFragment(); - } - - /** - * Returns the current position in the input. - */ - public long getPos() { return pos; } - - /** - * How much of the input has the RecordReader consumed i.e. - */ - public float getProgress() - { - if (start == end) - return 1.0f; - else - return Math.min(1.0f, (pos - start) / (float)(end - start)); - } - - public String makePositionMessage() - { - return file.toString() + ":" + pos; - } - - protected boolean lowLevelFastqRead(Text key, SequencedFragment value) throws IOException - { - // ID line - long skipped = lineReader.skip(1); // skip @ - pos += skipped; - if (skipped == 0) - return false; // EOF - - // ID - readLineInto(key); - // sequence - value.clear(); - readLineInto(value.getSequence()); - readLineInto(buffer); - if (buffer.getLength() == 0 || buffer.getBytes()[0] != '+') - throw new RuntimeException("unexpected fastq line separating sequence and quality at " + makePositionMessage() + ". Line: " + buffer + ". \nSequence ID: " + key); - readLineInto(value.getQuality()); - - // look for the Illumina-formatted name. Once it isn't found lookForIlluminaIdentifier will be set to false - lookForIlluminaIdentifier = lookForIlluminaIdentifier && scanIlluminaId(key, value); - if (!lookForIlluminaIdentifier) - scanNameForReadNumber(key, value); - return true; - } - - - /** - * Reads the next key/value pair from the input for processing. 
- */ - public boolean next(Text key, SequencedFragment value) throws IOException - { - if (pos >= end) - return false; // past end of slice - try - { - boolean gotData; - boolean goodRecord; - do { - gotData = lowLevelFastqRead(key, value); - goodRecord = gotData && (!filterFailedQC || value.getFilterPassed() == null || value.getFilterPassed()); - } while (gotData && !goodRecord); - - if (goodRecord) // goodRecord falso also when we couldn't read any more data - { - if (qualityEncoding == BaseQualityEncoding.Illumina) - { - try - { - // convert illumina to sanger scale - SequencedFragment.convertQuality(value.getQuality(), BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger); - } catch (FormatException e) { - throw new FormatException(e.getMessage() + " Position: " + makePositionMessage() + "; Sequence ID: " + key); - } - } - else // sanger qualities. - { - int outOfRangeElement = SequencedFragment.verifyQuality(value.getQuality(), BaseQualityEncoding.Sanger); - if (outOfRangeElement >= 0) - { - throw new FormatException("fastq base quality score out of range for Sanger Phred+33 format (found " + - (value.getQuality().getBytes()[outOfRangeElement] - FormatConstants.SANGER_OFFSET) + ").\n" + - "Although Sanger format has been requested, maybe qualities are in Illumina Phred+64 format?\n" + - "Position: " + makePositionMessage() + "; Sequence ID: " + key); - } - } - } - return goodRecord; - } - catch (EOFException e) { - throw new RuntimeException("unexpected end of file in fastq record at " + makePositionMessage() + ". Id: " + key.toString()); - } - } - - private void scanNameForReadNumber(Text name, SequencedFragment fragment) - { - // look for a /[0-9] at the end of the name - if (name.getLength() >= 2) - { - byte[] bytes = name.getBytes(); - int last = name.getLength() - 1; - - if (bytes[last-1] == '/' && bytes[last] >= '0' && bytes[last] <= '9') - fragment.setRead(bytes[last] - '0'); - } - } - - private boolean scanIlluminaId(Text name, SequencedFragment fragment) - { - Matcher m = ILLUMINA_PATTERN.matcher(name.toString()); - boolean matches = m.matches(); - if (matches) - { - fragment.setInstrument(m.group(1)); - fragment.setRunNumber(Integer.parseInt(m.group(2))); - fragment.setFlowcellId(m.group(3)); - fragment.setLane(Integer.parseInt(m.group(4))); - fragment.setTile(Integer.parseInt(m.group(5))); - fragment.setXpos(Integer.parseInt(m.group(6))); - fragment.setYpos(Integer.parseInt(m.group(7))); - fragment.setRead(Integer.parseInt(m.group(8))); - fragment.setFilterPassed("N".equals(m.group(9))); - fragment.setControlNumber(Integer.parseInt(m.group(10))); - fragment.setIndexSequence(m.group(11)); - } - return matches; - } - - private int readLineInto(Text dest) throws EOFException, IOException - { - int bytesRead = lineReader.readLine(dest, MAX_LINE_LENGTH); - if (bytesRead <= 0) - throw new EOFException(); - pos += bytesRead; - return bytesRead; - } - } - - @Override - public boolean isSplitable(JobContext context, Path path) - { - CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); - return codec == null; - } - - public RecordReader createRecordReader( - InputSplit genericSplit, - TaskAttemptContext context) throws IOException, InterruptedException - { - context.setStatus(genericSplit.toString()); - return new FastqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat - } +public class FastqInputFormat extends FileInputFormat { + + public static final String 
CONF_BASE_QUALITY_ENCODING = "hbam.fastq-input.base-quality-encoding"; + public static final String CONF_FILTER_FAILED_QC = "hbam.fastq-input.filter-failed-qc"; + public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "sanger"; + + @Override + public boolean isSplitable(JobContext context, Path path) { + CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); + return codec == null; + } + + public RecordReader createRecordReader( + InputSplit genericSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + context.setStatus(genericSplit.toString()); + return new FastqRecordReader( + context.getConfiguration(), + (FileSplit) genericSplit); // cast as per example in TextInputFormat + } + + public static class FastqRecordReader extends RecordReader { + /* + * fastq format: + * := + + * := @\n\n+[]\n\n + * := [A-Za-z0-9_.:-]+ + * := [A-Za-z\n\.~]+ + * := [!-~\n]+ + * + * LP: this format is broken, no? You can have multi-line sequence and quality strings, + * and the quality encoding includes '@' in its valid character range. So how should one + * distinguish between \n@ as a record delimiter and and \n@ as part of a multi-line + * quality string? + * + * For now I'm going to assume single-line sequences. This works for our sequencing + * application. We'll see if someone complains in other applications. + */ + + private static final Pattern ILLUMINA_PATTERN = + Pattern.compile( + "([^:]+):(\\d+):([^:]*):(\\d+):(\\d+):(-?\\d+):(-?\\d+)\\s+([123]):([YN]):(\\d+):(.*)"); + // How long can a read get? + private static final int MAX_LINE_LENGTH = 10000; + // start: first valid data index + private long start; + // end: first index value beyond the slice, i.e. slice is in range [start,end) + private long end; + // pos: current position in file + private long pos; + // file: the file being read + private Path file; + private LineReader lineReader; + private InputStream inputStream; + private Text currentKey = new Text(); + private SequencedFragment currentValue = new SequencedFragment(); + /* If true, will scan the identifier for read data as specified in the Casava + * users' guide v1.8: + * @:::::: ::: + * After the first name that doesn't match lookForIlluminaIdentifier will be + * set to false and no further scanning will be done. + */ + private boolean lookForIlluminaIdentifier = true; + private Text buffer = new Text(); + private BaseQualityEncoding qualityEncoding; + private boolean filterFailedQC = false; + + public FastqRecordReader(Configuration conf, FileSplit split) throws IOException { + setConf(conf); + file = split.getPath(); + start = split.getStart(); + end = start + split.getLength(); + + FileSystem fs = file.getFileSystem(conf); + FSDataInputStream fileIn = fs.open(file); + + CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); + CompressionCodec codec = codecFactory.getCodec(file); + + if (codec == null) // no codec. Uncompressed file. + { + positionAtFirstRecord(fileIn); + inputStream = fileIn; + } else { // compressed file + if (start != 0) { + throw new RuntimeException( + "Start position for compressed file is not 0! 
(found " + start + ")"); + } + + inputStream = codec.createInputStream(fileIn); + end = Long.MAX_VALUE; // read until the end of the file + } + + lineReader = new LineReader(inputStream); + } + + protected void setConf(Configuration conf) { + String encoding = + conf.get( + FastqInputFormat.CONF_BASE_QUALITY_ENCODING, + conf.get( + FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, + FastqInputFormat.CONF_BASE_QUALITY_ENCODING_DEFAULT)); + + if ("illumina".equals(encoding)) { + qualityEncoding = BaseQualityEncoding.Illumina; + } else if ("sanger".equals(encoding)) { + qualityEncoding = BaseQualityEncoding.Sanger; + } else { + throw new RuntimeException("Unknown input base quality encoding value " + encoding); + } + + filterFailedQC = + ConfHelper.parseBoolean( + conf.get( + FastqInputFormat.CONF_FILTER_FAILED_QC, + conf.get(FormatConstants.CONF_INPUT_FILTER_FAILED_QC)), + false); + } + + /* + * Position the input stream at the start of the first record. + */ + private void positionAtFirstRecord(FSDataInputStream stream) throws IOException { + if (start > 0) { + // Advance to the start of the first record + // We use a temporary LineReader to read lines until we find the + // position of the right one. We then seek the file to that position. + stream.seek(start); + LineReader reader = new LineReader(stream); + + int bytesRead = 0; + do { + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + if (bytesRead > 0 && (buffer.getLength() <= 0 || buffer.getBytes()[0] != '@')) { + start += bytesRead; + } else { + // line starts with @. Read two more and verify that it starts with a + + // + // If this isn't the start of a record, we want to backtrack to its end + long backtrackPosition = start + bytesRead; + + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + if (bytesRead > 0 && buffer.getLength() > 0 && buffer.getBytes()[0] == '+') { + break; // all good! + } else { + // backtrack to the end of the record we thought was the start. + start = backtrackPosition; + stream.seek(start); + reader = new LineReader(stream); + } + } + } while (bytesRead > 0); + + stream.seek(start); + } + // else + // if start == 0 we presume it starts with a valid fastq record + pos = start; + } + + /** Added to use mapreduce API. */ + public void initialize(InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException {} + + /** Added to use mapreduce API. */ + public Text getCurrentKey() { + return currentKey; + } + + /** Added to use mapreduce API. */ + public SequencedFragment getCurrentValue() { + return currentValue; + } + + /** Added to use mapreduce API. */ + public boolean nextKeyValue() throws IOException, InterruptedException { + return next(currentKey, currentValue); + } + + /** Close this RecordReader to future operations. */ + public void close() throws IOException { + inputStream.close(); + } + + /** Create an object of the appropriate type to be used as a key. */ + public Text createKey() { + return new Text(); + } + + /** Create an object of the appropriate type to be used as a value. */ + public SequencedFragment createValue() { + return new SequencedFragment(); + } + + /** Returns the current position in the input. */ + public long getPos() { + return pos; + } + + /** How much of the input has the RecordReader consumed i.e. 
*/ + public float getProgress() { + if (start == end) { + return 1.0f; + } else { + return Math.min(1.0f, (pos - start) / (float) (end - start)); + } + } + + public String makePositionMessage() { + return file.toString() + ":" + pos; + } + + protected boolean lowLevelFastqRead(Text key, SequencedFragment value) throws IOException { + // ID line + long skipped = lineReader.skip(1); // skip @ + pos += skipped; + if (skipped == 0) { + return false; // EOF + } + + // ID + readLineInto(key); + // sequence + value.clear(); + readLineInto(value.getSequence()); + readLineInto(buffer); + if (buffer.getLength() == 0 || buffer.getBytes()[0] != '+') { + throw new RuntimeException( + "unexpected fastq line separating sequence and quality at " + + makePositionMessage() + + ". Line: " + + buffer + + ". \nSequence ID: " + + key); + } + readLineInto(value.getQuality()); + + // look for the Illumina-formatted name. Once it isn't found lookForIlluminaIdentifier will + // be set to false + lookForIlluminaIdentifier = lookForIlluminaIdentifier && scanIlluminaId(key, value); + if (!lookForIlluminaIdentifier) { + scanNameForReadNumber(key, value); + } + return true; + } + + /** Reads the next key/value pair from the input for processing. */ + public boolean next(Text key, SequencedFragment value) throws IOException { + if (pos >= end) { + return false; // past end of slice + } + try { + boolean gotData; + boolean goodRecord; + do { + gotData = lowLevelFastqRead(key, value); + goodRecord = + gotData + && (!filterFailedQC + || value.getFilterPassed() == null + || value.getFilterPassed()); + } while (gotData && !goodRecord); + + if (goodRecord) // goodRecord falso also when we couldn't read any more data + { + if (qualityEncoding == BaseQualityEncoding.Illumina) { + try { + // convert illumina to sanger scale + SequencedFragment.convertQuality( + value.getQuality(), BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger); + } catch (FormatException e) { + throw new FormatException( + e.getMessage() + " Position: " + makePositionMessage() + "; Sequence ID: " + key); + } + } else // sanger qualities. + { + int outOfRangeElement = + SequencedFragment.verifyQuality(value.getQuality(), BaseQualityEncoding.Sanger); + if (outOfRangeElement >= 0) { + throw new FormatException( + "fastq base quality score out of range for Sanger Phred+33 format (found " + + (value.getQuality().getBytes()[outOfRangeElement] + - FormatConstants.SANGER_OFFSET) + + ").\n" + + "Although Sanger format has been requested, maybe qualities are in Illumina Phred+64 format?\n" + + "Position: " + + makePositionMessage() + + "; Sequence ID: " + + key); + } + } + } + return goodRecord; + } catch (EOFException e) { + throw new RuntimeException( + "unexpected end of file in fastq record at " + + makePositionMessage() + + ". 
Id: " + + key.toString()); + } + } + + private void scanNameForReadNumber(Text name, SequencedFragment fragment) { + // look for a /[0-9] at the end of the name + if (name.getLength() >= 2) { + byte[] bytes = name.getBytes(); + int last = name.getLength() - 1; + + if (bytes[last - 1] == '/' && bytes[last] >= '0' && bytes[last] <= '9') { + fragment.setRead(bytes[last] - '0'); + } + } + } + + private boolean scanIlluminaId(Text name, SequencedFragment fragment) { + Matcher m = ILLUMINA_PATTERN.matcher(name.toString()); + boolean matches = m.matches(); + if (matches) { + fragment.setInstrument(m.group(1)); + fragment.setRunNumber(Integer.parseInt(m.group(2))); + fragment.setFlowcellId(m.group(3)); + fragment.setLane(Integer.parseInt(m.group(4))); + fragment.setTile(Integer.parseInt(m.group(5))); + fragment.setXpos(Integer.parseInt(m.group(6))); + fragment.setYpos(Integer.parseInt(m.group(7))); + fragment.setRead(Integer.parseInt(m.group(8))); + fragment.setFilterPassed("N".equals(m.group(9))); + fragment.setControlNumber(Integer.parseInt(m.group(10))); + fragment.setIndexSequence(m.group(11)); + } + return matches; + } + + private int readLineInto(Text dest) throws EOFException, IOException { + int bytesRead = lineReader.readLine(dest, MAX_LINE_LENGTH); + if (bytesRead <= 0) { + throw new EOFException(); + } + pos += bytesRead; + return bytesRead; + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FastqOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/FastqOutputFormat.java index 3550c89..d6ed1d7 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FastqOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FastqOutputFormat.java @@ -23,9 +23,8 @@ package org.seqdoop.hadoop_bam; import java.io.DataOutputStream; -import java.io.OutputStream; import java.io.IOException; - +import java.io.OutputStream; import java.nio.charset.Charset; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; @@ -38,147 +37,150 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.ReflectionUtils; - import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding; -/** - * Output format for the fastq format. - */ +/** Output format for the fastq format. */ // If a key is provided with the SequencedFragment, the key is used as the sequence // id and the meta-info from the SequencedFragment (if any) is dropped. 
// If the key is null, then the format will attempt to create an // Illumina-style fastq id as specified in the Casava users' guide v1.8: -// @instrument:run number:flowcell ID:lane:tile:x-pos:y-pos \s+ read:is filtered:control number:index sequence +// @instrument:run number:flowcell ID:lane:tile:x-pos:y-pos \s+ read:is filtered:control +// number:index sequence // -public class FastqOutputFormat extends TextOutputFormat -{ - public static final String CONF_BASE_QUALITY_ENCODING = "hbam.fastq-output.base-quality-encoding"; - public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "sanger"; - public static final Charset UTF8 = Charset.forName("UTF8"); - - static final byte[] PLUS_LINE; - static { - try { - PLUS_LINE = "\n+\n".getBytes("us-ascii"); - } catch (java.io.UnsupportedEncodingException e) { - throw new RuntimeException("us-ascii encoding not supported!"); - } - } - - public static class FastqRecordWriter extends RecordWriter - { - protected StringBuilder sBuilder = new StringBuilder(800); - protected Text buffer = new Text(); - protected OutputStream out; - protected BaseQualityEncoding baseQualityFormat; - - public FastqRecordWriter(Configuration conf, OutputStream out) - { - this.out = out; - setConf(conf); - } - - public void setConf(Configuration conf) - { - String setting = conf.get(CONF_BASE_QUALITY_ENCODING, CONF_BASE_QUALITY_ENCODING_DEFAULT); - if ("illumina".equals(setting)) - baseQualityFormat = BaseQualityEncoding.Illumina; - else if ("sanger".equals(setting)) - baseQualityFormat = BaseQualityEncoding.Sanger; - else - throw new RuntimeException("Invalid property value '" + setting + "' for " + CONF_BASE_QUALITY_ENCODING + ". Valid values are 'illumina' or 'sanger'"); - } - - protected String makeId(SequencedFragment seq) throws IOException - { - String delim = ":"; - sBuilder.delete(0, sBuilder.length()); // clear - - sBuilder.append( seq.getInstrument() == null ? "" : seq.getInstrument() ).append(delim); - sBuilder.append( seq.getRunNumber() == null ? "" : seq.getRunNumber().toString() ).append(delim); - sBuilder.append( seq.getFlowcellId() == null ? "" : seq.getFlowcellId() ).append(delim); - sBuilder.append( seq.getLane() == null ? "" : seq.getLane().toString() ).append(delim); - sBuilder.append( seq.getTile() == null ? "" : seq.getTile().toString() ).append(delim); - sBuilder.append( seq.getXpos() == null ? "" : seq.getXpos().toString() ).append(delim); - sBuilder.append( seq.getYpos() == null ? "" : seq.getYpos().toString() ); - - sBuilder.append(" "); // space - - sBuilder.append( seq.getRead() == null ? "" : seq.getRead().toString() ).append(delim); - sBuilder.append(seq.getFilterPassed() == null || seq.getFilterPassed() ? "N" : "Y"); - sBuilder.append(delim); - - sBuilder.append( seq.getControlNumber() == null ? "0" : seq.getControlNumber().toString()).append(delim); - sBuilder.append( seq.getIndexSequence() == null ? 
"" : seq.getIndexSequence()); - - return sBuilder.toString(); - } - - public void write(Text key, SequencedFragment seq) throws IOException - { - // write the id line - out.write('@'); - if (key != null) - out.write(key.getBytes(), 0, key.getLength()); - else - out.write(makeId(seq).getBytes(UTF8)); - out.write('\n'); - - // write the sequence and separator - out.write(seq.getSequence().getBytes(), 0, seq.getSequence().getLength()); - out.write(PLUS_LINE); - - // now the quality - if (baseQualityFormat == BaseQualityEncoding.Sanger) - out.write(seq.getQuality().getBytes(), 0, seq.getQuality().getLength()); - else if (baseQualityFormat == BaseQualityEncoding.Illumina) - { - buffer.set(seq.getQuality()); - SequencedFragment.convertQuality(buffer, BaseQualityEncoding.Sanger, baseQualityFormat); - out.write(buffer.getBytes(), 0, buffer.getLength()); - } - else - throw new RuntimeException("FastqOutputFormat: unknown base quality format " + baseQualityFormat); - - // and the final newline - out.write('\n'); - } - - public void close(TaskAttemptContext task) throws IOException - { - out.close(); - } +public class FastqOutputFormat extends TextOutputFormat { + + public static final String CONF_BASE_QUALITY_ENCODING = "hbam.fastq-output.base-quality-encoding"; + public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "sanger"; + public static final Charset UTF8 = Charset.forName("UTF8"); + + static final byte[] PLUS_LINE; + + static { + try { + PLUS_LINE = "\n+\n".getBytes("us-ascii"); + } catch (java.io.UnsupportedEncodingException e) { + throw new RuntimeException("us-ascii encoding not supported!"); + } + } + + public RecordWriter getRecordWriter(TaskAttemptContext task) + throws IOException { + Configuration conf = task.getConfiguration(); + boolean isCompressed = getCompressOutput(task); + + CompressionCodec codec = null; + String extension = ""; + + if (isCompressed) { + Class codecClass = + getOutputCompressorClass(task, GzipCodec.class); + codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); + extension = codec.getDefaultExtension(); + } + + Path file = getDefaultWorkFile(task, extension); + FileSystem fs = file.getFileSystem(conf); + + OutputStream output; + + if (isCompressed) { + FSDataOutputStream fileOut = fs.create(file, false); + output = new DataOutputStream(codec.createOutputStream(fileOut)); + } else { + output = fs.create(file, false); + } + + return new FastqRecordWriter(conf, output); } - public RecordWriter getRecordWriter(TaskAttemptContext task) - throws IOException - { - Configuration conf = task.getConfiguration(); - boolean isCompressed = getCompressOutput(task); - - CompressionCodec codec = null; - String extension = ""; - - if (isCompressed) - { - Class codecClass = getOutputCompressorClass(task, GzipCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); - extension = codec.getDefaultExtension(); - } - - Path file = getDefaultWorkFile(task, extension); - FileSystem fs = file.getFileSystem(conf); - - OutputStream output; - - if (isCompressed) - { - FSDataOutputStream fileOut = fs.create(file, false); - output = new DataOutputStream(codec.createOutputStream(fileOut)); - } - else - output = fs.create(file, false); - - return new FastqRecordWriter(conf, output); - } + public static class FastqRecordWriter extends RecordWriter { + + protected StringBuilder sBuilder = new StringBuilder(800); + protected Text buffer = new Text(); + protected OutputStream out; + protected BaseQualityEncoding baseQualityFormat; + + 
public FastqRecordWriter(Configuration conf, OutputStream out) { + this.out = out; + setConf(conf); + } + + public void setConf(Configuration conf) { + String setting = conf.get(CONF_BASE_QUALITY_ENCODING, CONF_BASE_QUALITY_ENCODING_DEFAULT); + if ("illumina".equals(setting)) { + baseQualityFormat = BaseQualityEncoding.Illumina; + } else if ("sanger".equals(setting)) { + baseQualityFormat = BaseQualityEncoding.Sanger; + } else { + throw new RuntimeException( + "Invalid property value '" + + setting + + "' for " + + CONF_BASE_QUALITY_ENCODING + + ". Valid values are 'illumina' or 'sanger'"); + } + } + + protected String makeId(SequencedFragment seq) throws IOException { + String delim = ":"; + sBuilder.delete(0, sBuilder.length()); // clear + + sBuilder.append(seq.getInstrument() == null ? "" : seq.getInstrument()).append(delim); + sBuilder + .append(seq.getRunNumber() == null ? "" : seq.getRunNumber().toString()) + .append(delim); + sBuilder.append(seq.getFlowcellId() == null ? "" : seq.getFlowcellId()).append(delim); + sBuilder.append(seq.getLane() == null ? "" : seq.getLane().toString()).append(delim); + sBuilder.append(seq.getTile() == null ? "" : seq.getTile().toString()).append(delim); + sBuilder.append(seq.getXpos() == null ? "" : seq.getXpos().toString()).append(delim); + sBuilder.append(seq.getYpos() == null ? "" : seq.getYpos().toString()); + + sBuilder.append(" "); // space + + sBuilder.append(seq.getRead() == null ? "" : seq.getRead().toString()).append(delim); + sBuilder.append(seq.getFilterPassed() == null || seq.getFilterPassed() ? "N" : "Y"); + sBuilder.append(delim); + + sBuilder + .append(seq.getControlNumber() == null ? "0" : seq.getControlNumber().toString()) + .append(delim); + sBuilder.append(seq.getIndexSequence() == null ? "" : seq.getIndexSequence()); + + return sBuilder.toString(); + } + + public void write(Text key, SequencedFragment seq) throws IOException { + // write the id line + out.write('@'); + if (key != null) { + out.write(key.getBytes(), 0, key.getLength()); + } else { + out.write(makeId(seq).getBytes(UTF8)); + } + out.write('\n'); + + // write the sequence and separator + out.write(seq.getSequence().getBytes(), 0, seq.getSequence().getLength()); + out.write(PLUS_LINE); + + // now the quality + if (baseQualityFormat == BaseQualityEncoding.Sanger) { + out.write(seq.getQuality().getBytes(), 0, seq.getQuality().getLength()); + } else if (baseQualityFormat == BaseQualityEncoding.Illumina) { + buffer.set(seq.getQuality()); + SequencedFragment.convertQuality(buffer, BaseQualityEncoding.Sanger, baseQualityFormat); + out.write(buffer.getBytes(), 0, buffer.getLength()); + } else { + throw new RuntimeException( + "FastqOutputFormat: unknown base quality format " + baseQualityFormat); + } + + // and the final newline + out.write('\n'); + } + + public void close(TaskAttemptContext task) throws IOException { + out.close(); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FileVirtualSplit.java b/src/main/java/org/seqdoop/hadoop_bam/FileVirtualSplit.java index 5aa2a8a..506fabe 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FileVirtualSplit.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FileVirtualSplit.java @@ -22,105 +22,127 @@ package org.seqdoop.hadoop_bam; -import java.io.DataOutput; import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapreduce.InputSplit; -/** Like a {@link 
org.apache.hadoop.mapreduce.lib.input.FileSplit}, but uses - * BGZF virtual offsets to fit with {@link - * htsjdk.samtools.util.BlockCompressedInputStream}. +/** + * Like a {@link org.apache.hadoop.mapreduce.lib.input.FileSplit}, but uses BGZF virtual offsets to + * fit with {@link htsjdk.samtools.util.BlockCompressedInputStream}. */ public class FileVirtualSplit extends InputSplit implements Writable { - private Path file; - private long vStart; - private long vEnd; - private final String[] locations; - private long[] intervalFilePointers; - - private static final String[] NO_LOCATIONS = {}; - - public FileVirtualSplit() { locations = NO_LOCATIONS; } - - public FileVirtualSplit(Path f, long vs, long ve, String[] locs) { - file = f; - vStart = vs; - vEnd = ve; - locations = locs; - } - - public FileVirtualSplit(Path f, long vs, long ve, String[] locs, long[] intervalFilePointers) { - file = f; - vStart = vs; - vEnd = ve; - locations = locs; - this.intervalFilePointers = intervalFilePointers; - } - - @Override public String[] getLocations() { return locations; } - - /** Inexact due to the nature of virtual offsets. - * - * We can't know how many blocks there are in between two file offsets, nor - * how large those blocks are. So this uses only the difference between the - * file offsets—unless that difference is zero, in which case the split is - * wholly contained in one block and thus we can give an exact result. - */ - @Override public long getLength() { - final long vsHi = vStart & ~0xffff; - final long veHi = vEnd & ~0xffff; - final long hiDiff = veHi - vsHi; - return hiDiff == 0 ? ((vEnd & 0xffff) - (vStart & 0xffff)) : hiDiff; - } - - public Path getPath() { return file; } - - /** Inclusive. */ - public long getStartVirtualOffset() { return vStart; } - - /** Exclusive. */ - public long getEndVirtualOffset() { return vEnd; } - - public void setStartVirtualOffset(long vo) { vStart = vo; } - public void setEndVirtualOffset(long vo) { vEnd = vo; } - - /** - * @return pairs of virtual file pointers for all intervals that should be used for - * filtering the split, or null if there are none. These correspond to - * BAMFileSpan chunk start/stop pointers in htsjdk. 
- */ - public long[] getIntervalFilePointers() { - return intervalFilePointers; - } - - @Override public void write(DataOutput out) throws IOException { - Text.writeString(out, file.toString()); - out.writeLong(vStart); - out.writeLong(vEnd); - out.writeBoolean(intervalFilePointers != null); - if (intervalFilePointers != null) { - out.writeInt(intervalFilePointers.length); - for (int i = 0; i < intervalFilePointers.length; i++) { - out.writeLong(intervalFilePointers[i]); - } - } - } - @Override public void readFields(DataInput in) throws IOException { - file = new Path(Text.readString(in)); - vStart = in.readLong(); - vEnd = in.readLong(); - if (in.readBoolean()) { - intervalFilePointers = new long[in.readInt()]; - for (int i = 0; i < intervalFilePointers.length; i++) { - intervalFilePointers[i] = in.readLong(); - } - } - } - - @Override - public String toString() { return file + ":" + vStart + "-" + vEnd; } + + private static final String[] NO_LOCATIONS = {}; + private final String[] locations; + private Path file; + private long vStart; + private long vEnd; + private long[] intervalFilePointers; + + public FileVirtualSplit() { + locations = NO_LOCATIONS; + } + + public FileVirtualSplit(Path f, long vs, long ve, String[] locs) { + file = f; + vStart = vs; + vEnd = ve; + locations = locs; + } + + public FileVirtualSplit(Path f, long vs, long ve, String[] locs, long[] intervalFilePointers) { + file = f; + vStart = vs; + vEnd = ve; + locations = locs; + this.intervalFilePointers = intervalFilePointers; + } + + @Override + public String[] getLocations() { + return locations; + } + + /** + * Inexact due to the nature of virtual offsets. + * + *
<p>
We can't know how many blocks there are in between two file offsets, nor how large those + * blocks are. So this uses only the difference between the file offsets—unless that difference is + * zero, in which case the split is wholly contained in one block and thus we can give an exact + * result. + */ + @Override + public long getLength() { + final long vsHi = vStart & ~0xffff; + final long veHi = vEnd & ~0xffff; + final long hiDiff = veHi - vsHi; + return hiDiff == 0 ? ((vEnd & 0xffff) - (vStart & 0xffff)) : hiDiff; + } + + public Path getPath() { + return file; + } + + /** Inclusive. */ + public long getStartVirtualOffset() { + return vStart; + } + + public void setStartVirtualOffset(long vo) { + vStart = vo; + } + + /** Exclusive. */ + public long getEndVirtualOffset() { + return vEnd; + } + + public void setEndVirtualOffset(long vo) { + vEnd = vo; + } + + /** + * @return pairs of virtual file pointers for all intervals that should be used for filtering the + * split, or null if there are none. These correspond to BAMFileSpan chunk + * start/stop pointers in htsjdk. + */ + public long[] getIntervalFilePointers() { + return intervalFilePointers; + } + + @Override + public void write(DataOutput out) throws IOException { + Text.writeString(out, file.toString()); + out.writeLong(vStart); + out.writeLong(vEnd); + out.writeBoolean(intervalFilePointers != null); + if (intervalFilePointers != null) { + out.writeInt(intervalFilePointers.length); + for (int i = 0; i < intervalFilePointers.length; i++) { + out.writeLong(intervalFilePointers[i]); + } + } + } + + @Override + public void readFields(DataInput in) throws IOException { + file = new Path(Text.readString(in)); + vStart = in.readLong(); + vEnd = in.readLong(); + if (in.readBoolean()) { + intervalFilePointers = new long[in.readInt()]; + for (int i = 0; i < intervalFilePointers.length; i++) { + intervalFilePointers[i] = in.readLong(); + } + } + } + + @Override + public String toString() { + return file + ":" + vStart + "-" + vEnd; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FormatConstants.java b/src/main/java/org/seqdoop/hadoop_bam/FormatConstants.java index ec54531..bc02670 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FormatConstants.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FormatConstants.java @@ -22,38 +22,33 @@ package org.seqdoop.hadoop_bam; -public class FormatConstants -{ - /** - * Offset by which Sanger-style ASCII-encoded quality scores are shifted. - */ - public static final int SANGER_OFFSET = 33; - - /** - * Maximum encodable quality score for Sanger Phred+33 encoded base qualities. - * - * Range of legal values is [0,93], according to wikipedia on 10/9/2013: - * http://en.wikipedia.org/wiki/FASTQ_format#Quality - */ - public static final int SANGER_MAX = 93; - - /** - * Offset by which Illumina-style ASCII-encoded quality scores are shifted. - */ - public static final int ILLUMINA_OFFSET = 64; - - /** - * Maximum encodable quality score for Illumina Phred+64 encoded base qualities. - */ - public static final int ILLUMINA_MAX = 62; - - /** - * Encodings for base quality formats. - */ - public enum BaseQualityEncoding { Illumina, Sanger }; - - private FormatConstants() {} // no instantiation - - public static final String CONF_INPUT_BASE_QUALITY_ENCODING = "hbam.input.base-quality-encoding"; - public static final String CONF_INPUT_FILTER_FAILED_QC = "hbam.input.filter-failed-qc"; +public class FormatConstants { + + /** Offset by which Sanger-style ASCII-encoded quality scores are shifted. 
*/ + public static final int SANGER_OFFSET = 33; + + /** + * Maximum encodable quality score for Sanger Phred+33 encoded base qualities. + * + *
<p>
Range of legal values is [0,93], according to wikipedia on 10/9/2013: + * http://en.wikipedia.org/wiki/FASTQ_format#Quality + */ + public static final int SANGER_MAX = 93; + + /** Offset by which Illumina-style ASCII-encoded quality scores are shifted. */ + public static final int ILLUMINA_OFFSET = 64; + + /** Maximum encodable quality score for Illumina Phred+64 encoded base qualities. */ + public static final int ILLUMINA_MAX = 62; + + public static final String CONF_INPUT_BASE_QUALITY_ENCODING = "hbam.input.base-quality-encoding";; + + public static final String CONF_INPUT_FILTER_FAILED_QC = "hbam.input.filter-failed-qc"; + + private FormatConstants() {} // no instantiation + /** Encodings for base quality formats. */ + public enum BaseQualityEncoding { + Illumina, + Sanger + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/FormatException.java b/src/main/java/org/seqdoop/hadoop_bam/FormatException.java index 208904b..ad9671c 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/FormatException.java +++ b/src/main/java/org/seqdoop/hadoop_bam/FormatException.java @@ -22,11 +22,11 @@ package org.seqdoop.hadoop_bam; -public class FormatException extends RuntimeException -{ - private static final long serialVersionUID = 1L; - public FormatException(String msg) - { - super(msg); - } +public class FormatException extends RuntimeException { + + private static final long serialVersionUID = 1L; + + public FormatException(String msg) { + super(msg); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringAnySAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringAnySAMOutputFormat.java index 8093de0..89d01a7 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringAnySAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringAnySAMOutputFormat.java @@ -22,107 +22,102 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.SAMFileHeader; import java.io.IOException; import java.io.InputStream; - -import htsjdk.samtools.SAMFileHeader; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -/** Writes only the SAM records, not the key. +/** + * Writes only the SAM records, not the key. * - *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or - * {@link #readSAMHeaderFrom} before {@link #getRecordWriter} is called.
</p>
+ *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or {@link + * #readSAMHeaderFrom} before {@link #getRecordWriter} is called. * - *
<p>
By default, writes the SAM header to the output file(s). This - * can be disabled, because in distributed usage one often ends up with (and, - * for decent performance, wants to end up with) the output split into multiple - * parts, which are easier to concatenate if the header is not present in each - * file.
</p>
+ *
<p>
By default, writes the SAM header to the output file(s). This can be disabled, because in + * distributed usage one often ends up with (and, for decent performance, wants to end up with) the + * output split into multiple parts, which are easier to concatenate if the header is not present in + * each file. */ public class KeyIgnoringAnySAMOutputFormat extends AnySAMOutputFormat { - protected SAMFileHeader header; - - /** Whether the header will be written, defaults to true.. - */ - public static final String WRITE_HEADER_PROPERTY = - "hadoopbam.anysam.write-header"; - - public KeyIgnoringAnySAMOutputFormat(SAMFormat fmt) { - super(fmt); - } - public KeyIgnoringAnySAMOutputFormat(Configuration conf) { - super(conf); - - if (format == null) - throw new IllegalArgumentException( - "unknown SAM format: OUTPUT_SAM_FORMAT_PROPERTY not set"); - } - public KeyIgnoringAnySAMOutputFormat(Configuration conf, Path path) { - super(conf); - - if (format == null) { - format = SAMFormat.inferFromFilePath(path); - - if (format == null) - throw new IllegalArgumentException("unknown SAM format: " + path); - } - } - - public SAMFileHeader getSAMHeader() { return header; } - public void setSAMHeader(SAMFileHeader header) { this.header = header; } - - public void readSAMHeaderFrom(Path path, Configuration conf) - throws IOException - { - this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); - } - public void readSAMHeaderFrom(InputStream in, Configuration conf) { - this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); - } - - /** setSAMHeader or readSAMHeaderFrom must have - * been called first. - */ - @Override public RecordWriter getRecordWriter( - TaskAttemptContext ctx) - throws IOException - { - return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); - } - - // Allows wrappers to provide their own work file. - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, Path out) - throws IOException - { - if (this.header == null) - throw new IOException( - "Can't create a RecordWriter without the SAM header"); - - final boolean writeHeader = ctx.getConfiguration().getBoolean( - WRITE_HEADER_PROPERTY, true); - - switch (format) { - case BAM: - return new KeyIgnoringBAMRecordWriter( - out, header, writeHeader, ctx); - - case SAM: - return new KeyIgnoringSAMRecordWriter( - out, header, writeHeader, ctx); - - case CRAM: - return new KeyIgnoringCRAMRecordWriter( - out, header, writeHeader, ctx); - - default: assert false; return null; - } - } + /** Whether the header will be written, defaults to true.. 
*/ + public static final String WRITE_HEADER_PROPERTY = "hadoopbam.anysam.write-header"; + + protected SAMFileHeader header; + + public KeyIgnoringAnySAMOutputFormat(SAMFormat fmt) { + super(fmt); + } + + public KeyIgnoringAnySAMOutputFormat(Configuration conf) { + super(conf); + + if (format == null) { + throw new IllegalArgumentException("unknown SAM format: OUTPUT_SAM_FORMAT_PROPERTY not set"); + } + } + + public KeyIgnoringAnySAMOutputFormat(Configuration conf, Path path) { + super(conf); + + if (format == null) { + format = SAMFormat.inferFromFilePath(path); + + if (format == null) { + throw new IllegalArgumentException("unknown SAM format: " + path); + } + } + } + + public SAMFileHeader getSAMHeader() { + return header; + } + + public void setSAMHeader(SAMFileHeader header) { + this.header = header; + } + + public void readSAMHeaderFrom(Path path, Configuration conf) throws IOException { + this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); + } + + public void readSAMHeaderFrom(InputStream in, Configuration conf) { + this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); + } + + /** setSAMHeader or readSAMHeaderFrom must have been called first. */ + @Override + public RecordWriter getRecordWriter(TaskAttemptContext ctx) + throws IOException { + return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); + } + + // Allows wrappers to provide their own work file. + public RecordWriter getRecordWriter(TaskAttemptContext ctx, Path out) + throws IOException { + if (this.header == null) { + throw new IOException("Can't create a RecordWriter without the SAM header"); + } + + final boolean writeHeader = ctx.getConfiguration().getBoolean(WRITE_HEADER_PROPERTY, true); + + switch (format) { + case BAM: + return new KeyIgnoringBAMRecordWriter(out, header, writeHeader, ctx); + + case SAM: + return new KeyIgnoringSAMRecordWriter(out, header, writeHeader, ctx); + + case CRAM: + return new KeyIgnoringCRAMRecordWriter(out, header, writeHeader, ctx); + + default: + assert false; + return null; + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMOutputFormat.java index 4670214..ea62148 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMOutputFormat.java @@ -22,72 +22,73 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.SAMFileHeader; import java.io.IOException; import java.io.InputStream; - -import htsjdk.samtools.SAMFileHeader; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -/** Writes only the BAM records, not the key. +/** + * Writes only the BAM records, not the key. * - *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or - * {@link #readSAMHeaderFrom} before {@link #getRecordWriter} is called.
</p>
+ *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or {@link + * #readSAMHeaderFrom} before {@link #getRecordWriter} is called. * - *
<p>
By default, writes the SAM header to the output file(s). This - * can be disabled, because in distributed usage one often ends up with (and, - * for decent performance, wants to end up with) the output split into multiple - * parts, which are easier to concatenate if the header is not present in each - * file.
</p>
+ *
<p>
By default, writes the SAM header to the output file(s). This can be disabled, because in + * distributed usage one often ends up with (and, for decent performance, wants to end up with) the + * output split into multiple parts, which are easier to concatenate if the header is not present in + * each file. */ public class KeyIgnoringBAMOutputFormat extends BAMOutputFormat { - protected SAMFileHeader header; - private boolean writeHeader = true; - public KeyIgnoringBAMOutputFormat() {} + protected SAMFileHeader header; + private boolean writeHeader = true; + + public KeyIgnoringBAMOutputFormat() {} + + /** Whether the header will be written or not. */ + public boolean getWriteHeader() { + return writeHeader; + } + + /** Set whether the header will be written or not. */ + public void setWriteHeader(boolean b) { + writeHeader = b; + } - /** Whether the header will be written or not. */ - public boolean getWriteHeader() { return writeHeader; } + public SAMFileHeader getSAMHeader() { + return header; + } - /** Set whether the header will be written or not. */ - public void setWriteHeader(boolean b) { writeHeader = b; } + public void setSAMHeader(SAMFileHeader header) { + this.header = header; + } - public SAMFileHeader getSAMHeader() { return header; } - public void setSAMHeader(SAMFileHeader header) { this.header = header; } + public void readSAMHeaderFrom(Path path, Configuration conf) throws IOException { + this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); + } - public void readSAMHeaderFrom(Path path, Configuration conf) - throws IOException - { - this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); - } - public void readSAMHeaderFrom(InputStream in, Configuration conf) { - this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); - } + public void readSAMHeaderFrom(InputStream in, Configuration conf) { + this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); + } - /** setSAMHeader or readSAMHeaderFrom must have - * been called first. - */ - @Override public RecordWriter getRecordWriter( - TaskAttemptContext ctx) - throws IOException - { - return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); - } + /** setSAMHeader or readSAMHeaderFrom must have been called first. */ + @Override + public RecordWriter getRecordWriter(TaskAttemptContext ctx) + throws IOException { + return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); + } - // Allows wrappers to provide their own work file. - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, Path out) - throws IOException - { - if (this.header == null) - throw new IOException( - "Can't create a RecordWriter without the SAM header"); + // Allows wrappers to provide their own work file. 
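As an aside, the path-taking overload that follows is what wrappers call once a header is available. A minimal, hypothetical caller might look like the sketch below; the NullWritable key type, the method and class names, and the paths are illustrative assumptions, not code introduced by this commit.

import htsjdk.samtools.SAMFileHeader;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.seqdoop.hadoop_bam.KeyIgnoringBAMOutputFormat;
import org.seqdoop.hadoop_bam.SAMRecordWritable;

class BamRecordWriterSketch {
  // Opens a BAM record writer for an explicit work file; the header must be set first.
  static RecordWriter<NullWritable, SAMRecordWritable> open(
      TaskAttemptContext ctx, SAMFileHeader header, Path workFile) throws Exception {
    KeyIgnoringBAMOutputFormat<NullWritable> format = new KeyIgnoringBAMOutputFormat<>();
    format.setSAMHeader(header); // or format.readSAMHeaderFrom(headerPath, ctx.getConfiguration())
    format.setWriteHeader(true); // the default; use false when output parts are concatenated later
    return format.getRecordWriter(ctx, workFile);
  }
}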
+ public RecordWriter getRecordWriter(TaskAttemptContext ctx, Path out) + throws IOException { + if (this.header == null) { + throw new IOException("Can't create a RecordWriter without the SAM header"); + } - return new KeyIgnoringBAMRecordWriter(out, header, writeHeader, ctx); - } + return new KeyIgnoringBAMRecordWriter(out, header, writeHeader, ctx); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMRecordWriter.java index 270f095..c630be9 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBAMRecordWriter.java @@ -22,34 +22,31 @@ package org.seqdoop.hadoop_bam; -import java.io.IOException; -import java.io.OutputStream; - import htsjdk.samtools.SAMFileHeader; - +import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; -/** A convenience class that you can use as a RecordWriter for BAM files. +/** + * A convenience class that you can use as a RecordWriter for BAM files. * - *
<p>
The write function ignores the key, just outputting the SAMRecord.
</p>
+ *
<p>
The write function ignores the key, just outputting the SAMRecord. */ public class KeyIgnoringBAMRecordWriter extends BAMRecordWriter { - public KeyIgnoringBAMRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - super(output, input, writeHeader, ctx); - } - public KeyIgnoringBAMRecordWriter( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - super(output, header, writeHeader, ctx); - } - @Override public void write(K ignored, SAMRecordWritable rec) throws IOException { - writeAlignment(rec.get()); - } + public KeyIgnoringBAMRecordWriter( + Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) throws IOException { + super(output, input, writeHeader, ctx); + } + + public KeyIgnoringBAMRecordWriter( + Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + super(output, header, writeHeader, ctx); + } + + @Override + public void write(K ignored, SAMRecordWritable rec) throws IOException { + writeAlignment(rec.get()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBCFRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBCFRecordWriter.java index 17080e9..f51d888 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBCFRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringBCFRecordWriter.java @@ -22,41 +22,37 @@ package org.seqdoop.hadoop_bam; +import htsjdk.variant.vcf.VCFHeader; import java.io.IOException; import java.io.OutputStream; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import htsjdk.variant.vcf.VCFHeader; - -/** A convenience class that you can use as a RecordWriter for BCF files. +/** + * A convenience class that you can use as a RecordWriter for BCF files. * - *
<p>
The write function ignores the key, just outputting the - * VariantContext.
</p>
+ *
<p>
The write function ignores the key, just outputting the VariantContext. */ public class KeyIgnoringBCFRecordWriter extends BCFRecordWriter { - public KeyIgnoringBCFRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - super(output, input, writeHeader, ctx); - } - public KeyIgnoringBCFRecordWriter( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - super(output, header, writeHeader, ctx); - } - public KeyIgnoringBCFRecordWriter( - OutputStream output, VCFHeader header, boolean writeHeader) - throws IOException - { - super(output, header, writeHeader); - } - - @Override public void write(K ignored, VariantContextWritable vc) { - writeRecord(vc.get()); - } + + public KeyIgnoringBCFRecordWriter( + Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) throws IOException { + super(output, input, writeHeader, ctx); + } + + public KeyIgnoringBCFRecordWriter( + Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + super(output, header, writeHeader, ctx); + } + + public KeyIgnoringBCFRecordWriter(OutputStream output, VCFHeader header, boolean writeHeader) + throws IOException { + super(output, header, writeHeader); + } + + @Override + public void write(K ignored, VariantContextWritable vc) { + writeRecord(vc.get()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMOutputFormat.java index c7a4126..79d8eb0 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMOutputFormat.java @@ -1,71 +1,72 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.SAMFileHeader; import java.io.IOException; import java.io.InputStream; - -import htsjdk.samtools.SAMFileHeader; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -/** Writes only the BAM records, not the key. +/** + * Writes only the BAM records, not the key. * - *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or - * {@link #readSAMHeaderFrom} before {@link #getRecordWriter} is called.
</p>
+ *
<p>
A {@link SAMFileHeader} must be provided via {@link #setSAMHeader} or {@link + * #readSAMHeaderFrom} before {@link #getRecordWriter} is called. * - *
<p>
By default, writes the SAM header to the output file(s). This - * can be disabled, because in distributed usage one often ends up with (and, - * for decent performance, wants to end up with) the output split into multiple - * parts, which are easier to concatenate if the header is not present in each - * file.
</p>
+ *
<p>
By default, writes the SAM header to the output file(s). This can be disabled, because in + * distributed usage one often ends up with (and, for decent performance, wants to end up with) the + * output split into multiple parts, which are easier to concatenate if the header is not present in + * each file. */ public class KeyIgnoringCRAMOutputFormat extends CRAMOutputFormat { - protected SAMFileHeader header; - private boolean writeHeader = true; - public KeyIgnoringCRAMOutputFormat() {} + protected SAMFileHeader header; + private boolean writeHeader = true; - /** Whether the header will be written or not. */ - public boolean getWriteHeader() { return writeHeader; } + public KeyIgnoringCRAMOutputFormat() {} - /** Set whether the header will be written or not. */ - public void setWriteHeader(boolean b) { writeHeader = b; } + /** Whether the header will be written or not. */ + public boolean getWriteHeader() { + return writeHeader; + } - public SAMFileHeader getSAMHeader() { return header; } - public void setSAMHeader(SAMFileHeader header) { this.header = header; } + /** Set whether the header will be written or not. */ + public void setWriteHeader(boolean b) { + writeHeader = b; + } - public void readSAMHeaderFrom(Path path, Configuration conf) - throws IOException - { - this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); - } - public void readSAMHeaderFrom(InputStream in, Configuration conf) { - this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); - } + public SAMFileHeader getSAMHeader() { + return header; + } - /** setSAMHeader or readSAMHeaderFrom must have - * been called first. - */ - @Override public RecordWriter getRecordWriter( - TaskAttemptContext ctx) - throws IOException - { - return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); - } + public void setSAMHeader(SAMFileHeader header) { + this.header = header; + } + + public void readSAMHeaderFrom(Path path, Configuration conf) throws IOException { + this.header = SAMHeaderReader.readSAMHeaderFrom(path, conf); + } - // Allows wrappers to provide their own work file. - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, Path out) - throws IOException - { - if (this.header == null) - throw new IOException( - "Can't create a RecordWriter without the SAM header"); + public void readSAMHeaderFrom(InputStream in, Configuration conf) { + this.header = SAMHeaderReader.readSAMHeaderFrom(in, conf); + } - return new KeyIgnoringCRAMRecordWriter(out, header, writeHeader, ctx); + /** setSAMHeader or readSAMHeaderFrom must have been called first. */ + @Override + public RecordWriter getRecordWriter(TaskAttemptContext ctx) + throws IOException { + return getRecordWriter(ctx, getDefaultWorkFile(ctx, "")); + } + + // Allows wrappers to provide their own work file. 
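For the CRAM variant the pattern is the same; the sketch below is illustrative only, not part of this commit, and the key type and paths are placeholders. Writing CRAM additionally requires a reference to be configured for the underlying writer; that setting is defined elsewhere in hadoop-bam and is omitted here.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.seqdoop.hadoop_bam.KeyIgnoringCRAMOutputFormat;
import org.seqdoop.hadoop_bam.SAMRecordWritable;

class CramRecordWriterSketch {
  // Reads the SAM header from an existing file, then opens a CRAM record writer.
  static RecordWriter<NullWritable, SAMRecordWritable> open(
      TaskAttemptContext ctx, Path headerSource, Path workFile) throws IOException {
    Configuration conf = ctx.getConfiguration();
    KeyIgnoringCRAMOutputFormat<NullWritable> format = new KeyIgnoringCRAMOutputFormat<>();
    format.readSAMHeaderFrom(headerSource, conf); // or format.setSAMHeader(header)
    return format.getRecordWriter(ctx, workFile);
  }
}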
+ public RecordWriter getRecordWriter(TaskAttemptContext ctx, Path out) + throws IOException { + if (this.header == null) { + throw new IOException("Can't create a RecordWriter without the SAM header"); } + + return new KeyIgnoringCRAMRecordWriter(out, header, writeHeader, ctx); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMRecordWriter.java index c190741..1657742 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringCRAMRecordWriter.java @@ -1,33 +1,30 @@ package org.seqdoop.hadoop_bam; import htsjdk.samtools.SAMFileHeader; +import java.io.IOException; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import java.io.IOException; -import java.io.OutputStream; - -/** A convenience class that you can use as a RecordWriter for CRAM files. +/** + * A convenience class that you can use as a RecordWriter for CRAM files. * - *
<p>
The write function ignores the key, just outputting the SAMRecord.
</p>
+ *
<p>
The write function ignores the key, just outputting the SAMRecord. */ public class KeyIgnoringCRAMRecordWriter extends CRAMRecordWriter { - public KeyIgnoringCRAMRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - super(output, input, writeHeader, ctx); - } - public KeyIgnoringCRAMRecordWriter( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - super(output, header, writeHeader, ctx); - } + public KeyIgnoringCRAMRecordWriter( + Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) throws IOException { + super(output, input, writeHeader, ctx); + } + + public KeyIgnoringCRAMRecordWriter( + Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + super(output, header, writeHeader, ctx); + } - @Override public void write(K ignored, SAMRecordWritable rec) { - writeAlignment(rec.get()); - } + @Override + public void write(K ignored, SAMRecordWritable rec) { + writeAlignment(rec.get()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringSAMRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringSAMRecordWriter.java index 9fc3249..ac8c787 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringSAMRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringSAMRecordWriter.java @@ -22,40 +22,37 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.SAMFileHeader; import java.io.IOException; import java.io.OutputStream; - -import htsjdk.samtools.SAMFileHeader; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; -/** A convenience class that you can use as a RecordWriter for SAM files. +/** + * A convenience class that you can use as a RecordWriter for SAM files. * - *
<p>
The write function ignores the key, just outputting the SAMRecord.
</p>
+ *
<p>
The write function ignores the key, just outputting the SAMRecord. */ public class KeyIgnoringSAMRecordWriter extends SAMRecordWriter { - public KeyIgnoringSAMRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - super(output, input, writeHeader, ctx); - } - public KeyIgnoringSAMRecordWriter( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - super(output, header, writeHeader, ctx); - } - public KeyIgnoringSAMRecordWriter( - OutputStream output, SAMFileHeader header, boolean writeHeader) - throws IOException - { - super(output, header, writeHeader); - } - - @Override public void write(K ignored, SAMRecordWritable rec) { - writeAlignment(rec.get()); - } + + public KeyIgnoringSAMRecordWriter( + Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) throws IOException { + super(output, input, writeHeader, ctx); + } + + public KeyIgnoringSAMRecordWriter( + Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + super(output, header, writeHeader, ctx); + } + + public KeyIgnoringSAMRecordWriter(OutputStream output, SAMFileHeader header, boolean writeHeader) + throws IOException { + super(output, header, writeHeader); + } + + @Override + public void write(K ignored, SAMRecordWritable rec) { + writeAlignment(rec.get()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFOutputFormat.java index 04282c8..9674c92 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFOutputFormat.java @@ -22,8 +22,9 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.seekablestream.SeekableStream; +import htsjdk.variant.vcf.VCFHeader; import java.io.IOException; - import java.io.OutputStream; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -31,122 +32,127 @@ import org.apache.hadoop.io.compress.CompressionCodec; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - -import htsjdk.samtools.seekablestream.SeekableStream; -import htsjdk.variant.vcf.VCFHeader; - import org.apache.hadoop.util.ReflectionUtils; import org.seqdoop.hadoop_bam.util.BGZFCodec; import org.seqdoop.hadoop_bam.util.VCFHeaderReader; import org.seqdoop.hadoop_bam.util.WrapSeekable; -/** Writes only the VCF records, not the key. +/** + * Writes only the VCF records, not the key. * - *
<p>
A {@link VCFHeader} must be provided via {@link #setHeader} or {@link - * #readHeaderFrom} before {@link #getRecordWriter} is called.
</p>
+ *
<p>
A {@link VCFHeader} must be provided via {@link #setHeader} or {@link #readHeaderFrom} before + * {@link #getRecordWriter} is called. * - *
<p>
By default, writes the VCF header to the output file(s). This can be - * disabled, because in distributed usage one often ends up with (and, for - * decent performance, wants to end up with) the output split into multiple - * parts, which are easier to concatenate if the header is not present in each - * file.
</p>
+ *
<p>
By default, writes the VCF header to the output file(s). This can be disabled, because in + * distributed usage one often ends up with (and, for decent performance, wants to end up with) the + * output split into multiple parts, which are easier to concatenate if the header is not present in + * each file. */ public class KeyIgnoringVCFOutputFormat extends VCFOutputFormat { - protected VCFHeader header; - - public KeyIgnoringVCFOutputFormat(VCFFormat fmt) { super(fmt); } - public KeyIgnoringVCFOutputFormat(Configuration conf) { - super(conf); - if (format == null) - throw new IllegalArgumentException( - "unknown VCF format: OUTPUT_VCF_FORMAT_PROPERTY not set"); - } - public KeyIgnoringVCFOutputFormat(Configuration conf, Path path) { - super(conf); - if (format == null) { - format = VCFFormat.inferFromFilePath(path); - - if (format == null) - throw new IllegalArgumentException("unknown VCF format: " + path); - } - } - - /** Whether the header will be written, defaults to true. */ - public static final String WRITE_HEADER_PROPERTY = - "hadoopbam.vcf.write-header"; - - public VCFHeader getHeader() { return header; } - public void setHeader(VCFHeader header) { this.header = header; } - - public void readHeaderFrom(Path path, FileSystem fs) throws IOException { - SeekableStream i = WrapSeekable.openPath(fs, path); - readHeaderFrom(i); - i.close(); - } - public void readHeaderFrom(SeekableStream in) throws IOException { - this.header = VCFHeaderReader.readHeaderFrom(in); - } - - /** setHeader or readHeaderFrom must have been - * called first. - */ - @Override public RecordWriter getRecordWriter( - TaskAttemptContext ctx) - throws IOException - { - Configuration conf = ctx.getConfiguration(); - boolean isCompressed = getCompressOutput(ctx); - CompressionCodec codec = null; - String extension = ""; - if (isCompressed) { - Class codecClass = - getOutputCompressorClass(ctx, BGZFCodec.class); - codec = ReflectionUtils.newInstance(codecClass, conf); - extension = codec.getDefaultExtension(); - } - Path file = getDefaultWorkFile(ctx, extension); - if (!isCompressed) { - return getRecordWriter(ctx, file); - } else { - FileSystem fs = file.getFileSystem(conf); - return getRecordWriter(ctx, codec.createOutputStream(fs.create(file))); - } - } - - // Allows wrappers to provide their own work file. - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, Path out) - throws IOException - { - if (this.header == null) - throw new IOException( - "Can't create a RecordWriter without the VCF header"); - - final boolean wh = ctx.getConfiguration().getBoolean( - WRITE_HEADER_PROPERTY, true); - - switch (format) { - case BCF: return new KeyIgnoringBCFRecordWriter(out,header,wh,ctx); - case VCF: return new KeyIgnoringVCFRecordWriter(out,header,wh,ctx); - default: assert false; return null; - } - } - - private RecordWriter getRecordWriter( - TaskAttemptContext ctx, OutputStream outputStream) - throws IOException - { - if (this.header == null) - throw new IOException( - "Can't create a RecordWriter without the VCF header"); - - final boolean wh = ctx.getConfiguration().getBoolean( - WRITE_HEADER_PROPERTY, true); - - switch (format) { - case BCF: return new KeyIgnoringBCFRecordWriter(outputStream,header,wh); - case VCF: return new KeyIgnoringVCFRecordWriter(outputStream,header,wh); - default: assert false; return null; - } - } + + /** Whether the header will be written, defaults to true. 
*/ + public static final String WRITE_HEADER_PROPERTY = "hadoopbam.vcf.write-header"; + + protected VCFHeader header; + + public KeyIgnoringVCFOutputFormat(VCFFormat fmt) { + super(fmt); + } + + public KeyIgnoringVCFOutputFormat(Configuration conf) { + super(conf); + if (format == null) { + throw new IllegalArgumentException("unknown VCF format: OUTPUT_VCF_FORMAT_PROPERTY not set"); + } + } + + public KeyIgnoringVCFOutputFormat(Configuration conf, Path path) { + super(conf); + if (format == null) { + format = VCFFormat.inferFromFilePath(path); + + if (format == null) { + throw new IllegalArgumentException("unknown VCF format: " + path); + } + } + } + + public VCFHeader getHeader() { + return header; + } + + public void setHeader(VCFHeader header) { + this.header = header; + } + + public void readHeaderFrom(Path path, FileSystem fs) throws IOException { + SeekableStream i = WrapSeekable.openPath(fs, path); + readHeaderFrom(i); + i.close(); + } + + public void readHeaderFrom(SeekableStream in) throws IOException { + this.header = VCFHeaderReader.readHeaderFrom(in); + } + + /** setHeader or readHeaderFrom must have been called first. */ + @Override + public RecordWriter getRecordWriter(TaskAttemptContext ctx) + throws IOException { + Configuration conf = ctx.getConfiguration(); + boolean isCompressed = getCompressOutput(ctx); + CompressionCodec codec = null; + String extension = ""; + if (isCompressed) { + Class codecClass = getOutputCompressorClass(ctx, BGZFCodec.class); + codec = ReflectionUtils.newInstance(codecClass, conf); + extension = codec.getDefaultExtension(); + } + Path file = getDefaultWorkFile(ctx, extension); + if (!isCompressed) { + return getRecordWriter(ctx, file); + } else { + FileSystem fs = file.getFileSystem(conf); + return getRecordWriter(ctx, codec.createOutputStream(fs.create(file))); + } + } + + // Allows wrappers to provide their own work file. 
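The overload that follows mirrors the SAM/BAM case for VCF and BCF; when output compression is enabled, the single-argument getRecordWriter above wraps the stream with the BGZF codec instead. A hedged usage sketch, where the key type, helper name, and paths are placeholders rather than anything defined in this patch:

import java.io.IOException;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.seqdoop.hadoop_bam.KeyIgnoringVCFOutputFormat;
import org.seqdoop.hadoop_bam.VCFFormat;
import org.seqdoop.hadoop_bam.VariantContextWritable;

class VcfRecordWriterSketch {
  // Reads the VCF header from a template file, then opens a plain-VCF record writer.
  static RecordWriter<NullWritable, VariantContextWritable> open(
      TaskAttemptContext ctx, FileSystem fs, Path headerSource, Path workFile) throws IOException {
    KeyIgnoringVCFOutputFormat<NullWritable> format =
        new KeyIgnoringVCFOutputFormat<>(VCFFormat.VCF); // or VCFFormat.BCF
    format.readHeaderFrom(headerSource, fs); // or format.setHeader(existingHeader)
    return format.getRecordWriter(ctx, workFile);
  }
}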
+ public RecordWriter getRecordWriter(TaskAttemptContext ctx, Path out) + throws IOException { + if (this.header == null) { + throw new IOException("Can't create a RecordWriter without the VCF header"); + } + + final boolean wh = ctx.getConfiguration().getBoolean(WRITE_HEADER_PROPERTY, true); + + switch (format) { + case BCF: + return new KeyIgnoringBCFRecordWriter(out, header, wh, ctx); + case VCF: + return new KeyIgnoringVCFRecordWriter(out, header, wh, ctx); + default: + assert false; + return null; + } + } + + private RecordWriter getRecordWriter( + TaskAttemptContext ctx, OutputStream outputStream) throws IOException { + if (this.header == null) { + throw new IOException("Can't create a RecordWriter without the VCF header"); + } + + final boolean wh = ctx.getConfiguration().getBoolean(WRITE_HEADER_PROPERTY, true); + + switch (format) { + case BCF: + return new KeyIgnoringBCFRecordWriter(outputStream, header, wh); + case VCF: + return new KeyIgnoringVCFRecordWriter(outputStream, header, wh); + default: + assert false; + return null; + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFRecordWriter.java index d3fb1c5..f89a506 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/KeyIgnoringVCFRecordWriter.java @@ -22,41 +22,37 @@ package org.seqdoop.hadoop_bam; +import htsjdk.variant.vcf.VCFHeader; import java.io.IOException; import java.io.OutputStream; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import htsjdk.variant.vcf.VCFHeader; - -/** A convenience class that you can use as a RecordWriter for VCF files. +/** + * A convenience class that you can use as a RecordWriter for VCF files. * - *
<p>
The write function ignores the key, just outputting the - * VariantContext.
</p>
+ *
<p>
The write function ignores the key, just outputting the VariantContext. */ public class KeyIgnoringVCFRecordWriter extends VCFRecordWriter { - public KeyIgnoringVCFRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - super(output, input, writeHeader, ctx); - } - public KeyIgnoringVCFRecordWriter( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - super(output, header, writeHeader, ctx); - } - public KeyIgnoringVCFRecordWriter( - OutputStream output, VCFHeader header, boolean writeHeader) - throws IOException - { - super(output, header, writeHeader); - } - - @Override public void write(K ignored, VariantContextWritable vc) { - writeRecord(vc.get()); - } + + public KeyIgnoringVCFRecordWriter( + Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) throws IOException { + super(output, input, writeHeader, ctx); + } + + public KeyIgnoringVCFRecordWriter( + Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + super(output, header, writeHeader, ctx); + } + + public KeyIgnoringVCFRecordWriter(OutputStream output, VCFHeader header, boolean writeHeader) + throws IOException { + super(output, header, writeHeader); + } + + @Override + public void write(K ignored, VariantContextWritable vc) { + writeRecord(vc.get()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java b/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java index 3b9b0d3..3a908b1 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java @@ -29,83 +29,125 @@ /** A factory for the kind of lazy {@link BAMRecord} used internally. 
*/ public class LazyBAMRecordFactory implements SAMRecordFactory { - @Override public SAMRecord createSAMRecord(SAMFileHeader hdr) { - throw new UnsupportedOperationException( - "LazyBAMRecordFactory can only create BAM records"); - } - - @Override public BAMRecord createBAMRecord( - SAMFileHeader hdr, - int referenceSequenceIndex, int alignmentStart, - short readNameLength, short mappingQuality, - int indexingBin, int cigarLen, int flags, int readLen, - int mateReferenceSequenceIndex, int mateAlignmentStart, - int insertSize, byte[] variableLengthBlock) - { - return new LazyBAMRecord( - hdr, referenceSequenceIndex, alignmentStart, readNameLength, - mappingQuality, indexingBin, cigarLen, flags, readLen, - mateReferenceSequenceIndex, mateAlignmentStart, insertSize, - variableLengthBlock); - } + + @Override + public SAMRecord createSAMRecord(SAMFileHeader hdr) { + throw new UnsupportedOperationException("LazyBAMRecordFactory can only create BAM records"); + } + + @Override + public BAMRecord createBAMRecord( + SAMFileHeader hdr, + int referenceSequenceIndex, + int alignmentStart, + short readNameLength, + short mappingQuality, + int indexingBin, + int cigarLen, + int flags, + int readLen, + int mateReferenceSequenceIndex, + int mateAlignmentStart, + int insertSize, + byte[] variableLengthBlock) { + return new LazyBAMRecord( + hdr, + referenceSequenceIndex, + alignmentStart, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceSequenceIndex, + mateAlignmentStart, + insertSize, + variableLengthBlock); + } } class LazyBAMRecord extends BAMRecord { - private boolean decodedRefIdx = false; - private boolean decodedMateRefIdx = false; - - public LazyBAMRecord( - SAMFileHeader hdr, int referenceID, int coordinate, short readNameLength, - short mappingQuality, int indexingBin, int cigarLen, int flags, - int readLen, int mateReferenceID, int mateCoordinate, int insertSize, - byte[] restOfData) - { - super( - hdr, referenceID, coordinate, readNameLength, mappingQuality, - indexingBin, cigarLen, flags, readLen, mateReferenceID, - mateCoordinate, insertSize, restOfData); - } - - @Override public void setReferenceIndex(final int referenceIndex) { - mReferenceIndex = referenceIndex; - decodedRefIdx = false; - } - @Override public void setMateReferenceIndex(final int referenceIndex) { - mMateReferenceIndex = referenceIndex; - decodedMateRefIdx = false; - } - - @Override public String getReferenceName() { - if (mReferenceIndex != null && !decodedRefIdx) { - decodedRefIdx = true; - super.setReferenceIndex(mReferenceIndex); - } - return super.getReferenceName(); - } - - @Override public String getMateReferenceName() { - if (mMateReferenceIndex != null && !decodedMateRefIdx) { - decodedMateRefIdx = true; - super.setMateReferenceIndex(mMateReferenceIndex); - } - return super.getMateReferenceName(); - } - - @Override protected void eagerDecode() { - getReferenceName(); - getMateReferenceName(); - super.eagerDecode(); - } - - @Override - public boolean equals(Object o) { - // don't use decoded flags for equality check - return super.equals(o); - } - - @Override - public int hashCode() { - // don't use decoded flags for hash code - return super.hashCode(); - } + + private boolean decodedRefIdx = false; + private boolean decodedMateRefIdx = false; + + public LazyBAMRecord( + SAMFileHeader hdr, + int referenceID, + int coordinate, + short readNameLength, + short mappingQuality, + int indexingBin, + int cigarLen, + int flags, + int readLen, + int mateReferenceID, + int 
mateCoordinate, + int insertSize, + byte[] restOfData) { + super( + hdr, + referenceID, + coordinate, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceID, + mateCoordinate, + insertSize, + restOfData); + } + + @Override + public void setReferenceIndex(final int referenceIndex) { + mReferenceIndex = referenceIndex; + decodedRefIdx = false; + } + + @Override + public void setMateReferenceIndex(final int referenceIndex) { + mMateReferenceIndex = referenceIndex; + decodedMateRefIdx = false; + } + + @Override + public String getReferenceName() { + if (mReferenceIndex != null && !decodedRefIdx) { + decodedRefIdx = true; + super.setReferenceIndex(mReferenceIndex); + } + return super.getReferenceName(); + } + + @Override + public String getMateReferenceName() { + if (mMateReferenceIndex != null && !decodedMateRefIdx) { + decodedMateRefIdx = true; + super.setMateReferenceIndex(mMateReferenceIndex); + } + return super.getMateReferenceName(); + } + + @Override + protected void eagerDecode() { + getReferenceName(); + getMateReferenceName(); + super.eagerDecode(); + } + + @Override + public boolean equals(Object o) { + // don't use decoded flags for equality check + return super.equals(o); + } + + @Override + public int hashCode() { + // don't use decoded flags for hash code + return super.hashCode(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyBCFGenotypesContext.java b/src/main/java/org/seqdoop/hadoop_bam/LazyBCFGenotypesContext.java index 0de8cb6..f244823 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyBCFGenotypesContext.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyBCFGenotypesContext.java @@ -22,11 +22,6 @@ package org.seqdoop.hadoop_bam; -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; - import htsjdk.tribble.TribbleException; import htsjdk.variant.bcf2.BCF2Decoder; import htsjdk.variant.bcf2.BCF2GenotypeFieldDecoders; @@ -36,114 +31,123 @@ import htsjdk.variant.variantcontext.GenotypeBuilder; import htsjdk.variant.variantcontext.LazyGenotypesContext; import htsjdk.variant.vcf.VCFHeader; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; // XXX: Since we cannot use BCF2LazyGenotypesDecoder, the parsing functionality // is, unfortunately, simply copied from there. public class LazyBCFGenotypesContext extends LazyParsingGenotypesContext { - /** Takes ownership of the given byte[]: don't modify its contents. 
*/ - public LazyBCFGenotypesContext( - List alleles, int fields, byte[] unparsed, int count) - { - super(new Parser(alleles, fields), unparsed, count); - } - - public static class HeaderDataCache - implements LazyParsingGenotypesContext.HeaderDataCache - { - public static final BCF2Decoder decoder = new BCF2Decoder(); - - private BCF2GenotypeFieldDecoders genoFieldDecoders; - private List fieldDict; - private GenotypeBuilder[] builders; - - private ArrayList sampleNamesInOrder; - private HashMap sampleNameToOffset; - - @Override public void setHeader(VCFHeader header) { - genoFieldDecoders = new BCF2GenotypeFieldDecoders(header); - fieldDict = BCF2Utils.makeDictionary(header); - - builders = new GenotypeBuilder[header.getNGenotypeSamples()]; - final List genotypeSamples = header.getGenotypeSamples(); - for (int i = 0; i < builders.length; ++i) - builders[i] = new GenotypeBuilder(genotypeSamples.get(i)); - - sampleNamesInOrder = header.getSampleNamesInOrder(); - sampleNameToOffset = header.getSampleNameToOffset(); - } - - public BCF2GenotypeFieldDecoders getGenoFieldDecoders() { - return genoFieldDecoders; - } - public List getFieldDict() { return fieldDict; } - public GenotypeBuilder[] getBuilders () { return builders; } - - public ArrayList getSampleNamesInOrder() { - return sampleNamesInOrder; - } - public HashMap getSampleNameToOffset() { - return sampleNameToOffset; - } - } - - public static class Parser extends LazyParsingGenotypesContext.Parser { - private final List alleles; - private final int fields; - - private HeaderDataCache hd = null; - - public Parser(List alleles, int fields) { - this.alleles = alleles; - this.fields = fields; - } - - @Override public void setHeaderDataCache( - LazyParsingGenotypesContext.HeaderDataCache data) - { - this.hd = (HeaderDataCache)data; - } - - @Override public LazyGenotypesContext.LazyData parse(final Object data) { - if (hd == null) - throw new IllegalStateException( - "Cannot decode genotypes without HeaderDataCache"); - - final GenotypeBuilder[] builders = hd.getBuilders(); - - // The following is essentially the contents of - // BCF2LazyGenotypesDecoder.parse(). - - try { - hd.decoder.setRecordBytes((byte[])data); - - for (final GenotypeBuilder gb : builders) - gb.reset(true); - - for (int i = 0; i < fields; ++i) { - final String field = - hd.getFieldDict().get( - (Integer)hd.decoder.decodeTypedValue()); - - final byte type = hd.decoder.readTypeDescriptor(); - final int numElems = hd.decoder.decodeNumberOfElements(type); - - hd.getGenoFieldDecoders().getDecoder(field).decode( - alleles, field, hd.decoder, type, numElems, builders); - } - - final ArrayList genotypes = - new ArrayList(builders.length); - for (final GenotypeBuilder gb : builders) - genotypes.add(gb.make()); - - return new LazyGenotypesContext.LazyData( - genotypes, - hd.getSampleNamesInOrder(), hd.getSampleNameToOffset()); - } catch (IOException e) { - throw new TribbleException( - "Unexpected IOException parsing genotypes data block", e); - } - } - } + /** Takes ownership of the given byte[]: don't modify its contents. 
*/ + public LazyBCFGenotypesContext(List alleles, int fields, byte[] unparsed, int count) { + super(new Parser(alleles, fields), unparsed, count); + } + + public static class HeaderDataCache implements LazyParsingGenotypesContext.HeaderDataCache { + + public static final BCF2Decoder decoder = new BCF2Decoder(); + + private BCF2GenotypeFieldDecoders genoFieldDecoders; + private List fieldDict; + private GenotypeBuilder[] builders; + + private ArrayList sampleNamesInOrder; + private HashMap sampleNameToOffset; + + @Override + public void setHeader(VCFHeader header) { + genoFieldDecoders = new BCF2GenotypeFieldDecoders(header); + fieldDict = BCF2Utils.makeDictionary(header); + + builders = new GenotypeBuilder[header.getNGenotypeSamples()]; + final List genotypeSamples = header.getGenotypeSamples(); + for (int i = 0; i < builders.length; ++i) { + builders[i] = new GenotypeBuilder(genotypeSamples.get(i)); + } + + sampleNamesInOrder = header.getSampleNamesInOrder(); + sampleNameToOffset = header.getSampleNameToOffset(); + } + + public BCF2GenotypeFieldDecoders getGenoFieldDecoders() { + return genoFieldDecoders; + } + + public List getFieldDict() { + return fieldDict; + } + + public GenotypeBuilder[] getBuilders() { + return builders; + } + + public ArrayList getSampleNamesInOrder() { + return sampleNamesInOrder; + } + + public HashMap getSampleNameToOffset() { + return sampleNameToOffset; + } + } + + public static class Parser extends LazyParsingGenotypesContext.Parser { + + private final List alleles; + private final int fields; + + private HeaderDataCache hd = null; + + public Parser(List alleles, int fields) { + this.alleles = alleles; + this.fields = fields; + } + + @Override + public void setHeaderDataCache(LazyParsingGenotypesContext.HeaderDataCache data) { + this.hd = (HeaderDataCache) data; + } + + @Override + public LazyGenotypesContext.LazyData parse(final Object data) { + if (hd == null) { + throw new IllegalStateException("Cannot decode genotypes without HeaderDataCache"); + } + + final GenotypeBuilder[] builders = hd.getBuilders(); + + // The following is essentially the contents of + // BCF2LazyGenotypesDecoder.parse(). 
+ + try { + hd.decoder.setRecordBytes((byte[]) data); + + for (final GenotypeBuilder gb : builders) { + gb.reset(true); + } + + for (int i = 0; i < fields; ++i) { + final String field = hd.getFieldDict().get((Integer) hd.decoder.decodeTypedValue()); + + final byte type = hd.decoder.readTypeDescriptor(); + final int numElems = hd.decoder.decodeNumberOfElements(type); + + hd.getGenoFieldDecoders() + .getDecoder(field) + .decode(alleles, field, hd.decoder, type, numElems, builders); + } + + final ArrayList genotypes = new ArrayList(builders.length); + for (final GenotypeBuilder gb : builders) { + genotypes.add(gb.make()); + } + + return new LazyGenotypesContext.LazyData( + genotypes, hd.getSampleNamesInOrder(), hd.getSampleNameToOffset()); + } catch (IOException e) { + throw new TribbleException("Unexpected IOException parsing genotypes data block", e); + } + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyParsingGenotypesContext.java b/src/main/java/org/seqdoop/hadoop_bam/LazyParsingGenotypesContext.java index 27e6fde..1b29426 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyParsingGenotypesContext.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyParsingGenotypesContext.java @@ -25,11 +25,11 @@ import htsjdk.variant.variantcontext.LazyGenotypesContext; import htsjdk.variant.vcf.VCFHeader; -/** You need to call getParser().setHeader() here before trying to decode() a - * GenotypesContext in any VariantContext that came about via - * VariantContextWritable.readFields(). That includes calling - * VariantContext.fullyDecode() or almost any of the GenotypesContext methods. - * The RecordReader provided by VCFInputFormat does this for you. +/** + * You need to call getParser().setHeader() here before trying to decode() a GenotypesContext in any + * VariantContext that came about via VariantContextWritable.readFields(). That includes calling + * VariantContext.fullyDecode() or almost any of the GenotypesContext methods. The RecordReader + * provided by VCFInputFormat does this for you. */ // There's no public LazyGenotypesContext.LazyParser in Picard so we need to // provide our own. Since we need to have the header in the parser set @@ -38,24 +38,27 @@ // // And since VCF and BCF have different kinds of lazy data, we have separate // classes implementing the actual parsing for each. -public abstract class LazyParsingGenotypesContext - extends LazyGenotypesContext -{ - // super.parser is inaccessible to us so we keep a copy that we can access. - private final Parser parserCopy; - - protected LazyParsingGenotypesContext(Parser p, byte[] data, int count) { - super(p, data, count); - parserCopy = p; - } - - public Parser getParser() { return parserCopy; } - - public static interface HeaderDataCache { - public void setHeader(VCFHeader header); - } - - public static abstract class Parser implements LazyParser { - public abstract void setHeaderDataCache(HeaderDataCache data); - } +public abstract class LazyParsingGenotypesContext extends LazyGenotypesContext { + + // super.parser is inaccessible to us so we keep a copy that we can access. 
+ private final Parser parserCopy; + + protected LazyParsingGenotypesContext(Parser p, byte[] data, int count) { + super(p, data, count); + parserCopy = p; + } + + public Parser getParser() { + return parserCopy; + } + + public static interface HeaderDataCache { + + public void setHeader(VCFHeader header); + } + + public abstract static class Parser implements LazyParser { + + public abstract void setHeaderDataCache(HeaderDataCache data); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java b/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java index ccb061c..3fca4a9 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java @@ -20,9 +20,6 @@ package org.seqdoop.hadoop_bam; -import java.io.UnsupportedEncodingException; -import java.util.List; - import htsjdk.tribble.readers.LineIterator; import htsjdk.variant.variantcontext.Allele; import htsjdk.variant.variantcontext.LazyGenotypesContext; @@ -30,99 +27,108 @@ import htsjdk.variant.vcf.VCFHeader; import htsjdk.variant.vcf.VCFHeaderLine; import htsjdk.variant.vcf.VCFHeaderVersion; +import java.io.UnsupportedEncodingException; +import java.util.List; // File created: 2013-07-03 15:41:21 // The actual parsing is delegated to AbstractVCFCodec. public class LazyVCFGenotypesContext extends LazyParsingGenotypesContext { - /** Takes ownership of the given byte[]: don't modify its contents. */ - public LazyVCFGenotypesContext( - List alleles, String chrom, int start, - byte[] utf8Unparsed, int count) - { - super(new Parser(alleles, chrom, start), utf8Unparsed, count); - } - - public static class HeaderDataCache - implements LazyParsingGenotypesContext.HeaderDataCache - { - private HeaderSettableVCFCodec codec = new HeaderSettableVCFCodec(); - - @Override public void setHeader(VCFHeader header) { - VCFHeaderVersion version = null; - - // Normally AbstractVCFCodec parses the header and thereby sets the - // version field. It gets used later on so we need to set it. - for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) { - if (VCFHeaderVersion.isFormatString(line.getKey())) { - version = VCFHeaderVersion.toHeaderVersion(line.getValue()); - break; - } - } - - codec.setHeaderAndVersion(header, version); - } - - public AbstractVCFCodec getCodec() { return codec; } - } - - public static class Parser extends LazyParsingGenotypesContext.Parser { - private HeaderSettableVCFCodec codec = null; - private final List alleles; - private final String chrom; - private final int start; - - public Parser(List alleles, String chrom, int start) { - this.alleles = alleles; - this.chrom = chrom; - this.start = start; - } - - @Override public void setHeaderDataCache( - LazyParsingGenotypesContext.HeaderDataCache data) - { - codec = (HeaderSettableVCFCodec)((HeaderDataCache)data).getCodec(); - } - - @Override public LazyGenotypesContext.LazyData parse(final Object data) { - if (codec == null || !codec.hasHeader()) - throw new IllegalStateException( - "Cannot decode genotypes without a codec with a VCFHeader"); - - final String str; - try { - str = new String((byte[])data, "UTF-8"); - } catch (UnsupportedEncodingException absurd) { - throw new RuntimeException( - "Can never happen on a compliant Java implementation because "+ - "UTF-8 is guaranteed to be supported"); - } - return codec.createGenotypeMap(str, alleles, chrom, start); - } - } + /** Takes ownership of the given byte[]: don't modify its contents. 
*/ + public LazyVCFGenotypesContext( + List alleles, String chrom, int start, byte[] utf8Unparsed, int count) { + super(new Parser(alleles, chrom, start), utf8Unparsed, count); + } + + public static class HeaderDataCache implements LazyParsingGenotypesContext.HeaderDataCache { + + private HeaderSettableVCFCodec codec = new HeaderSettableVCFCodec(); + + @Override + public void setHeader(VCFHeader header) { + VCFHeaderVersion version = null; + + // Normally AbstractVCFCodec parses the header and thereby sets the + // version field. It gets used later on so we need to set it. + for (final VCFHeaderLine line : header.getMetaDataInInputOrder()) { + if (VCFHeaderVersion.isFormatString(line.getKey())) { + version = VCFHeaderVersion.toHeaderVersion(line.getValue()); + break; + } + } + + codec.setHeaderAndVersion(header, version); + } + + public AbstractVCFCodec getCodec() { + return codec; + } + } + + public static class Parser extends LazyParsingGenotypesContext.Parser { + + private final List alleles; + private final String chrom; + private final int start; + private HeaderSettableVCFCodec codec = null; + + public Parser(List alleles, String chrom, int start) { + this.alleles = alleles; + this.chrom = chrom; + this.start = start; + } + + @Override + public void setHeaderDataCache(LazyParsingGenotypesContext.HeaderDataCache data) { + codec = (HeaderSettableVCFCodec) ((HeaderDataCache) data).getCodec(); + } + + @Override + public LazyGenotypesContext.LazyData parse(final Object data) { + if (codec == null || !codec.hasHeader()) { + throw new IllegalStateException("Cannot decode genotypes without a codec with a VCFHeader"); + } + + final String str; + try { + str = new String((byte[]) data, "UTF-8"); + } catch (UnsupportedEncodingException absurd) { + throw new RuntimeException( + "Can never happen on a compliant Java implementation because " + + "UTF-8 is guaranteed to be supported"); + } + return codec.createGenotypeMap(str, alleles, chrom, start); + } + } } // This is a HACK. But, the functionality is only in AbstractVCFCodec so it // can't be helped. This is preferable to copying the functionality into // parse() above. 
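// Illustrative sketch, not part of this patch: the wiring that the LazyParsingGenotypesContext
// javadoc above requires before genotypes can be decoded. The members used here
// (HeaderDataCache.setHeader, Parser.setHeaderDataCache, getParser(), and the
// LazyVCFGenotypesContext constructor) are the ones declared in these files; the helper class,
// method and variable names are hypothetical, and the sketch assumes it sits in the
// org.seqdoop.hadoop_bam package so only fully-qualified htsjdk types are needed.
class LazyGenotypesWiringSketch {

  static LazyVCFGenotypesContext attachHeader(
      htsjdk.variant.vcf.VCFHeader header,
      java.util.List<htsjdk.variant.variantcontext.Allele> alleles,
      String chrom,
      int start,
      byte[] unparsedUtf8,
      int sampleCount) {
    // One cache per header: setHeader() precomputes the codec state needed for lazy parsing.
    LazyVCFGenotypesContext.HeaderDataCache cache = new LazyVCFGenotypesContext.HeaderDataCache();
    cache.setHeader(header);

    // Every lazily-built context must be given the cache before decode()/getGenotypes() is
    // called; the RecordReader provided by VCFInputFormat performs this step for you.
    LazyVCFGenotypesContext ctx =
        new LazyVCFGenotypesContext(alleles, chrom, start, unparsedUtf8, sampleCount);
    ctx.getParser().setHeaderDataCache(cache);
    return ctx;
  }
}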
class HeaderSettableVCFCodec extends AbstractVCFCodec { - public boolean hasHeader() { return header != null; } - - public void setHeaderAndVersion(VCFHeader header, VCFHeaderVersion ver) { - this.header = header; - this.version = ver; - } - - @Override public Object readActualHeader(LineIterator reader) { - throw new UnsupportedOperationException( - "Internal error: this shouldn't be called"); - } - @Override public List parseFilters(String filterString) { - throw new UnsupportedOperationException( - "Internal error: this shouldn't be called"); - } - @Override public boolean canDecode(String s) { - return true; - } + + public boolean hasHeader() { + return header != null; + } + + public void setHeaderAndVersion(VCFHeader header, VCFHeaderVersion ver) { + this.header = header; + this.version = ver; + } + + @Override + public Object readActualHeader(LineIterator reader) { + throw new UnsupportedOperationException("Internal error: this shouldn't be called"); + } + + @Override + public List parseFilters(String filterString) { + throw new UnsupportedOperationException("Internal error: this shouldn't be called"); + } + + @Override + public boolean canDecode(String s) { + return true; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LineReader.java b/src/main/java/org/seqdoop/hadoop_bam/LineReader.java index 1db1b85..545fc75 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LineReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LineReader.java @@ -24,17 +24,18 @@ import java.io.IOException; import java.io.InputStream; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; /** - * A class that provides a line reader from an input stream. - * This code started as the org.apache.hadoop.util.LineReader class in Hadoop 0.20.2, - * to which we added a skip(n) method. + * A class that provides a line reader from an input stream. This code started as the + * org.apache.hadoop.util.LineReader class in Hadoop 0.20.2, to which we added a skip(n) method. */ public class LineReader { + private static final int DEFAULT_BUFFER_SIZE = 64 * 1024; + private static final byte CR = '\r'; + private static final byte LF = '\n'; private int bufferSize = DEFAULT_BUFFER_SIZE; private InputStream in; private byte[] buffer; @@ -43,25 +44,20 @@ public class LineReader { // the current position in the buffer private int bufferPosn = 0; - private static final byte CR = '\r'; - private static final byte LF = '\n'; - /** - * Create a line reader that reads from the given stream using the - * default buffer-size (64k). + * Create a line reader that reads from the given stream using the default buffer-size (64k). + * * @param in The input stream - * @throws IOException */ public LineReader(InputStream in) { this(in, DEFAULT_BUFFER_SIZE); } /** - * Create a line reader that reads from the given stream using the - * given buffer-size. + * Create a line reader that reads from the given stream using the given buffer-size. + * * @param in The input stream * @param bufferSize Size of the read buffer - * @throws IOException */ public LineReader(InputStream in, int bufferSize) { this.in = in; @@ -70,46 +66,36 @@ public LineReader(InputStream in, int bufferSize) { } /** - * Create a line reader that reads from the given stream using the - * io.file.buffer.size specified in the given - * Configuration. + * Create a line reader that reads from the given stream using the io.file.buffer.size + * specified in the given Configuration. 
+ * * @param in input stream * @param conf configuration - * @throws IOException */ public LineReader(InputStream in, Configuration conf) throws IOException { this(in, conf.getInt("io.file.buffer.size", DEFAULT_BUFFER_SIZE)); } - /** - * Close the underlying stream. - * @throws IOException - */ + /** Close the underlying stream. */ public void close() throws IOException { in.close(); } /** - * Read one line from the InputStream into the given Text. A line - * can be terminated by one of the following: '\n' (LF) , '\r' (CR), - * or '\r\n' (CR+LF). EOF also terminates an otherwise unterminated - * line. + * Read one line from the InputStream into the given Text. A line can be terminated by one of the + * following: '\n' (LF) , '\r' (CR), or '\r\n' (CR+LF). EOF also terminates an otherwise + * unterminated line. * * @param str the object to store the given line (without newline) - * @param maxLineLength the maximum number of bytes to store into str; - * the rest of the line is silently discarded. - * @param maxBytesToConsume the maximum number of bytes to consume - * in this call. This is only a hint, because if the line cross - * this threshold, we allow it to happen. It can overshoot - * potentially by as much as one buffer length. - * - * @return the number of bytes read including the (longest) newline - * found. - * + * @param maxLineLength the maximum number of bytes to store into str; the rest of the line is + * silently discarded. + * @param maxBytesToConsume the maximum number of bytes to consume in this call. This is only a + * hint, because if the line cross this threshold, we allow it to happen. It can overshoot + * potentially by as much as one buffer length. + * @return the number of bytes read including the (longest) newline found. * @throws IOException if the underlying stream throws */ - public int readLine(Text str, int maxLineLength, - int maxBytesToConsume) throws IOException { + public int readLine(Text str, int maxLineLength, int maxBytesToConsume) throws IOException { /* We're reading data from in, but the head of the stream may be * already buffered in buffer, so we have several cases: * 1. No newline characters are in the buffer, so we need to copy @@ -127,35 +113,38 @@ public int readLine(Text str, int maxLineLength, * follows. */ str.clear(); - int txtLength = 0; //tracks str.getLength(), as an optimization - int newlineLength = 0; //length of terminating newline - boolean prevCharCR = false; //true of prev char was CR + int txtLength = 0; // tracks str.getLength(), as an optimization + int newlineLength = 0; // length of terminating newline + boolean prevCharCR = false; // true of prev char was CR long bytesConsumed = 0; do { - int startPosn = bufferPosn; //starting from where we left off the last time + int startPosn = bufferPosn; // starting from where we left off the last time if (bufferPosn >= bufferLength) { startPosn = bufferPosn = 0; - if (prevCharCR) - ++bytesConsumed; //account for CR from previous read + if (prevCharCR) { + ++bytesConsumed; // account for CR from previous read + } bufferLength = in.read(buffer); - if (bufferLength <= 0) + if (bufferLength <= 0) { break; // EOF + } } - for (; bufferPosn < bufferLength; ++bufferPosn) { //search for newline + for (; bufferPosn < bufferLength; ++bufferPosn) { // search for newline if (buffer[bufferPosn] == LF) { newlineLength = (prevCharCR) ? 
2 : 1; ++bufferPosn; // at next invocation proceed from following byte break; } - if (prevCharCR) { //CR + notLF, we are at notLF + if (prevCharCR) { // CR + notLF, we are at notLF newlineLength = 1; break; } prevCharCR = (buffer[bufferPosn] == CR); } int readLength = bufferPosn - startPosn; - if (prevCharCR && newlineLength == 0) - --readLength; //CR at the end of the buffer + if (prevCharCR && newlineLength == 0) { + --readLength; // CR at the end of the buffer + } bytesConsumed += readLength; int appendLength = readLength - newlineLength; if (appendLength > maxLineLength - txtLength) { @@ -167,13 +156,15 @@ public int readLine(Text str, int maxLineLength, } } while (newlineLength == 0 && bytesConsumed < maxBytesToConsume); - if (bytesConsumed > (long)Integer.MAX_VALUE) + if (bytesConsumed > (long) Integer.MAX_VALUE) { throw new IOException("Too many bytes before newline: " + bytesConsumed); - return (int)bytesConsumed; + } + return (int) bytesConsumed; } /** * Read from the InputStream into the given Text. + * * @param str the object to store the given line * @param maxLineLength the maximum number of bytes to store into str. * @return the number of bytes read including the newline @@ -181,10 +172,11 @@ public int readLine(Text str, int maxLineLength, */ public int readLine(Text str, int maxLineLength) throws IOException { return readLine(str, maxLineLength, Integer.MAX_VALUE); -} + } /** * Read from the InputStream into the given Text. + * * @param str the object to store the given line * @return the number of bytes read including the newline * @throws IOException if the underlying stream throws @@ -193,39 +185,37 @@ public int readLine(Text str) throws IOException { return readLine(str, Integer.MAX_VALUE, Integer.MAX_VALUE); } - /** - * Skip n bytes from the InputStream. - * @param n the number of bytes to skip. + /** + * Skip n bytes from the InputStream. + * + * @param n the number of bytes to skip. * @return the number of bytes skipped. * @throws IOException if the underlying stream throws. 
- */ - public long skip(long n) throws IOException - { - boolean end = false; - long toskip = n; - while (toskip > 0 && !end) - { - if (bufferPosn < bufferLength) - { - int skipped = (int)Math.min(bufferLength - bufferPosn, toskip); - bufferPosn += skipped; - toskip -= skipped; - } - if (bufferPosn >= bufferLength) - { - int loaded = loadBuffer(); - end = loaded == 0; - } - } - return n - toskip; - } + */ + public long skip(long n) throws IOException { + boolean end = false; + long toskip = n; + while (toskip > 0 && !end) { + if (bufferPosn < bufferLength) { + int skipped = (int) Math.min(bufferLength - bufferPosn, toskip); + bufferPosn += skipped; + toskip -= skipped; + } + if (bufferPosn >= bufferLength) { + int loaded = loadBuffer(); + end = loaded == 0; + } + } + return n - toskip; + } - protected int loadBuffer() throws IOException - { - bufferLength = in.read(buffer); - if (bufferLength < 0) // if EOF read returns -1 - bufferLength = 0; - bufferPosn = 0; - return bufferLength; - } + protected int loadBuffer() throws IOException { + bufferLength = in.read(buffer); + if (bufferLength < 0) // if EOF read returns -1 + { + bufferLength = 0; + } + bufferPosn = 0; + return bufferLength; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/QseqInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/QseqInputFormat.java index 1432671..3b392e0 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/QseqInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/QseqInputFormat.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.io.InputStream; import java.nio.charset.CharacterCodingException; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileSystem; @@ -39,405 +38,407 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; - import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding; import org.seqdoop.hadoop_bam.util.ConfHelper; /** - * Reads the Illumina qseq sequence format. - * Key: instrument, run number, lane, tile, xpos, ypos, read number, delimited by ':' characters. - * Value: a SequencedFragment object representing the entry. + * Reads the Illumina qseq sequence format. Key: instrument, run number, lane, tile, xpos, ypos, + * read number, delimited by ':' characters. Value: a SequencedFragment object representing the + * entry. */ -public class QseqInputFormat extends FileInputFormat -{ - public static final String CONF_BASE_QUALITY_ENCODING = "hbam.qseq-input.base-quality-encoding"; - public static final String CONF_FILTER_FAILED_QC = "hbam.qseq-input.filter-failed-qc"; - public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "illumina"; - - public static class QseqRecordReader extends RecordReader - { - /* - * qseq format: - * 11 tab-separated columns - * - * 1) Instrument - * 2) Run id - * 3) Lane number - * 4) Tile number - * 5) X pos - * 6) Y pos - * 7) Index sequence (0 for runs without multiplexing) - * 8) Read Number - * 9) Base Sequence - * 10) Base Quality - * 11) Filter: did the read pass filtering? 0 - No, 1 - Yes. - */ - // start: first valid data index - private long start; - // end: first index value beyond the slice, i.e. 
slice is in range [start,end) - private long end; - // pos: current position in file - private long pos; - // file: the file being read - private Path file; - - private LineReader lineReader; - private InputStream inputStream; - private Text currentKey = new Text(); - private SequencedFragment currentValue = new SequencedFragment(); - - private Text buffer = new Text(); - private static final int NUM_QSEQ_COLS = 11; - // for these, we have one per qseq field - private int[] fieldPositions = new int[NUM_QSEQ_COLS]; - private int[] fieldLengths = new int[NUM_QSEQ_COLS]; - - private BaseQualityEncoding qualityEncoding; - private boolean filterFailedQC = false; - - private static final String Delim = "\t"; - - // How long can a qseq line get? - public static final int MAX_LINE_LENGTH = 20000; - - public QseqRecordReader(Configuration conf, FileSplit split) throws IOException - { - setConf(conf); - file = split.getPath(); - start = split.getStart(); - end = start + split.getLength(); - - FileSystem fs = file.getFileSystem(conf); - FSDataInputStream fileIn = fs.open(file); - - CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); - CompressionCodec codec = codecFactory.getCodec(file); - - if (codec == null) // no codec. Uncompressed file. - { - positionAtFirstRecord(fileIn); - inputStream = fileIn; - } - else - { // compressed file - if (start != 0) - throw new RuntimeException("Start position for compressed file is not 0! (found " + start + ")"); - - inputStream = codec.createInputStream(fileIn); - end = Long.MAX_VALUE; // read until the end of the file - } - - lineReader = new LineReader(inputStream); - } - - /* - * Position the input stream at the start of the first record. - */ - private void positionAtFirstRecord(FSDataInputStream stream) throws IOException - { - if (start > 0) - { - // Advance to the start of the first line in our slice. - // We use a temporary LineReader to read a partial line and find the - // start of the first one on or after our starting position. - // In case our slice starts right at the beginning of a line, we need to back - // up by one position and then discard the first line. - start -= 1; - stream.seek(start); - LineReader reader = new LineReader(stream); - int bytesRead = reader.readLine(buffer, (int)Math.min(MAX_LINE_LENGTH, end - start)); - start = start + bytesRead; - stream.seek(start); - } - // else - // if start == 0 we're starting at the beginning of a line - pos = start; - } - - protected void setConf(Configuration conf) - { - String encoding = - conf.get(QseqInputFormat.CONF_BASE_QUALITY_ENCODING, - conf.get(FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, - CONF_BASE_QUALITY_ENCODING_DEFAULT)); - - if ("illumina".equals(encoding)) - qualityEncoding = BaseQualityEncoding.Illumina; - else if ("sanger".equals(encoding)) - qualityEncoding = BaseQualityEncoding.Sanger; - else - throw new RuntimeException("Unknown input base quality encoding value " + encoding); - - filterFailedQC = ConfHelper.parseBoolean( - conf.get(QseqInputFormat.CONF_FILTER_FAILED_QC, - conf.get(FormatConstants.CONF_INPUT_FILTER_FAILED_QC)), - false); - } - - /** - * Added to use mapreduce API. - */ - public void initialize(InputSplit split, TaskAttemptContext context) throws IOException, InterruptedException - { - } - - /** - * Added to use mapreduce API. - */ - public Text getCurrentKey() - { - return currentKey; - } - - /** - * Added to use mapreduce API. 
- */ - public SequencedFragment getCurrentValue() - { - return currentValue; - } - - /** - * Added to use mapreduce API. - */ - public boolean nextKeyValue() throws IOException, InterruptedException - { - return next(currentKey, currentValue); - } - - /** - * Close this RecordReader to future operations. - */ - public void close() throws IOException - { - inputStream.close(); - } - - /** - * Create an object of the appropriate type to be used as a key. - */ - public Text createKey() - { - return new Text(); - } - - /** - * Create an object of the appropriate type to be used as a value. - */ - public SequencedFragment createValue() - { - return new SequencedFragment(); - } - - /** - * Returns the current position in the input. - */ - public long getPos() { return pos; } - - /** - * How much of the input has the RecordReader consumed i.e. - */ - public float getProgress() - { - if (start == end) - return 1.0f; - else - return Math.min(1.0f, (pos - start) / (float)(end - start)); - } - - public String makePositionMessage(long pos) - { - return file.toString() + ":" + pos; - } - - public String makePositionMessage() - { - return file.toString() + ":" + pos; - } - - /* - * Read a single record. - * - * Reads a single line of input and scans it with scanQseqLine, which - * sets key and value accordingly. The method updates this.pos. - * - * @return The number of bytes read. If no bytes were read, the EOF was reached. - */ - private int lowLevelQseqRead(Text key, SequencedFragment value) throws IOException - { - int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH); - pos += bytesRead; - if (bytesRead >= MAX_LINE_LENGTH) - { - String line; - try { - line = Text.decode(buffer.getBytes(), 0, 500); - } catch (java.nio.charset.CharacterCodingException e) { - line = "(line not convertible to printable format)"; - } - throw new RuntimeException("found abnormally large line (length " + bytesRead + ") at " + - makePositionMessage(pos - bytesRead) + ": " + line); - } - else if (bytesRead > 0) - scanQseqLine(buffer, key, value); - - return bytesRead; - } - - /** - * Reads the next key/value pair from the input for processing. - */ - public boolean next(Text key, SequencedFragment value) throws IOException - { - if (pos >= end) - return false; // past end of slice - - int bytesRead = 0; - boolean goodRecord; - do { - bytesRead = lowLevelQseqRead(key, value); // if bytesRead <= 0 EOF has been reached - goodRecord = (bytesRead > 0) && (!filterFailedQC || value.getFilterPassed() == null || value.getFilterPassed()); - } while (bytesRead > 0 && !goodRecord); - - if (goodRecord) // post process the record only if it's going to be used - { - try { - postProcessSequencedFragment(value); - } catch (FormatException e) { - throw new FormatException(e.getMessage() + " Position: " + makePositionMessage(this.pos - bytesRead) + - "; line: " + buffer); // last line read is still in the buffer - } - } - - return goodRecord; - } - - /* - * Scans the text line to find the position and the lengths of the fields - * within it. The positions and lengths are saved into the instance arrays - * 'fieldPositions' and 'fieldLengths'. - * - * @exception FormatException Line doesn't have the expected number of fields. 
- */ - private void setFieldPositionsAndLengths(Text line) - { - int pos = 0; // the byte position within the record - int fieldno = 0; // the field index within the record - while (pos < line.getLength() && fieldno < NUM_QSEQ_COLS) // iterate over each field - { - int endpos = line.find(Delim, pos); // the field's end position - if (endpos < 0) - endpos = line.getLength(); - - fieldPositions[fieldno] = pos; - fieldLengths[fieldno] = endpos - pos; - - pos = endpos + 1; // the next starting position is the current end + 1 - fieldno += 1; - } - - if (fieldno != NUM_QSEQ_COLS) - throw new FormatException("found " + fieldno + " fields instead of 11 at " + - makePositionMessage(this.pos - line.getLength()) + ". Line: " + line); - } - - private void scanQseqLine(Text line, Text key, SequencedFragment fragment) - { - setFieldPositionsAndLengths(line); - - // Build the key. We concatenate all fields from 0 to 5 (machine to y-pos) - // and then the read number, replacing the tabs with colons. - key.clear(); - // append up and including field[5] - key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]); - // replace tabs with : - byte[] bytes = key.getBytes(); - int temporaryEnd = key.getLength(); - for (int i = 0; i < temporaryEnd; ++i) - if (bytes[i] == '\t') - bytes[i] = ':'; - // append the read number - key.append(line.getBytes(), fieldPositions[7] - 1, fieldLengths[7] + 1); // +/- 1 to catch the preceding tab. - // convert the tab preceding the read number into a : - key.getBytes()[temporaryEnd] = ':'; - - // now the fragment - try - { - fragment.clear(); - fragment.setInstrument( Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0]) ); - fragment.setRunNumber( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1])) ); - //fragment.setFlowcellId(); - fragment.setLane( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2])) ); - fragment.setTile( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3])) ); - fragment.setXpos( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4])) ); - fragment.setYpos( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5])) ); - fragment.setRead( Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7])) ); - fragment.setFilterPassed( line.getBytes()[fieldPositions[10]] != '0' ); - //fragment.setControlNumber(); - if (fieldLengths[6] > 0 && line.getBytes()[fieldPositions[6]] == '0') // 0 is a null index sequence - fragment.setIndexSequence(null); - else - fragment.setIndexSequence(Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]).replace('.', 'N')); - } - catch (CharacterCodingException e) { - throw new FormatException("Invalid character format at " + makePositionMessage(this.pos - line.getLength()) + "; line: " + line); - } - - fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]); - fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]); - } - - /* - * This method applies some transformations to the read and quality data. - * - *

    - *
- * <ul>
- *   <li> '.' in the read are converted to 'N'
- *   <li> the base quality encoding is converted to 'sanger', unless otherwise
- *        requested by the configuration.
- * </ul>
- * - * @exception FormatException Thrown if the record contains base quality scores - * outside the range allowed by the format. - */ - private void postProcessSequencedFragment(SequencedFragment fragment) - { - byte[] bytes = fragment.getSequence().getBytes(); - // replace . with N - for (int i = 0; i < fieldLengths[8]; ++i) - if (bytes[i] == '.') - bytes[i] = 'N'; - - if (qualityEncoding == BaseQualityEncoding.Illumina) - { - // convert illumina to sanger scale - SequencedFragment.convertQuality(fragment.getQuality(), BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger); - } - else // sanger qualities. - { - int outOfRangeElement = SequencedFragment.verifyQuality(fragment.getQuality(), BaseQualityEncoding.Sanger); - if (outOfRangeElement >= 0) - { - throw new FormatException("qseq base quality score out of range for Sanger Phred+33 format (found " + - (fragment.getQuality().getBytes()[outOfRangeElement] - FormatConstants.SANGER_OFFSET) + ").\n" + - "Although Sanger format has been requested, maybe qualities are in Illumina Phred+64 format?\n"); - } - } - } - } - - @Override - public boolean isSplitable(JobContext context, Path path) - { - CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); - return codec == null; - } - - public RecordReader createRecordReader( - InputSplit genericSplit, - TaskAttemptContext context) throws IOException, InterruptedException - { - context.setStatus(genericSplit.toString()); - return new QseqRecordReader(context.getConfiguration(), (FileSplit)genericSplit); // cast as per example in TextInputFormat - } +public class QseqInputFormat extends FileInputFormat { + + public static final String CONF_BASE_QUALITY_ENCODING = "hbam.qseq-input.base-quality-encoding"; + public static final String CONF_FILTER_FAILED_QC = "hbam.qseq-input.filter-failed-qc"; + public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "illumina"; + + @Override + public boolean isSplitable(JobContext context, Path path) { + CompressionCodec codec = new CompressionCodecFactory(context.getConfiguration()).getCodec(path); + return codec == null; + } + + public RecordReader createRecordReader( + InputSplit genericSplit, TaskAttemptContext context) + throws IOException, InterruptedException { + context.setStatus(genericSplit.toString()); + return new QseqRecordReader( + context.getConfiguration(), + (FileSplit) genericSplit); // cast as per example in TextInputFormat + } + + public static class QseqRecordReader extends RecordReader { + + // How long can a qseq line get? + public static final int MAX_LINE_LENGTH = 20000; + private static final int NUM_QSEQ_COLS = 11; + private static final String Delim = "\t"; + /* + * qseq format: + * 11 tab-separated columns + * + * 1) Instrument + * 2) Run id + * 3) Lane number + * 4) Tile number + * 5) X pos + * 6) Y pos + * 7) Index sequence (0 for runs without multiplexing) + * 8) Read Number + * 9) Base Sequence + * 10) Base Quality + * 11) Filter: did the read pass filtering? 0 - No, 1 - Yes. + */ + // start: first valid data index + private long start; + // end: first index value beyond the slice, i.e. 
slice is in range [start,end) + private long end; + // pos: current position in file + private long pos; + // file: the file being read + private Path file; + private LineReader lineReader; + private InputStream inputStream; + private Text currentKey = new Text(); + private SequencedFragment currentValue = new SequencedFragment(); + private Text buffer = new Text(); + // for these, we have one per qseq field + private int[] fieldPositions = new int[NUM_QSEQ_COLS]; + private int[] fieldLengths = new int[NUM_QSEQ_COLS]; + private BaseQualityEncoding qualityEncoding; + private boolean filterFailedQC = false; + + public QseqRecordReader(Configuration conf, FileSplit split) throws IOException { + setConf(conf); + file = split.getPath(); + start = split.getStart(); + end = start + split.getLength(); + + FileSystem fs = file.getFileSystem(conf); + FSDataInputStream fileIn = fs.open(file); + + CompressionCodecFactory codecFactory = new CompressionCodecFactory(conf); + CompressionCodec codec = codecFactory.getCodec(file); + + if (codec == null) // no codec. Uncompressed file. + { + positionAtFirstRecord(fileIn); + inputStream = fileIn; + } else { // compressed file + if (start != 0) { + throw new RuntimeException( + "Start position for compressed file is not 0! (found " + start + ")"); + } + + inputStream = codec.createInputStream(fileIn); + end = Long.MAX_VALUE; // read until the end of the file + } + + lineReader = new LineReader(inputStream); + } + + /* + * Position the input stream at the start of the first record. + */ + private void positionAtFirstRecord(FSDataInputStream stream) throws IOException { + if (start > 0) { + // Advance to the start of the first line in our slice. + // We use a temporary LineReader to read a partial line and find the + // start of the first one on or after our starting position. + // In case our slice starts right at the beginning of a line, we need to back + // up by one position and then discard the first line. + start -= 1; + stream.seek(start); + LineReader reader = new LineReader(stream); + int bytesRead = reader.readLine(buffer, (int) Math.min(MAX_LINE_LENGTH, end - start)); + start = start + bytesRead; + stream.seek(start); + } + // else + // if start == 0 we're starting at the beginning of a line + pos = start; + } + + protected void setConf(Configuration conf) { + String encoding = + conf.get( + QseqInputFormat.CONF_BASE_QUALITY_ENCODING, + conf.get( + FormatConstants.CONF_INPUT_BASE_QUALITY_ENCODING, + CONF_BASE_QUALITY_ENCODING_DEFAULT)); + + if ("illumina".equals(encoding)) { + qualityEncoding = BaseQualityEncoding.Illumina; + } else if ("sanger".equals(encoding)) { + qualityEncoding = BaseQualityEncoding.Sanger; + } else { + throw new RuntimeException("Unknown input base quality encoding value " + encoding); + } + + filterFailedQC = + ConfHelper.parseBoolean( + conf.get( + QseqInputFormat.CONF_FILTER_FAILED_QC, + conf.get(FormatConstants.CONF_INPUT_FILTER_FAILED_QC)), + false); + } + + /** Added to use mapreduce API. */ + public void initialize(InputSplit split, TaskAttemptContext context) + throws IOException, InterruptedException {} + + /** Added to use mapreduce API. */ + public Text getCurrentKey() { + return currentKey; + } + + /** Added to use mapreduce API. */ + public SequencedFragment getCurrentValue() { + return currentValue; + } + + /** Added to use mapreduce API. */ + public boolean nextKeyValue() throws IOException, InterruptedException { + return next(currentKey, currentValue); + } + + /** Close this RecordReader to future operations. 
*/ + public void close() throws IOException { + inputStream.close(); + } + + /** Create an object of the appropriate type to be used as a key. */ + public Text createKey() { + return new Text(); + } + + /** Create an object of the appropriate type to be used as a value. */ + public SequencedFragment createValue() { + return new SequencedFragment(); + } + + /** Returns the current position in the input. */ + public long getPos() { + return pos; + } + + /** How much of the input has the RecordReader consumed i.e. */ + public float getProgress() { + if (start == end) { + return 1.0f; + } else { + return Math.min(1.0f, (pos - start) / (float) (end - start)); + } + } + + public String makePositionMessage(long pos) { + return file.toString() + ":" + pos; + } + + public String makePositionMessage() { + return file.toString() + ":" + pos; + } + + /* + * Read a single record. + * + * Reads a single line of input and scans it with scanQseqLine, which + * sets key and value accordingly. The method updates this.pos. + * + * @return The number of bytes read. If no bytes were read, the EOF was reached. + */ + private int lowLevelQseqRead(Text key, SequencedFragment value) throws IOException { + int bytesRead = lineReader.readLine(buffer, MAX_LINE_LENGTH); + pos += bytesRead; + if (bytesRead >= MAX_LINE_LENGTH) { + String line; + try { + line = Text.decode(buffer.getBytes(), 0, 500); + } catch (java.nio.charset.CharacterCodingException e) { + line = "(line not convertible to printable format)"; + } + throw new RuntimeException( + "found abnormally large line (length " + + bytesRead + + ") at " + + makePositionMessage(pos - bytesRead) + + ": " + + line); + } else if (bytesRead > 0) { + scanQseqLine(buffer, key, value); + } + + return bytesRead; + } + + /** Reads the next key/value pair from the input for processing. */ + public boolean next(Text key, SequencedFragment value) throws IOException { + if (pos >= end) { + return false; // past end of slice + } + + int bytesRead = 0; + boolean goodRecord; + do { + bytesRead = lowLevelQseqRead(key, value); // if bytesRead <= 0 EOF has been reached + goodRecord = + (bytesRead > 0) + && (!filterFailedQC || value.getFilterPassed() == null || value.getFilterPassed()); + } while (bytesRead > 0 && !goodRecord); + + if (goodRecord) // post process the record only if it's going to be used + { + try { + postProcessSequencedFragment(value); + } catch (FormatException e) { + throw new FormatException( + e.getMessage() + + " Position: " + + makePositionMessage(this.pos - bytesRead) + + "; line: " + + buffer); // last line read is still in the buffer + } + } + + return goodRecord; + } + + /* + * Scans the text line to find the position and the lengths of the fields + * within it. The positions and lengths are saved into the instance arrays + * 'fieldPositions' and 'fieldLengths'. + * + * @exception FormatException Line doesn't have the expected number of fields. 
+ */ + private void setFieldPositionsAndLengths(Text line) { + int pos = 0; // the byte position within the record + int fieldno = 0; // the field index within the record + while (pos < line.getLength() && fieldno < NUM_QSEQ_COLS) // iterate over each field + { + int endpos = line.find(Delim, pos); // the field's end position + if (endpos < 0) { + endpos = line.getLength(); + } + + fieldPositions[fieldno] = pos; + fieldLengths[fieldno] = endpos - pos; + + pos = endpos + 1; // the next starting position is the current end + 1 + fieldno += 1; + } + + if (fieldno != NUM_QSEQ_COLS) { + throw new FormatException( + "found " + + fieldno + + " fields instead of 11 at " + + makePositionMessage(this.pos - line.getLength()) + + ". Line: " + + line); + } + } + + private void scanQseqLine(Text line, Text key, SequencedFragment fragment) { + setFieldPositionsAndLengths(line); + + // Build the key. We concatenate all fields from 0 to 5 (machine to y-pos) + // and then the read number, replacing the tabs with colons. + key.clear(); + // append up and including field[5] + key.append(line.getBytes(), 0, fieldPositions[5] + fieldLengths[5]); + // replace tabs with : + byte[] bytes = key.getBytes(); + int temporaryEnd = key.getLength(); + for (int i = 0; i < temporaryEnd; ++i) { + if (bytes[i] == '\t') { + bytes[i] = ':'; + } + } + // append the read number + key.append( + line.getBytes(), + fieldPositions[7] - 1, + fieldLengths[7] + 1); // +/- 1 to catch the preceding tab. + // convert the tab preceding the read number into a : + key.getBytes()[temporaryEnd] = ':'; + + // now the fragment + try { + fragment.clear(); + fragment.setInstrument(Text.decode(line.getBytes(), fieldPositions[0], fieldLengths[0])); + fragment.setRunNumber( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[1], fieldLengths[1]))); + // fragment.setFlowcellId(); + fragment.setLane( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[2], fieldLengths[2]))); + fragment.setTile( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[3], fieldLengths[3]))); + fragment.setXpos( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[4], fieldLengths[4]))); + fragment.setYpos( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[5], fieldLengths[5]))); + fragment.setRead( + Integer.parseInt(Text.decode(line.getBytes(), fieldPositions[7], fieldLengths[7]))); + fragment.setFilterPassed(line.getBytes()[fieldPositions[10]] != '0'); + // fragment.setControlNumber(); + if (fieldLengths[6] > 0 + && line.getBytes()[fieldPositions[6]] == '0') // 0 is a null index sequence + { + fragment.setIndexSequence(null); + } else { + fragment.setIndexSequence( + Text.decode(line.getBytes(), fieldPositions[6], fieldLengths[6]).replace('.', 'N')); + } + } catch (CharacterCodingException e) { + throw new FormatException( + "Invalid character format at " + + makePositionMessage(this.pos - line.getLength()) + + "; line: " + + line); + } + + fragment.getSequence().append(line.getBytes(), fieldPositions[8], fieldLengths[8]); + fragment.getQuality().append(line.getBytes(), fieldPositions[9], fieldLengths[9]); + } + + /* + * This method applies some transformations to the read and quality data. + * + *
    + *
+ * <ul>
+ *   <li>'.' in the read are converted to 'N'
+ *   <li>the base quality encoding is converted to 'sanger', unless otherwise requested by the
+ *       configuration.
+ * </ul>
+ * + * @exception FormatException Thrown if the record contains base quality scores + * outside the range allowed by the format. + */ + private void postProcessSequencedFragment(SequencedFragment fragment) { + byte[] bytes = fragment.getSequence().getBytes(); + // replace . with N + for (int i = 0; i < fieldLengths[8]; ++i) { + if (bytes[i] == '.') { + bytes[i] = 'N'; + } + } + + if (qualityEncoding == BaseQualityEncoding.Illumina) { + // convert illumina to sanger scale + SequencedFragment.convertQuality( + fragment.getQuality(), BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger); + } else // sanger qualities. + { + int outOfRangeElement = + SequencedFragment.verifyQuality(fragment.getQuality(), BaseQualityEncoding.Sanger); + if (outOfRangeElement >= 0) { + throw new FormatException( + "qseq base quality score out of range for Sanger Phred+33 format (found " + + (fragment.getQuality().getBytes()[outOfRangeElement] + - FormatConstants.SANGER_OFFSET) + + ").\n" + + "Although Sanger format has been requested, maybe qualities are in Illumina Phred+64 format?\n"); + } + } + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/QseqOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/QseqOutputFormat.java index c54031d..1e01701 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/QseqOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/QseqOutputFormat.java @@ -23,10 +23,9 @@ package org.seqdoop.hadoop_bam; import java.io.DataOutputStream; -import java.io.OutputStream; import java.io.IOException; +import java.io.OutputStream; import java.nio.ByteBuffer; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; @@ -38,159 +37,159 @@ import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; import org.apache.hadoop.util.ReflectionUtils; - import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding; /** - * Output format for Illumina qseq format. - * Records are lines of tab-separated fields. Each record consists of - * - Machine name - * - Run number - * - Lane number - * - Tile number - * - X coordinate of the spot. Integer (can be negative). - * - Y coordinate of the spot. Integer (can be negative). - * - Index - * - Read Number - * - Sequence - * - Quality - * - Filter + * Output format for Illumina qseq format. Records are lines of tab-separated fields. Each record + * consists of - Machine name - Run number - Lane number - Tile number - X coordinate of the spot. + * Integer (can be negative). - Y coordinate of the spot. Integer (can be negative). 
- Index - Read + * Number - Sequence - Quality - Filter */ -public class QseqOutputFormat extends TextOutputFormat -{ - public static final String CONF_BASE_QUALITY_ENCODING = "hbam.qseq-output.base-quality-encoding"; - public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "illumina"; - - public static class QseqRecordWriter extends RecordWriter - { - static final byte[] newLine; - static final String delim = "\t"; - static { - try { - newLine = "\n".getBytes("us-ascii"); - } catch (java.io.UnsupportedEncodingException e) { - throw new RuntimeException("us-ascii encoding not supported!"); - } - } - - protected StringBuilder sBuilder = new StringBuilder(800); - protected OutputStream out; - BaseQualityEncoding baseQualityFormat; - - public QseqRecordWriter(Configuration conf, OutputStream out) - { - baseQualityFormat = BaseQualityEncoding.Illumina; - this.out = out; - setConf(conf); - } - - public void setConf(Configuration conf) - { - String setting = conf.get(CONF_BASE_QUALITY_ENCODING, CONF_BASE_QUALITY_ENCODING_DEFAULT); - if ("illumina".equals(setting)) - baseQualityFormat = BaseQualityEncoding.Illumina; - else if ("sanger".equals(setting)) - baseQualityFormat = BaseQualityEncoding.Sanger; - else - throw new RuntimeException("Invalid property value '" + setting + "' for " + CONF_BASE_QUALITY_ENCODING + ". Valid values are 'illumina' or 'sanger'"); - } - - public void write(Text ignored_key, SequencedFragment seq) throws IOException - { - sBuilder.delete(0, sBuilder.length()); // clear - - sBuilder.append( seq.getInstrument() == null ? "" : seq.getInstrument() ).append(delim); - sBuilder.append( seq.getRunNumber() == null ? "" : seq.getRunNumber().toString() ).append(delim); - sBuilder.append( seq.getLane() == null ? "" : seq.getLane().toString() ).append(delim); - sBuilder.append( seq.getTile() == null ? "" : seq.getTile().toString() ).append(delim); - sBuilder.append( seq.getXpos() == null ? "" : seq.getXpos().toString() ).append(delim); - sBuilder.append( seq.getYpos() == null ? "" : seq.getYpos().toString() ).append(delim); - - String index; - if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty()) - index = "0"; - else - index = seq.getIndexSequence().replace('N', '.'); - sBuilder.append( index ).append(delim); - - sBuilder.append( seq.getRead() == null ? "" : seq.getRead().toString() ).append(delim); - // here we also replace 'N' with '.' - sBuilder.append( seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')).append(delim); - - //////// quality may have to be re-coded - if (seq.getQuality() == null) - sBuilder.append(""); - else - { - int startPos = sBuilder.length(); - sBuilder.append(seq.getQuality().toString()); - if (baseQualityFormat == BaseQualityEncoding.Sanger) - { - // do nothing - } - else if (baseQualityFormat == BaseQualityEncoding.Illumina) - { - // recode the quality in-place - for (int i = startPos; i < sBuilder.length(); ++i) - { - // cast to avoid warning about possible loss of precision for assigning a char from an int. - char newValue = (char)(sBuilder.charAt(i) + 31); // 64 - 33 = 31: difference between illumina and sanger encoding - if (newValue > 126) - throw new RuntimeException("output quality score over allowed range. Maybe you meant to write in Sanger format?"); - sBuilder.setCharAt(i, newValue); - } - } - else - throw new RuntimeException("BUG! 
Unknown base quality format value " + baseQualityFormat + " in QseqRecordWriter"); - } - sBuilder.append(delim); - ///////// - sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed() ) ? 1 : 0); - - try { - ByteBuffer buf = Text.encode(sBuilder.toString()); - out.write(buf.array(), 0, buf.limit()); - } catch (java.nio.charset.CharacterCodingException e) { - throw new RuntimeException("Error encoding qseq record: " + seq); - } - out.write(newLine, 0, newLine.length); - } - - public void close(TaskAttemptContext context) throws IOException - { - out.close(); - } - } - - public RecordWriter getRecordWriter(TaskAttemptContext task) - throws IOException - { - Configuration conf = task.getConfiguration(); - boolean isCompressed = getCompressOutput(task); - - CompressionCodec codec = null; - String extension = ""; - - if (isCompressed) - { - Class codecClass = getOutputCompressorClass(task, GzipCodec.class); - codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); - extension = codec.getDefaultExtension(); - } - - Path file = getDefaultWorkFile(task, extension); - FileSystem fs = file.getFileSystem(conf); - - OutputStream output; - - if (isCompressed) - { - FSDataOutputStream fileOut = fs.create(file, false); - output = new DataOutputStream(codec.createOutputStream(fileOut)); - } - else - output = fs.create(file, false); - - return new QseqRecordWriter(conf, output); - } +public class QseqOutputFormat extends TextOutputFormat { + + public static final String CONF_BASE_QUALITY_ENCODING = "hbam.qseq-output.base-quality-encoding"; + public static final String CONF_BASE_QUALITY_ENCODING_DEFAULT = "illumina"; + + public RecordWriter getRecordWriter(TaskAttemptContext task) + throws IOException { + Configuration conf = task.getConfiguration(); + boolean isCompressed = getCompressOutput(task); + + CompressionCodec codec = null; + String extension = ""; + + if (isCompressed) { + Class codecClass = + getOutputCompressorClass(task, GzipCodec.class); + codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, conf); + extension = codec.getDefaultExtension(); + } + + Path file = getDefaultWorkFile(task, extension); + FileSystem fs = file.getFileSystem(conf); + + OutputStream output; + + if (isCompressed) { + FSDataOutputStream fileOut = fs.create(file, false); + output = new DataOutputStream(codec.createOutputStream(fileOut)); + } else { + output = fs.create(file, false); + } + + return new QseqRecordWriter(conf, output); + } + + public static class QseqRecordWriter extends RecordWriter { + + static final byte[] newLine; + static final String delim = "\t"; + + static { + try { + newLine = "\n".getBytes("us-ascii"); + } catch (java.io.UnsupportedEncodingException e) { + throw new RuntimeException("us-ascii encoding not supported!"); + } + } + + protected StringBuilder sBuilder = new StringBuilder(800); + protected OutputStream out; + BaseQualityEncoding baseQualityFormat; + + public QseqRecordWriter(Configuration conf, OutputStream out) { + baseQualityFormat = BaseQualityEncoding.Illumina; + this.out = out; + setConf(conf); + } + + public void setConf(Configuration conf) { + String setting = conf.get(CONF_BASE_QUALITY_ENCODING, CONF_BASE_QUALITY_ENCODING_DEFAULT); + if ("illumina".equals(setting)) { + baseQualityFormat = BaseQualityEncoding.Illumina; + } else if ("sanger".equals(setting)) { + baseQualityFormat = BaseQualityEncoding.Sanger; + } else { + throw new RuntimeException( + "Invalid property value '" + + setting + + "' for " + + CONF_BASE_QUALITY_ENCODING + 
+ ". Valid values are 'illumina' or 'sanger'"); + } + } + + public void write(Text ignored_key, SequencedFragment seq) throws IOException { + sBuilder.delete(0, sBuilder.length()); // clear + + sBuilder.append(seq.getInstrument() == null ? "" : seq.getInstrument()).append(delim); + sBuilder + .append(seq.getRunNumber() == null ? "" : seq.getRunNumber().toString()) + .append(delim); + sBuilder.append(seq.getLane() == null ? "" : seq.getLane().toString()).append(delim); + sBuilder.append(seq.getTile() == null ? "" : seq.getTile().toString()).append(delim); + sBuilder.append(seq.getXpos() == null ? "" : seq.getXpos().toString()).append(delim); + sBuilder.append(seq.getYpos() == null ? "" : seq.getYpos().toString()).append(delim); + + String index; + if (seq.getIndexSequence() == null || seq.getIndexSequence().isEmpty()) { + index = "0"; + } else { + index = seq.getIndexSequence().replace('N', '.'); + } + sBuilder.append(index).append(delim); + + sBuilder.append(seq.getRead() == null ? "" : seq.getRead().toString()).append(delim); + // here we also replace 'N' with '.' + sBuilder + .append(seq.getSequence() == null ? "" : seq.getSequence().toString().replace('N', '.')) + .append(delim); + + //////// quality may have to be re-coded + if (seq.getQuality() == null) { + sBuilder.append(""); + } else { + int startPos = sBuilder.length(); + sBuilder.append(seq.getQuality().toString()); + if (baseQualityFormat == BaseQualityEncoding.Sanger) { + // do nothing + } else if (baseQualityFormat == BaseQualityEncoding.Illumina) { + // recode the quality in-place + for (int i = startPos; i < sBuilder.length(); ++i) { + // cast to avoid warning about possible loss of precision for assigning a char from an + // int. + char newValue = + (char) + (sBuilder.charAt(i) + + 31); // 64 - 33 = 31: difference between illumina and sanger encoding + if (newValue > 126) { + throw new RuntimeException( + "output quality score over allowed range. Maybe you meant to write in Sanger format?"); + } + sBuilder.setCharAt(i, newValue); + } + } else { + throw new RuntimeException( + "BUG! Unknown base quality format value " + + baseQualityFormat + + " in QseqRecordWriter"); + } + } + sBuilder.append(delim); + ///////// + sBuilder.append((seq.getFilterPassed() == null || seq.getFilterPassed()) ? 
1 : 0); + + try { + ByteBuffer buf = Text.encode(sBuilder.toString()); + out.write(buf.array(), 0, buf.limit()); + } catch (java.nio.charset.CharacterCodingException e) { + throw new RuntimeException("Error encoding qseq record: " + seq); + } + out.write(newLine, 0, newLine.length); + } + + public void close(TaskAttemptContext context) throws IOException { + out.close(); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/ReferenceFragment.java b/src/main/java/org/seqdoop/hadoop_bam/ReferenceFragment.java index 5455288..ee52ec1 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/ReferenceFragment.java +++ b/src/main/java/org/seqdoop/hadoop_bam/ReferenceFragment.java @@ -22,130 +22,132 @@ package org.seqdoop.hadoop_bam; +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; -import java.io.IOException; -import java.io.DataInput; -import java.io.DataOutput; - // partly based on SequencedFragment -// note: this class is supposed to represent a single line of a fasta input file, augmented by chromosome/contig name and start position - -public class ReferenceFragment implements Writable -{ - protected Text sequence = new Text(); - - protected Integer position; - protected String indexSequence; - - public void clear() - { - sequence.clear(); - indexSequence = null; - position = null; +// note: this class is supposed to represent a single line of a fasta input file, augmented by +// chromosome/contig name and start position + +public class ReferenceFragment implements Writable { + + protected Text sequence = new Text(); + + protected Integer position; + protected String indexSequence; + + public void clear() { + sequence.clear(); + indexSequence = null; + position = null; + } + + /** + * Get sequence Text object. Trade encapsulation for efficiency. Here we expose the internal Text + * object so that data may be read and written diretly from/to it. + * + *

Sequence should always be written using CAPITAL letters and 'N' for unknown bases. + */ + public Text getSequence() { + return sequence; + } + + public void setSequence(Text seq) { + if (seq == null) { + throw new IllegalArgumentException("can't have a null sequence"); } - - /** - * Get sequence Text object. - * Trade encapsulation for efficiency. Here we expose the internal Text - * object so that data may be read and written diretly from/to it. - * - * Sequence should always be written using CAPITAL letters and 'N' for unknown bases. - */ - public Text getSequence() { return sequence; } - - /** - * Get quality Text object. - * Trade encapsulation for efficiency. Here we expose the internal Text - * object so that data may be read and written diretly from/to it. - * - */ - public void setPosition(Integer pos) { - if (pos == null) - throw new IllegalArgumentException("can't have null reference position"); - position = pos; + sequence = seq; + } + + public Integer getPosition() { + return position; + } + + /** + * Get quality Text object. Trade encapsulation for efficiency. Here we expose the internal Text + * object so that data may be read and written diretly from/to it. + */ + public void setPosition(Integer pos) { + if (pos == null) { + throw new IllegalArgumentException("can't have null reference position"); } + position = pos; + } - public void setIndexSequence(String v) { - if (v == null) - throw new IllegalArgumentException("can't have null index sequence"); - indexSequence = v; - } + public String getIndexSequence() { + return indexSequence; + } - public void setSequence(Text seq) - { - if (seq == null) - throw new IllegalArgumentException("can't have a null sequence"); - sequence = seq; + public void setIndexSequence(String v) { + if (v == null) { + throw new IllegalArgumentException("can't have null index sequence"); } - - public Integer getPosition() { return position; } - public String getIndexSequence() { return indexSequence; } - - /** - * Recreates a pseudo fasta record with the fields available. - */ - public String toString() - { - String delim = "\t"; - StringBuilder builder = new StringBuilder(800); - builder.append(indexSequence).append(delim); - builder.append(position).append(delim); - builder.append(sequence); - return builder.toString(); + indexSequence = v; + } + + /** Recreates a pseudo fasta record with the fields available. 
*/ + public String toString() { + String delim = "\t"; + StringBuilder builder = new StringBuilder(800); + builder.append(indexSequence).append(delim); + builder.append(position).append(delim); + builder.append(sequence); + return builder.toString(); + } + + public boolean equals(Object other) { + if (other != null && other instanceof ReferenceFragment) { + ReferenceFragment otherFrag = (ReferenceFragment) other; + + if (position == null && otherFrag.position != null + || position != null && !position.equals(otherFrag.position)) { + return false; + } + if (indexSequence == null && otherFrag.indexSequence != null + || indexSequence != null && !indexSequence.equals(otherFrag.indexSequence)) { + return false; + } + // sequence can't be null + if (!sequence.equals(otherFrag.sequence)) { + return false; + } + + return true; + } else { + return false; } + } - public boolean equals(Object other) - { - if (other != null && other instanceof ReferenceFragment) - { - ReferenceFragment otherFrag = (ReferenceFragment)other; - - if (position == null && otherFrag.position != null || position != null && !position.equals(otherFrag.position)) - return false; - if (indexSequence == null && otherFrag.indexSequence != null || indexSequence != null && !indexSequence.equals(otherFrag.indexSequence)) - return false; - // sequence can't be null - if (!sequence.equals(otherFrag.sequence)) - return false; - - return true; - } - else - return false; - } + @Override + public int hashCode() { + int result = sequence.hashCode(); + result = 31 * result + (position != null ? position.hashCode() : 0); + result = 31 * result + (indexSequence != null ? indexSequence.hashCode() : 0); + return result; + } - @Override - public int hashCode() { - int result = sequence.hashCode(); - result = 31 * result + (position != null ? position.hashCode() : 0); - result = 31 * result + (indexSequence != null ? indexSequence.hashCode() : 0); - return result; - } + public void readFields(DataInput in) throws IOException { + // serialization order: + // 1) sequence + // 2) indexSequence (chromosome/contig name) + // 3) position of first base in this line of the fasta file - public void readFields(DataInput in) throws IOException - { - // serialization order: - // 1) sequence - // 2) indexSequence (chromosome/contig name) - // 3) position of first base in this line of the fasta file + this.clear(); - this.clear(); + sequence.readFields(in); - sequence.readFields(in); + indexSequence = WritableUtils.readString(in); + position = WritableUtils.readVInt(in); + } - indexSequence = WritableUtils.readString(in); - position = WritableUtils.readVInt(in); - } - - public void write(DataOutput out) throws IOException - { - sequence.write(out); + public void write(DataOutput out) throws IOException { + sequence.write(out); - WritableUtils.writeString(out, indexSequence); - WritableUtils.writeVInt(out, position); - - } + WritableUtils.writeString(out, indexSequence); + WritableUtils.writeVInt(out, position); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMFormat.java b/src/main/java/org/seqdoop/hadoop_bam/SAMFormat.java index 60394b2..7e2fd08 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMFormat.java @@ -22,42 +22,54 @@ package org.seqdoop.hadoop_bam; -import java.io.InputStream; import java.io.IOException; - +import java.io.InputStream; import org.apache.hadoop.fs.Path; /** Describes a SAM format. 
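As a usage illustration of the format-inference helpers defined here, a hypothetical sketch that prefers the file extension and falls back to sniffing the first byte of the data (the path and Configuration are illustrative):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.seqdoop.hadoop_bam.SAMFormat;

class FormatProbe {
  static SAMFormat probe(Path path, Configuration conf) throws IOException {
    // Try the extension first (.sam/.bam/.cram), which needs no I/O.
    SAMFormat format = SAMFormat.inferFromFilePath(path);
    if (format == null) {
      // Fall back to the data: inferFromData reads one byte and closes the stream itself.
      FileSystem fs = path.getFileSystem(conf);
      format = SAMFormat.inferFromData(fs.open(path));
    }
    return format;
  }
}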
*/ public enum SAMFormat { - SAM, BAM, CRAM; + SAM, + BAM, + CRAM; - /** Infers the SAM format by looking at the filename of the given path. - * - * @see #inferFromFilePath(String) - */ - public static SAMFormat inferFromFilePath(final Path path) { - return inferFromFilePath(path.getName()); - } + /** + * Infers the SAM format by looking at the filename of the given path. + * + * @see #inferFromFilePath(String) + */ + public static SAMFormat inferFromFilePath(final Path path) { + return inferFromFilePath(path.getName()); + } - /** Infers the SAM format by looking at the extension of the given file - * name. *.sam is recognized as {@link #SAM}, - * *.bam as {@link #BAM}, and *.bam as {@link #CRAM}. - */ - public static SAMFormat inferFromFilePath(final String name) { - if (name.endsWith(".bam")) return BAM; - if (name.endsWith(".cram")) return CRAM; - if (name.endsWith(".sam")) return SAM; - return null; - } + /** + * Infers the SAM format by looking at the extension of the given file name. *.sam is + * recognized as {@link #SAM}, *.bam as {@link #BAM}, and *.bam as + * {@link #CRAM}. + */ + public static SAMFormat inferFromFilePath(final String name) { + if (name.endsWith(".bam")) { + return BAM; + } + if (name.endsWith(".cram")) { + return CRAM; + } + if (name.endsWith(".sam")) { + return SAM; + } + return null; + } - public static SAMFormat inferFromData(final InputStream in) throws IOException { - final byte b = (byte)in.read(); - in.close(); - switch (b) { - case 0x1f: return SAMFormat.BAM; - case 0x43: return SAMFormat.CRAM; - case '@': return SAMFormat.SAM; - } - return null; - } + public static SAMFormat inferFromData(final InputStream in) throws IOException { + final byte b = (byte) in.read(); + in.close(); + switch (b) { + case 0x1f: + return SAMFormat.BAM; + case 0x43: + return SAMFormat.CRAM; + case '@': + return SAMFormat.SAM; + } + return null; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/SAMInputFormat.java index 51e3958..19a393c 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMInputFormat.java @@ -23,34 +23,31 @@ package org.seqdoop.hadoop_bam; import java.io.IOException; - import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -/** An {@link org.apache.hadoop.mapreduce.InputFormat} for SAM files. Values - * are the individual records; see {@link BAMRecordReader} for the meaning of - * the key. +/** + * An {@link org.apache.hadoop.mapreduce.InputFormat} for SAM files. Values are the individual + * records; see {@link BAMRecordReader} for the meaning of the key. */ -public class SAMInputFormat - extends FileInputFormat -{ - /** Returns a {@link SAMRecordReader} initialized with the parameters. */ - @Override public RecordReader - createRecordReader(InputSplit split, TaskAttemptContext ctx) - throws InterruptedException, IOException - { - final RecordReader rr = - new SAMRecordReader(); - rr.initialize(split, ctx); - return rr; - } +public class SAMInputFormat extends FileInputFormat { + + /** Returns a {@link SAMRecordReader} initialized with the parameters. 
*/ + @Override + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext ctx) throws InterruptedException, IOException { + final RecordReader rr = new SAMRecordReader(); + rr.initialize(split, ctx); + return rr; + } - @Override public boolean isSplitable(JobContext job, Path path) { - return super.isSplitable(job, path); - } + @Override + public boolean isSplitable(JobContext job, Path path) { + return super.isSplitable(job, path); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java index 5de204c..d6159d4 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java @@ -22,309 +22,341 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMFormatException; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordIterator; +import htsjdk.samtools.SAMTextHeaderCodec; +import htsjdk.samtools.SamInputResource; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.ValidationStringency; import java.io.ByteArrayInputStream; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; import java.io.StringWriter; import java.io.UnsupportedEncodingException; - import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.InputSplit; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileSplit; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMFormatException; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SAMRecordIterator; -import htsjdk.samtools.SAMTextHeaderCodec; -import htsjdk.samtools.SamInputResource; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; -import htsjdk.samtools.ValidationStringency; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; /** See {@link BAMRecordReader} for the meaning of the key. */ -public class SAMRecordReader - extends RecordReader -{ - private LongWritable key = new LongWritable(); - private SAMRecordWritable record = new SAMRecordWritable(); - - private FSDataInputStream input; - private SAMRecordIterator iterator; - private long start, end; - private boolean isInitialized = false; - - private WorkaroundingStream waInput; - - @Override public void initialize(InputSplit spl, TaskAttemptContext ctx) - throws IOException - { - // This method should only be called once (see Hadoop API). However, - // there seems to be disagreement between implementations that call - // initialize() and Hadoop-BAM's own code that relies on - // {@link SAMInputFormat} to call initialize() when the reader is - // created. Therefore we add this check for the time being. 
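For orientation on how this input format is typically consumed, a hypothetical driver and mapper sketch (class names, paths, and the trivial counting logic are illustrative, not part of Hadoop-BAM):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.seqdoop.hadoop_bam.SAMInputFormat;
import org.seqdoop.hadoop_bam.SAMRecordWritable;

public class SamRecordCount {
  // Emits one count per alignment; the key is the position-based key described in BAMRecordReader.
  public static class CountMapper
      extends Mapper<LongWritable, SAMRecordWritable, LongWritable, LongWritable> {
    private static final LongWritable ONE = new LongWritable(1);

    @Override
    protected void map(LongWritable key, SAMRecordWritable value, Context ctx)
        throws java.io.IOException, InterruptedException {
      ctx.write(key, ONE);
    }
  }

  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "sam-record-count");
    job.setJarByClass(SamRecordCount.class);
    job.setInputFormatClass(SAMInputFormat.class);
    job.setMapperClass(CountMapper.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}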
- if(isInitialized) - close(); - isInitialized = true; - - final FileSplit split = (FileSplit)spl; - - this.start = split.getStart(); - this.end = start + split.getLength(); - - final Configuration conf = ctx.getConfiguration(); - - final ValidationStringency stringency = - SAMHeaderReader.getValidationStringency(conf); - - final Path file = split.getPath(); - final FileSystem fs = file.getFileSystem(conf); - - input = fs.open(file); - - // SAMFileReader likes to make our life difficult, so complexity ensues. - // The basic problem is that SAMFileReader buffers its input internally, - // which causes two issues. - // - // Issue #1 is that SAMFileReader requires that its input begins with a - // SAM header. This is not fine for reading from the middle of a file. - // Because of the buffering, if we have the reader read the header from - // the beginning of the file and then seek to where we want to read - // records from, it'll have buffered some records from immediately after - // the header, which is no good. Thus we need to read the header - // separately and then use a custom stream that wraps the input stream, - // inserting the header at the beginning of it. (Note the spurious - // re-encoding of the header so that the reader can decode it.) - // - // Issue #2 is handling the boundary between two input splits. The best - // way seems to be the classic "in later splits, skip the first line, and - // in every split finish reading a partial line at the end of the split", - // but that latter part is a bit complicated here. Due to the buffering, - // we can easily overshoot: as soon as the stream moves past the end of - // the split, SAMFileReader has buffered some records past the end. The - // basic fix here is to have our custom stream count the number of bytes - // read and to stop after the split size. Unfortunately this prevents us - // from reading the last partial line, so our stream actually allows - // reading to the next newline after the actual end. - - final SAMFileHeader header = createSamReader(input, stringency).getFileHeader(); - - waInput = new WorkaroundingStream(input, header); - - final boolean firstSplit = this.start == 0; - - if (firstSplit) { - // Skip the header because we already have it, and adjust the start - // to match. - final int headerLength = waInput.getRemainingHeaderLength(); - input.seek(headerLength); - this.start += headerLength; - } else - input.seek(--this.start); - - // Creating the iterator causes reading from the stream, so make sure - // to start counting this early. - waInput.setLength(this.end - this.start); - - iterator = createSamReader(waInput, stringency).iterator(); - - if (!firstSplit) { - // Skip the first line, it'll be handled with the previous split. 
- try { - if (iterator.hasNext()) - iterator.next(); - } catch (SAMFormatException e) {} - } - } - - private SamReader createSamReader(InputStream in, ValidationStringency stringency) { - SamReaderFactory readerFactory = SamReaderFactory.makeDefault() - .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) - .setUseAsyncIo(false); - if (stringency != null) { - readerFactory.validationStringency(stringency); - } - return readerFactory.open(SamInputResource.of(in)); - } - - @Override public void close() throws IOException { iterator.close(); } - - @Override public float getProgress() throws IOException { - final long pos = input.getPos(); - if (pos >= end) - return 1; - else - return (float)(pos - start) / (end - start); - } - @Override public LongWritable getCurrentKey () { return key; } - @Override public SAMRecordWritable getCurrentValue() { return record; } - - @Override public boolean nextKeyValue() { - if (!iterator.hasNext()) - return false; - - final SAMRecord r = iterator.next(); - key.set(BAMRecordReader.getKey(r)); - record.set(r); - return true; - } +public class SAMRecordReader extends RecordReader { + + private LongWritable key = new LongWritable(); + private SAMRecordWritable record = new SAMRecordWritable(); + + private FSDataInputStream input; + private SAMRecordIterator iterator; + private long start, end; + private boolean isInitialized = false; + + private WorkaroundingStream waInput; + + @Override + public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { + // This method should only be called once (see Hadoop API). However, + // there seems to be disagreement between implementations that call + // initialize() and Hadoop-BAM's own code that relies on + // {@link SAMInputFormat} to call initialize() when the reader is + // created. Therefore we add this check for the time being. + if (isInitialized) { + close(); + } + isInitialized = true; + + final FileSplit split = (FileSplit) spl; + + this.start = split.getStart(); + this.end = start + split.getLength(); + + final Configuration conf = ctx.getConfiguration(); + + final ValidationStringency stringency = SAMHeaderReader.getValidationStringency(conf); + + final Path file = split.getPath(); + final FileSystem fs = file.getFileSystem(conf); + + input = fs.open(file); + + // SAMFileReader likes to make our life difficult, so complexity ensues. + // The basic problem is that SAMFileReader buffers its input internally, + // which causes two issues. + // + // Issue #1 is that SAMFileReader requires that its input begins with a + // SAM header. This is not fine for reading from the middle of a file. + // Because of the buffering, if we have the reader read the header from + // the beginning of the file and then seek to where we want to read + // records from, it'll have buffered some records from immediately after + // the header, which is no good. Thus we need to read the header + // separately and then use a custom stream that wraps the input stream, + // inserting the header at the beginning of it. (Note the spurious + // re-encoding of the header so that the reader can decode it.) + // + // Issue #2 is handling the boundary between two input splits. The best + // way seems to be the classic "in later splits, skip the first line, and + // in every split finish reading a partial line at the end of the split", + // but that latter part is a bit complicated here. 
Due to the buffering, + // we can easily overshoot: as soon as the stream moves past the end of + // the split, SAMFileReader has buffered some records past the end. The + // basic fix here is to have our custom stream count the number of bytes + // read and to stop after the split size. Unfortunately this prevents us + // from reading the last partial line, so our stream actually allows + // reading to the next newline after the actual end. + + final SAMFileHeader header = createSamReader(input, stringency).getFileHeader(); + + waInput = new WorkaroundingStream(input, header); + + final boolean firstSplit = this.start == 0; + + if (firstSplit) { + // Skip the header because we already have it, and adjust the start + // to match. + final int headerLength = waInput.getRemainingHeaderLength(); + input.seek(headerLength); + this.start += headerLength; + } else { + input.seek(--this.start); + } + + // Creating the iterator causes reading from the stream, so make sure + // to start counting this early. + waInput.setLength(this.end - this.start); + + iterator = createSamReader(waInput, stringency).iterator(); + + if (!firstSplit) { + // Skip the first line, it'll be handled with the previous split. + try { + if (iterator.hasNext()) { + iterator.next(); + } + } catch (SAMFormatException e) { + } + } + } + + private SamReader createSamReader(InputStream in, ValidationStringency stringency) { + SamReaderFactory readerFactory = + SamReaderFactory.makeDefault() + .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) + .setUseAsyncIo(false); + if (stringency != null) { + readerFactory.validationStringency(stringency); + } + return readerFactory.open(SamInputResource.of(in)); + } + + @Override + public void close() throws IOException { + iterator.close(); + } + + @Override + public float getProgress() throws IOException { + final long pos = input.getPos(); + if (pos >= end) { + return 1; + } else { + return (float) (pos - start) / (end - start); + } + } + + @Override + public LongWritable getCurrentKey() { + return key; + } + + @Override + public SAMRecordWritable getCurrentValue() { + return record; + } + + @Override + public boolean nextKeyValue() { + if (!iterator.hasNext()) { + return false; + } + + final SAMRecord r = iterator.next(); + key.set(BAMRecordReader.getKey(r)); + record.set(r); + return true; + } } // See the long comment in SAMRecordReader.initialize() for what this does. class WorkaroundingStream extends InputStream { - private final InputStream stream, headerStream; - private boolean headerRemaining; - private long length; - private int headerLength; - - private boolean lookingForEOL = false, - foundEOL = false, - strippingAts = false; // HACK, see read(byte[], int, int). 
- - public WorkaroundingStream(InputStream stream, SAMFileHeader header) { - this.stream = stream; - - String text = header.getTextHeader(); - if (text == null) { - StringWriter writer = new StringWriter(); - new SAMTextHeaderCodec().encode(writer, header); - text = writer.toString(); - } - byte[] b; - try { - b = text.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - b = null; - assert false; - } - headerRemaining = true; - headerLength = b.length; - headerStream = new ByteArrayInputStream(b); - - this.length = Long.MAX_VALUE; - } - - public void setLength(long length) { - this.length = length; - } - - public int getRemainingHeaderLength() { - return headerLength; - } - - private byte[] readBuf = new byte[1]; - @Override public int read() throws IOException { - for (;;) switch (read(readBuf)) { - case 0: continue; - case 1: return readBuf[0]; - case -1: return -1; - } - } - - @Override public int read(byte[] buf, int off, int len) throws IOException { - if (!headerRemaining) - return streamRead(buf, off, len); - - int h; - if (strippingAts) - h = 0; - else { - h = headerStream.read(buf, off, len); - if (h == -1) { - // This should only happen when there was no header at all, in - // which case Picard doesn't throw an error until trying to read - // a record, for some reason. (Perhaps an oversight.) Thus we - // need to handle that case here. - assert (headerLength == 0); - h = 0; - } else if (h < headerLength) { - headerLength -= h; - return h; - } - strippingAts = true; - headerStream.close(); - } - - final int newOff = off + h; - int s = streamRead(buf, newOff, len - h); - - if (s <= 0) - return strippingAts ? s : h; - - // HACK HACK HACK. - // - // We gave all of the header, which means that SAMFileReader is still - // trying to read more header lines. If we're in a split that isn't at - // the start of the SAM file, we could be in the middle of a line and - // thus see @ characters at the start of our data. Then SAMFileReader - // would try to understand those as header lines and the end result is - // that it throws an error, since they aren't actually header lines, - // they're just part of a SAM record. - // - // So, if we're done with the header, strip all @ characters we see. Thus - // SAMFileReader will stop reading the header there and won't throw an - // exception until we use its SAMRecordIterator, at which point we can - // catch it, because we know to expect it. - // - // headerRemaining remains true while it's possible that there are still - // @ characters coming. - - int i = newOff-1; - while (buf[++i] == '@' && --s > 0); - - if (i != newOff) - System.arraycopy(buf, i, buf, newOff, s); - - headerRemaining = s == 0; - return h + s; - } - private int streamRead(byte[] buf, int off, int len) throws IOException { - if (len > length) { - if (foundEOL) - return 0; - lookingForEOL = true; - } - int n = stream.read(buf, off, len); - if (n > 0) { - n = tryFindEOL(buf, off, n); - length -= n; - } - return n; - } - private int tryFindEOL(byte[] buf, int off, int len) { - assert !foundEOL; - - if (!lookingForEOL || len < length) - return len; - - // Find the first EOL between length and len. - - // len >= length so length fits in an int. - int i = Math.max(0, (int)length - 1); - - for (; i < len; ++i) { - if (buf[off + i] == '\n') { - foundEOL = true; - return i + 1; - } - } - return len; - } - - @Override public void close() throws IOException { - stream.close(); - } - - @Override public int available() throws IOException { - return headerRemaining ? 
headerStream.available() : stream.available(); - } + + private final InputStream stream, headerStream; + private boolean headerRemaining; + private long length; + private int headerLength; + + private boolean lookingForEOL = false, + foundEOL = false, + strippingAts = false; // HACK, see read(byte[], int, int). + private byte[] readBuf = new byte[1]; + + public WorkaroundingStream(InputStream stream, SAMFileHeader header) { + this.stream = stream; + + String text = header.getTextHeader(); + if (text == null) { + StringWriter writer = new StringWriter(); + new SAMTextHeaderCodec().encode(writer, header); + text = writer.toString(); + } + byte[] b; + try { + b = text.getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + b = null; + assert false; + } + headerRemaining = true; + headerLength = b.length; + headerStream = new ByteArrayInputStream(b); + + this.length = Long.MAX_VALUE; + } + + public void setLength(long length) { + this.length = length; + } + + public int getRemainingHeaderLength() { + return headerLength; + } + + @Override + public int read() throws IOException { + for (; ; ) { + switch (read(readBuf)) { + case 0: + continue; + case 1: + return readBuf[0]; + case -1: + return -1; + } + } + } + + @Override + public int read(byte[] buf, int off, int len) throws IOException { + if (!headerRemaining) { + return streamRead(buf, off, len); + } + + int h; + if (strippingAts) { + h = 0; + } else { + h = headerStream.read(buf, off, len); + if (h == -1) { + // This should only happen when there was no header at all, in + // which case Picard doesn't throw an error until trying to read + // a record, for some reason. (Perhaps an oversight.) Thus we + // need to handle that case here. + assert (headerLength == 0); + h = 0; + } else if (h < headerLength) { + headerLength -= h; + return h; + } + strippingAts = true; + headerStream.close(); + } + + final int newOff = off + h; + int s = streamRead(buf, newOff, len - h); + + if (s <= 0) { + return strippingAts ? s : h; + } + + // HACK HACK HACK. + // + // We gave all of the header, which means that SAMFileReader is still + // trying to read more header lines. If we're in a split that isn't at + // the start of the SAM file, we could be in the middle of a line and + // thus see @ characters at the start of our data. Then SAMFileReader + // would try to understand those as header lines and the end result is + // that it throws an error, since they aren't actually header lines, + // they're just part of a SAM record. + // + // So, if we're done with the header, strip all @ characters we see. Thus + // SAMFileReader will stop reading the header there and won't throw an + // exception until we use its SAMRecordIterator, at which point we can + // catch it, because we know to expect it. + // + // headerRemaining remains true while it's possible that there are still + // @ characters coming. + + int i = newOff - 1; + while (buf[++i] == '@' && --s > 0) {; + } + + if (i != newOff) { + System.arraycopy(buf, i, buf, newOff, s); + } + + headerRemaining = s == 0; + return h + s; + } + + private int streamRead(byte[] buf, int off, int len) throws IOException { + if (len > length) { + if (foundEOL) { + return 0; + } + lookingForEOL = true; + } + int n = stream.read(buf, off, len); + if (n > 0) { + n = tryFindEOL(buf, off, n); + length -= n; + } + return n; + } + + private int tryFindEOL(byte[] buf, int off, int len) { + assert !foundEOL; + + if (!lookingForEOL || len < length) { + return len; + } + + // Find the first EOL between length and len. 
+ + // len >= length so length fits in an int. + int i = Math.max(0, (int) length - 1); + + for (; i < len; ++i) { + if (buf[off + i] == '\n') { + foundEOL = true; + return i + 1; + } + } + return len; + } + + @Override + public void close() throws IOException { + stream.close(); + } + + @Override + public int available() throws IOException { + return headerRemaining ? headerStream.available() : stream.available(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWritable.java b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWritable.java index 04db587..3bc53f6 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWritable.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWritable.java @@ -22,54 +22,59 @@ package org.seqdoop.hadoop_bam; -import java.io.DataOutput; +import htsjdk.samtools.BAMRecordCodec; +import htsjdk.samtools.SAMRecord; import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; - import org.apache.hadoop.io.Writable; - -import htsjdk.samtools.BAMRecordCodec; -import htsjdk.samtools.SAMRecord; - import org.seqdoop.hadoop_bam.util.DataInputWrapper; import org.seqdoop.hadoop_bam.util.DataOutputWrapper; -/** A {@link Writable} {@link SAMRecord}. +/** + * A {@link Writable} {@link SAMRecord}. * - *

In every mapper, the record will have a header, since BAMInputFormat
- * provides one. It is lost when transferring the SAMRecord to a reducer,
- * however. The current implementation of {@link BAMRecordCodec} does not
- * require a record for encoding nor decoding of a SAMRecord, so
- * this fortunately doesn't matter for either {@link #write} or {@link
- * #readFields}.
+ *
+ * <p>
In every mapper, the record will have a header, since BAMInputFormat provides one. It is lost + * when transferring the SAMRecord to a reducer, however. The current implementation of {@link + * BAMRecordCodec} does not require a record for encoding nor decoding of a SAMRecord, + * so this fortunately doesn't matter for either {@link #write} or {@link #readFields}. */ public class SAMRecordWritable implements Writable { - private static final BAMRecordCodec lazyCodec = - new BAMRecordCodec(null, new LazyBAMRecordFactory()); - private SAMRecord record; + private static final BAMRecordCodec lazyCodec = + new BAMRecordCodec(null, new LazyBAMRecordFactory()); + + private SAMRecord record; + + public SAMRecord get() { + return record; + } + + public void set(SAMRecord r) { + record = r; + } - public SAMRecord get() { return record; } - public void set(SAMRecord r) { record = r; } + @Override + public void write(DataOutput out) throws IOException { + // In theory, it shouldn't matter whether we give a header to + // BAMRecordCodec or not, since the representation of an alignment in BAM + // doesn't depend on the header data at all. Only its interpretation + // does, and a simple read/write codec shouldn't really have anything to + // say about that. (But in practice, it already does matter for decode(), + // which is why LazyBAMRecordFactory exists.) + final BAMRecordCodec codec = new BAMRecordCodec(record.getHeader()); + codec.setOutputStream(new DataOutputWrapper(out)); + codec.encode(record); + } - @Override public void write(DataOutput out) throws IOException { - // In theory, it shouldn't matter whether we give a header to - // BAMRecordCodec or not, since the representation of an alignment in BAM - // doesn't depend on the header data at all. Only its interpretation - // does, and a simple read/write codec shouldn't really have anything to - // say about that. (But in practice, it already does matter for decode(), - // which is why LazyBAMRecordFactory exists.) - final BAMRecordCodec codec = new BAMRecordCodec(record.getHeader()); - codec.setOutputStream(new DataOutputWrapper(out)); - codec.encode(record); - } - @Override public void readFields(DataInput in) throws IOException { - lazyCodec.setInputStream(new DataInputWrapper(in)); - record = lazyCodec.decode(); - } + @Override + public void readFields(DataInput in) throws IOException { + lazyCodec.setInputStream(new DataInputWrapper(in)); + record = lazyCodec.decode(); + } - @Override - public String toString() { - return record.getSAMString().trim(); // remove trailing newline - } + @Override + public String toString() { + return record.getSAMString().trim(); // remove trailing newline + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWriter.java index a59c9cb..c702753 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordWriter.java @@ -22,83 +22,68 @@ package org.seqdoop.hadoop_bam; -import java.io.IOException; -import java.io.OutputStream; - import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMTextWriter; - +import java.io.IOException; +import java.io.OutputStream; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -/** A base {@link RecordWriter} for SAM records. 
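A minimal sketch of the Writable round trip performed by SAMRecordWritable above, roughly what Hadoop does when a value crosses the mapper/reducer boundary (the in-memory streams are illustrative):

import htsjdk.samtools.SAMRecord;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.seqdoop.hadoop_bam.SAMRecordWritable;

class WritableRoundTrip {
  static SAMRecord roundTrip(SAMRecord rec) throws IOException {
    SAMRecordWritable out = new SAMRecordWritable();
    out.set(rec);
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    out.write(new DataOutputStream(bytes));  // BAM-encodes the record

    SAMRecordWritable in = new SAMRecordWritable();
    in.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
    // Note: per the SAMRecordWritable Javadoc, the header is not carried across the round trip.
    return in.get();
  }
}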
+/**
+ * A base {@link RecordWriter} for SAM records.
  *
- * <p>Handles the output stream, writing the header if requested, and provides
- * the {@link #writeAlignment} function for subclasses.
+ *
+ * <p>
Handles the output stream, writing the header if requested, and provides the {@link + * #writeAlignment} function for subclasses. */ -public abstract class SAMRecordWriter - extends RecordWriter -{ - private SAMTextWriter writer; - private SAMFileHeader header; +public abstract class SAMRecordWriter extends RecordWriter { + + private SAMTextWriter writer; + private SAMFileHeader header; + + /** A SAMFileHeader is read from the input Path. */ + public SAMRecordWriter(Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init( + output, SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), writeHeader, ctx); + } + + public SAMRecordWriter( + Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + } + + public SAMRecordWriter(OutputStream output, SAMFileHeader header, boolean writeHeader) + throws IOException { + init(output, header, writeHeader); + } - /** A SAMFileHeader is read from the input Path. */ - public SAMRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - init( - output, - SAMHeaderReader.readSAMHeaderFrom(input, ctx.getConfiguration()), - writeHeader, ctx); - } - public SAMRecordWriter( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - } - public SAMRecordWriter( - OutputStream output, SAMFileHeader header, boolean writeHeader) - throws IOException - { - init(output, header, writeHeader); - } + private void init(Path output, SAMFileHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader); + } - private void init( - Path output, SAMFileHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader); - } - private void init( - OutputStream output, SAMFileHeader header, boolean writeHeader) - throws IOException - { - this.header = header; - writer = new SAMTextWriter(output); + private void init(OutputStream output, SAMFileHeader header, boolean writeHeader) + throws IOException { + this.header = header; + writer = new SAMTextWriter(output); - writer.setSortOrder(header.getSortOrder(), false); - if (writeHeader) - writer.setHeader(header); - } + writer.setSortOrder(header.getSortOrder(), false); + if (writeHeader) { + writer.setHeader(header); + } + } - @Override public void close(TaskAttemptContext ctx) { - writer.close(); - } + @Override + public void close(TaskAttemptContext ctx) { + writer.close(); + } - protected void writeAlignment(final SAMRecord rec) { - rec.setHeader(header); - writer.writeAlignment(rec); - } + protected void writeAlignment(final SAMRecord rec) { + rec.setHeader(header); + writer.writeAlignment(rec); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SequencedFragment.java b/src/main/java/org/seqdoop/hadoop_bam/SequencedFragment.java index dc348dc..3da0f80 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SequencedFragment.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SequencedFragment.java @@ -22,353 +22,489 @@ package org.seqdoop.hadoop_bam; +import java.io.DataInput; +import java.io.DataOutput; +import 
java.io.IOException; import org.apache.hadoop.io.Text; import org.apache.hadoop.io.Writable; import org.apache.hadoop.io.WritableUtils; - -import java.io.IOException; -import java.io.DataInput; -import java.io.DataOutput; - import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding; -public class SequencedFragment implements Writable -{ - protected Text sequence = new Text(); - protected Text quality = new Text(); - - protected String instrument; - protected Integer runNumber; - protected String flowcellId; - protected Integer lane; - protected Integer tile; - protected Integer xpos; - protected Integer ypos; - protected Integer read; - protected Boolean filterPassed; - protected Integer controlNumber; - protected String indexSequence; - - // for serialization of nullable fiels - protected static final int Instrument_Present = 0x0001; - protected static final int RunNumber_Present = 0x0002; - protected static final int FlowcellId_Present = 0x0004; - protected static final int Lane_Present = 0x0008; - protected static final int Tile_Present = 0x0010; - protected static final int Xpos_Present = 0x0020; - protected static final int Ypos_Present = 0x0040; - protected static final int Read_Present = 0x0080; - protected static final int FilterPassed_Present = 0x0100; - protected static final int ControlNumber_Present = 0x0200; - protected static final int IndexSequence_Present = 0x0400; - - public void clear() - { - sequence.clear(); - quality.clear(); - - instrument = null; - runNumber = null; - flowcellId = null; - lane = null; - tile = null; - xpos = null; - ypos = null; - read = null; - filterPassed = null; - controlNumber = null; - indexSequence = null; - } - - /** - * Get sequence Text object. - * Trade encapsulation for efficiency. Here we expose the internal Text - * object so that data may be read and written diretly from/to it. - * - * Sequence should always be written using CAPITAL letters and 'N' for unknown bases. - */ - public Text getSequence() { return sequence; } - - /** - * Get quality Text object. - * Trade encapsulation for efficiency. Here we expose the internal Text - * object so that data may be read and written diretly from/to it. - * - * Quality should always be in ASCII-encoded Phred+33 format (sanger). - */ - public Text getQuality() { return quality; } - - public void setInstrument(String v) { instrument = v; } - public void setRunNumber(Integer v) { runNumber = v; } - public void setFlowcellId(String v) { flowcellId = v; } - public void setLane(Integer v) { lane = v; } - public void setTile(Integer v) { tile = v; } - public void setXpos(Integer v) { xpos = v; } - public void setYpos(Integer v) { ypos = v; } - public void setRead(Integer v) { read = v; } - public void setFilterPassed(Boolean v) { filterPassed = v; } - public void setControlNumber(Integer v) { controlNumber = v; } - public void setIndexSequence(String v) { indexSequence = v; } - - public void setSequence(Text seq) - { - if (seq == null) - throw new IllegalArgumentException("can't have a null sequence"); - sequence = seq; - } - - /** - * Set quality. Quality should be encoded in Sanger Phred+33 format. 
- */ - public void setQuality(Text qual) - { - if (qual == null) - throw new IllegalArgumentException("can't have a null quality"); - quality = qual; - } - - public String getInstrument() { return instrument; } - public Integer getRunNumber() { return runNumber; } - public String getFlowcellId() { return flowcellId; } - public Integer getLane() { return lane; } - public Integer getTile() { return tile; } - public Integer getXpos() { return xpos; } - public Integer getYpos() { return ypos; } - public Integer getRead() { return read; } - public Boolean getFilterPassed() { return filterPassed; } - public Integer getControlNumber() { return controlNumber; } - public String getIndexSequence() { return indexSequence; } - - /** - * Recreates a pseudo qseq record with the fields available. - */ - public String toString() - { - String delim = "\t"; - StringBuilder builder = new StringBuilder(800); - builder.append(instrument).append(delim); - builder.append(runNumber).append(delim); - builder.append(flowcellId).append(delim); - builder.append(lane).append(delim); - builder.append(tile).append(delim); - builder.append(xpos).append(delim); - builder.append(ypos).append(delim); - builder.append(indexSequence).append(delim); - builder.append(read).append(delim); - builder.append(sequence).append(delim); - builder.append(quality).append(delim); - builder.append((filterPassed == null || filterPassed) ? 1 : 0); - return builder.toString(); - } - - public boolean equals(Object other) - { - if (other != null && other instanceof SequencedFragment) - { - SequencedFragment otherFrag = (SequencedFragment)other; - - if (instrument == null && otherFrag.instrument != null || instrument != null && !instrument.equals(otherFrag.instrument)) - return false; - if (runNumber == null && otherFrag.runNumber != null || runNumber != null && !runNumber.equals(otherFrag.runNumber)) - return false; - if (flowcellId == null && otherFrag.flowcellId != null || flowcellId != null && !flowcellId.equals(otherFrag.flowcellId)) - return false; - if (lane == null && otherFrag.lane != null || lane != null && !lane.equals(otherFrag.lane)) - return false; - if (tile == null && otherFrag.tile != null || tile != null && !tile.equals(otherFrag.tile)) - return false; - if (xpos == null && otherFrag.xpos != null || xpos != null && !xpos.equals(otherFrag.xpos)) - return false; - if (ypos == null && otherFrag.ypos != null || ypos != null && !ypos.equals(otherFrag.ypos)) - return false; - if (read == null && otherFrag.read != null || read != null && !read.equals(otherFrag.read)) - return false; - if (filterPassed == null && otherFrag.filterPassed != null || filterPassed != null && !filterPassed.equals(otherFrag.filterPassed)) - return false; - if (controlNumber == null && otherFrag.controlNumber != null || controlNumber != null && !controlNumber.equals(otherFrag.controlNumber)) - return false; - if (indexSequence == null && otherFrag.indexSequence != null || indexSequence != null && !indexSequence.equals(otherFrag.indexSequence)) - return false; - // sequence and quality can't be null - if (!sequence.equals(otherFrag.sequence)) - return false; - if (!quality.equals(otherFrag.quality)) - return false; - - return true; - } - else - return false; - } - - @Override - public int hashCode() { - int result = sequence.hashCode(); - result = 31 * result + quality.hashCode(); - result = 31 * result + (instrument != null ? instrument.hashCode() : 0); - result = 31 * result + (runNumber != null ? 
runNumber.hashCode() : 0); - result = 31 * result + (flowcellId != null ? flowcellId.hashCode() : 0); - result = 31 * result + (lane != null ? lane.hashCode() : 0); - result = 31 * result + (tile != null ? tile.hashCode() : 0); - result = 31 * result + (xpos != null ? xpos.hashCode() : 0); - result = 31 * result + (ypos != null ? ypos.hashCode() : 0); - result = 31 * result + (read != null ? read.hashCode() : 0); - result = 31 * result + (filterPassed != null ? filterPassed.hashCode() : 0); - result = 31 * result + (controlNumber != null ? controlNumber.hashCode() : 0); - result = 31 * result + (indexSequence != null ? indexSequence.hashCode() : 0); - return result; - } - - /** - * Convert quality scores in-place. - * - * @throws FormatException if quality scores are out of the range - * allowed by the current encoding. - * @throws IllegalArgumentException if current and target quality encodings are the same. - */ - public static void convertQuality(Text quality, BaseQualityEncoding current, BaseQualityEncoding target) - { - if (current == target) - throw new IllegalArgumentException("current and target quality encodinds are the same (" + current + ")"); - - byte[] bytes = quality.getBytes(); - final int len = quality.getLength(); - final int illuminaSangerDistance = FormatConstants.ILLUMINA_OFFSET - FormatConstants.SANGER_OFFSET; - - if (current == BaseQualityEncoding.Illumina && target == BaseQualityEncoding.Sanger) - { - for (int i = 0; i < len; ++i) - { - if (bytes[i] < FormatConstants.ILLUMINA_OFFSET || bytes[i] > (FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX)) - { - throw new FormatException( - "base quality score out of range for Illumina Phred+64 format (found " + (bytes[i] - FormatConstants.ILLUMINA_OFFSET) + - " but acceptable range is [0," + FormatConstants.ILLUMINA_MAX + "]).\n" + - "Maybe qualities are encoded in Sanger format?\n"); - } - bytes[i] -= illuminaSangerDistance; - } - } - else if (current == BaseQualityEncoding.Sanger && target == BaseQualityEncoding.Illumina) - { - for (int i = 0; i < len; ++i) - { - if (bytes[i] < FormatConstants.SANGER_OFFSET || bytes[i] > (FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX)) - { - throw new FormatException( - "base quality score out of range for Sanger Phred+64 format (found " + (bytes[i] - FormatConstants.SANGER_OFFSET) + - " but acceptable range is [0," + FormatConstants.SANGER_MAX + "]).\n" + - "Maybe qualities are encoded in Illumina format?\n"); - } - bytes[i] += illuminaSangerDistance; - } - } - else - throw new IllegalArgumentException("unsupported BaseQualityEncoding transformation from " + current + " to " + target); - } - - /** - * Verify that the given quality bytes are within the range allowed for the specified encoding. - * - * In theory, the Sanger encoding uses the entire - * range of characters from ASCII 33 to 126, giving a value range of [0,93]. However, values over 60 are - * unlikely in practice, and are more likely to be caused by mistaking a file that uses Illumina encoding - * for Sanger. So, we'll enforce the same range supported by Illumina encoding ([0,62]) for Sanger. - * - * @return -1 if quality is ok. - * @return If an out-of-range value is found the index of the value is returned. 
- */ - public static int verifyQuality(Text quality, BaseQualityEncoding encoding) - { - // set allowed quality range - int max, min; - - if (encoding == BaseQualityEncoding.Illumina) - { - max = FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX; - min = FormatConstants.ILLUMINA_OFFSET; - } - else if (encoding == BaseQualityEncoding.Sanger) - { - max = FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX; - min = FormatConstants.SANGER_OFFSET; - } - else - throw new IllegalArgumentException("Unsupported base encoding quality " + encoding); - - // verify - final byte[] bytes = quality.getBytes(); - final int len = quality.getLength(); - - for (int i = 0; i < len; ++i) - { - if (bytes[i] < min || bytes[i] > max) - return i; - } - return -1; - } - - public void readFields(DataInput in) throws IOException - { - // TODO: reimplement with a serialization system (e.g. Avro) - - // serialization order: - // 1) sequence - // 2) quality - // 3) int with flags indicating which fields are defined (see *_Present flags) - // 4..end) the rest of the fields - - this.clear(); - - sequence.readFields(in); - quality.readFields(in); - - int presentFlags = WritableUtils.readVInt(in); - if ( (presentFlags & Instrument_Present) != 0) instrument = WritableUtils.readString(in); - if ( (presentFlags & RunNumber_Present) != 0) runNumber = WritableUtils.readVInt(in); - if ( (presentFlags & FlowcellId_Present) != 0) flowcellId = WritableUtils.readString(in); - if ( (presentFlags & Lane_Present) != 0) lane = WritableUtils.readVInt(in); - if ( (presentFlags & Tile_Present) != 0) tile = WritableUtils.readVInt(in); - if ( (presentFlags & Xpos_Present) != 0) xpos = WritableUtils.readVInt(in); - if ( (presentFlags & Ypos_Present) != 0) ypos = WritableUtils.readVInt(in); - if ( (presentFlags & Read_Present) != 0) read = WritableUtils.readVInt(in); - if ( (presentFlags & FilterPassed_Present) != 0) filterPassed = WritableUtils.readVInt(in) == 1; - if ( (presentFlags & ControlNumber_Present) != 0) controlNumber = WritableUtils.readVInt(in); - if ( (presentFlags & IndexSequence_Present) != 0) indexSequence = WritableUtils.readString(in); - } - - public void write(DataOutput out) throws IOException - { - // TODO: reimplement with a serialization system (e.g. 
Avro) - - sequence.write(out); - quality.write(out); - - int presentFlags = 0; - if (instrument != null) presentFlags |= Instrument_Present; - if (runNumber != null) presentFlags |= RunNumber_Present; - if (flowcellId != null) presentFlags |= FlowcellId_Present; - if (lane != null) presentFlags |= Lane_Present; - if (tile != null) presentFlags |= Tile_Present; - if (xpos != null) presentFlags |= Xpos_Present; - if (ypos != null) presentFlags |= Ypos_Present; - if (read != null) presentFlags |= Read_Present; - if (filterPassed != null) presentFlags |= FilterPassed_Present; - if (controlNumber != null) presentFlags |= ControlNumber_Present; - if (indexSequence != null) presentFlags |= IndexSequence_Present; - - WritableUtils.writeVInt(out, presentFlags); - - if (instrument != null) WritableUtils.writeString(out, instrument); - if (runNumber != null) WritableUtils.writeVInt(out, runNumber); - if (flowcellId != null) WritableUtils.writeString(out, flowcellId); - if (lane != null) WritableUtils.writeVInt(out, lane); - if (tile != null) WritableUtils.writeVInt(out, tile); - if (xpos != null) WritableUtils.writeVInt(out, xpos); - if (ypos != null) WritableUtils.writeVInt(out, ypos); - if (read != null) WritableUtils.writeVInt(out, read); - if (filterPassed != null) WritableUtils.writeVInt(out, filterPassed ? 1 : 0); - if (controlNumber != null) WritableUtils.writeVInt(out, controlNumber); - if (indexSequence != null) WritableUtils.writeString(out, indexSequence); - } +public class SequencedFragment implements Writable { + + // for serialization of nullable fiels + protected static final int Instrument_Present = 0x0001; + protected static final int RunNumber_Present = 0x0002; + protected static final int FlowcellId_Present = 0x0004; + protected static final int Lane_Present = 0x0008; + protected static final int Tile_Present = 0x0010; + protected static final int Xpos_Present = 0x0020; + protected static final int Ypos_Present = 0x0040; + protected static final int Read_Present = 0x0080; + protected static final int FilterPassed_Present = 0x0100; + protected static final int ControlNumber_Present = 0x0200; + protected static final int IndexSequence_Present = 0x0400; + protected Text sequence = new Text(); + protected Text quality = new Text(); + protected String instrument; + protected Integer runNumber; + protected String flowcellId; + protected Integer lane; + protected Integer tile; + protected Integer xpos; + protected Integer ypos; + protected Integer read; + protected Boolean filterPassed; + protected Integer controlNumber; + protected String indexSequence; + + /** + * Convert quality scores in-place. + * + * @throws FormatException if quality scores are out of the range allowed by the current encoding. + * @throws IllegalArgumentException if current and target quality encodings are the same. 
+ */ + public static void convertQuality( + Text quality, BaseQualityEncoding current, BaseQualityEncoding target) { + if (current == target) { + throw new IllegalArgumentException( + "current and target quality encodinds are the same (" + current + ")"); + } + + byte[] bytes = quality.getBytes(); + final int len = quality.getLength(); + final int illuminaSangerDistance = + FormatConstants.ILLUMINA_OFFSET - FormatConstants.SANGER_OFFSET; + + if (current == BaseQualityEncoding.Illumina && target == BaseQualityEncoding.Sanger) { + for (int i = 0; i < len; ++i) { + if (bytes[i] < FormatConstants.ILLUMINA_OFFSET + || bytes[i] > (FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX)) { + throw new FormatException( + "base quality score out of range for Illumina Phred+64 format (found " + + (bytes[i] - FormatConstants.ILLUMINA_OFFSET) + + " but acceptable range is [0," + + FormatConstants.ILLUMINA_MAX + + "]).\n" + + "Maybe qualities are encoded in Sanger format?\n"); + } + bytes[i] -= illuminaSangerDistance; + } + } else if (current == BaseQualityEncoding.Sanger && target == BaseQualityEncoding.Illumina) { + for (int i = 0; i < len; ++i) { + if (bytes[i] < FormatConstants.SANGER_OFFSET + || bytes[i] > (FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX)) { + throw new FormatException( + "base quality score out of range for Sanger Phred+64 format (found " + + (bytes[i] - FormatConstants.SANGER_OFFSET) + + " but acceptable range is [0," + + FormatConstants.SANGER_MAX + + "]).\n" + + "Maybe qualities are encoded in Illumina format?\n"); + } + bytes[i] += illuminaSangerDistance; + } + } else { + throw new IllegalArgumentException( + "unsupported BaseQualityEncoding transformation from " + current + " to " + target); + } + } + + /** + * Verify that the given quality bytes are within the range allowed for the specified encoding. + * + *
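A small, self-contained illustration of these two quality helpers (the quality string is made up; the shift follows the Phred+64 and Phred+33 offsets defined in FormatConstants):

import org.apache.hadoop.io.Text;
import org.seqdoop.hadoop_bam.FormatConstants.BaseQualityEncoding;
import org.seqdoop.hadoop_bam.SequencedFragment;

class QualityDemo {
  public static void main(String[] args) {
    // 'h' (ASCII 104) is Q40 in Illumina Phred+64; converting in place shifts each byte down
    // by 31, giving 'I' (ASCII 73), which is Q40 in Sanger Phred+33.
    Text qual = new Text("hhhh");
    SequencedFragment.convertQuality(qual, BaseQualityEncoding.Illumina, BaseQualityEncoding.Sanger);
    System.out.println(qual);  // IIII

    // verifyQuality returns -1 when every score is in range, otherwise the offending index.
    System.out.println(SequencedFragment.verifyQuality(qual, BaseQualityEncoding.Sanger));  // -1
  }
}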

In theory, the Sanger encoding uses the entire range of characters from ASCII 33 to 126, + * giving a value range of [0,93]. However, values over 60 are unlikely in practice, and are more + * likely to be caused by mistaking a file that uses Illumina encoding for Sanger. So, we'll + * enforce the same range supported by Illumina encoding ([0,62]) for Sanger. + * + * @return If an out-of-range value is found the index of the value is returned. + */ + public static int verifyQuality(Text quality, BaseQualityEncoding encoding) { + // set allowed quality range + int max, min; + + if (encoding == BaseQualityEncoding.Illumina) { + max = FormatConstants.ILLUMINA_OFFSET + FormatConstants.ILLUMINA_MAX; + min = FormatConstants.ILLUMINA_OFFSET; + } else if (encoding == BaseQualityEncoding.Sanger) { + max = FormatConstants.SANGER_OFFSET + FormatConstants.SANGER_MAX; + min = FormatConstants.SANGER_OFFSET; + } else { + throw new IllegalArgumentException("Unsupported base encoding quality " + encoding); + } + + // verify + final byte[] bytes = quality.getBytes(); + final int len = quality.getLength(); + + for (int i = 0; i < len; ++i) { + if (bytes[i] < min || bytes[i] > max) { + return i; + } + } + return -1; + } + + public void clear() { + sequence.clear(); + quality.clear(); + + instrument = null; + runNumber = null; + flowcellId = null; + lane = null; + tile = null; + xpos = null; + ypos = null; + read = null; + filterPassed = null; + controlNumber = null; + indexSequence = null; + } + + /** + * Get sequence Text object. Trade encapsulation for efficiency. Here we expose the internal Text + * object so that data may be read and written diretly from/to it. + * + *

Sequence should always be written using CAPITAL letters and 'N' for unknown bases. + */ + public Text getSequence() { + return sequence; + } + + public void setSequence(Text seq) { + if (seq == null) { + throw new IllegalArgumentException("can't have a null sequence"); + } + sequence = seq; + } + + /** + * Get quality Text object. Trade encapsulation for efficiency. Here we expose the internal Text + * object so that data may be read and written diretly from/to it. + * + *

Quality should always be in ASCII-encoded Phred+33 format (sanger). + */ + public Text getQuality() { + return quality; + } + + /** Set quality. Quality should be encoded in Sanger Phred+33 format. */ + public void setQuality(Text qual) { + if (qual == null) { + throw new IllegalArgumentException("can't have a null quality"); + } + quality = qual; + } + + public String getInstrument() { + return instrument; + } + + public void setInstrument(String v) { + instrument = v; + } + + public Integer getRunNumber() { + return runNumber; + } + + public void setRunNumber(Integer v) { + runNumber = v; + } + + public String getFlowcellId() { + return flowcellId; + } + + public void setFlowcellId(String v) { + flowcellId = v; + } + + public Integer getLane() { + return lane; + } + + public void setLane(Integer v) { + lane = v; + } + + public Integer getTile() { + return tile; + } + + public void setTile(Integer v) { + tile = v; + } + + public Integer getXpos() { + return xpos; + } + + public void setXpos(Integer v) { + xpos = v; + } + + public Integer getYpos() { + return ypos; + } + + public void setYpos(Integer v) { + ypos = v; + } + + public Integer getRead() { + return read; + } + + public void setRead(Integer v) { + read = v; + } + + public Boolean getFilterPassed() { + return filterPassed; + } + + public void setFilterPassed(Boolean v) { + filterPassed = v; + } + + public Integer getControlNumber() { + return controlNumber; + } + + public void setControlNumber(Integer v) { + controlNumber = v; + } + + public String getIndexSequence() { + return indexSequence; + } + + public void setIndexSequence(String v) { + indexSequence = v; + } + + /** Recreates a pseudo qseq record with the fields available. */ + public String toString() { + String delim = "\t"; + StringBuilder builder = new StringBuilder(800); + builder.append(instrument).append(delim); + builder.append(runNumber).append(delim); + builder.append(flowcellId).append(delim); + builder.append(lane).append(delim); + builder.append(tile).append(delim); + builder.append(xpos).append(delim); + builder.append(ypos).append(delim); + builder.append(indexSequence).append(delim); + builder.append(read).append(delim); + builder.append(sequence).append(delim); + builder.append(quality).append(delim); + builder.append((filterPassed == null || filterPassed) ? 
1 : 0); + return builder.toString(); + } + + public boolean equals(Object other) { + if (other != null && other instanceof SequencedFragment) { + SequencedFragment otherFrag = (SequencedFragment) other; + + if (instrument == null && otherFrag.instrument != null + || instrument != null && !instrument.equals(otherFrag.instrument)) { + return false; + } + if (runNumber == null && otherFrag.runNumber != null + || runNumber != null && !runNumber.equals(otherFrag.runNumber)) { + return false; + } + if (flowcellId == null && otherFrag.flowcellId != null + || flowcellId != null && !flowcellId.equals(otherFrag.flowcellId)) { + return false; + } + if (lane == null && otherFrag.lane != null || lane != null && !lane.equals(otherFrag.lane)) { + return false; + } + if (tile == null && otherFrag.tile != null || tile != null && !tile.equals(otherFrag.tile)) { + return false; + } + if (xpos == null && otherFrag.xpos != null || xpos != null && !xpos.equals(otherFrag.xpos)) { + return false; + } + if (ypos == null && otherFrag.ypos != null || ypos != null && !ypos.equals(otherFrag.ypos)) { + return false; + } + if (read == null && otherFrag.read != null || read != null && !read.equals(otherFrag.read)) { + return false; + } + if (filterPassed == null && otherFrag.filterPassed != null + || filterPassed != null && !filterPassed.equals(otherFrag.filterPassed)) { + return false; + } + if (controlNumber == null && otherFrag.controlNumber != null + || controlNumber != null && !controlNumber.equals(otherFrag.controlNumber)) { + return false; + } + if (indexSequence == null && otherFrag.indexSequence != null + || indexSequence != null && !indexSequence.equals(otherFrag.indexSequence)) { + return false; + } + // sequence and quality can't be null + if (!sequence.equals(otherFrag.sequence)) { + return false; + } + if (!quality.equals(otherFrag.quality)) { + return false; + } + + return true; + } else { + return false; + } + } + + @Override + public int hashCode() { + int result = sequence.hashCode(); + result = 31 * result + quality.hashCode(); + result = 31 * result + (instrument != null ? instrument.hashCode() : 0); + result = 31 * result + (runNumber != null ? runNumber.hashCode() : 0); + result = 31 * result + (flowcellId != null ? flowcellId.hashCode() : 0); + result = 31 * result + (lane != null ? lane.hashCode() : 0); + result = 31 * result + (tile != null ? tile.hashCode() : 0); + result = 31 * result + (xpos != null ? xpos.hashCode() : 0); + result = 31 * result + (ypos != null ? ypos.hashCode() : 0); + result = 31 * result + (read != null ? read.hashCode() : 0); + result = 31 * result + (filterPassed != null ? filterPassed.hashCode() : 0); + result = 31 * result + (controlNumber != null ? controlNumber.hashCode() : 0); + result = 31 * result + (indexSequence != null ? indexSequence.hashCode() : 0); + return result; + } + + public void readFields(DataInput in) throws IOException { + // TODO: reimplement with a serialization system (e.g. 
Avro) + + // serialization order: + // 1) sequence + // 2) quality + // 3) int with flags indicating which fields are defined (see *_Present flags) + // 4..end) the rest of the fields + + this.clear(); + + sequence.readFields(in); + quality.readFields(in); + + int presentFlags = WritableUtils.readVInt(in); + if ((presentFlags & Instrument_Present) != 0) { + instrument = WritableUtils.readString(in); + } + if ((presentFlags & RunNumber_Present) != 0) { + runNumber = WritableUtils.readVInt(in); + } + if ((presentFlags & FlowcellId_Present) != 0) { + flowcellId = WritableUtils.readString(in); + } + if ((presentFlags & Lane_Present) != 0) { + lane = WritableUtils.readVInt(in); + } + if ((presentFlags & Tile_Present) != 0) { + tile = WritableUtils.readVInt(in); + } + if ((presentFlags & Xpos_Present) != 0) { + xpos = WritableUtils.readVInt(in); + } + if ((presentFlags & Ypos_Present) != 0) { + ypos = WritableUtils.readVInt(in); + } + if ((presentFlags & Read_Present) != 0) { + read = WritableUtils.readVInt(in); + } + if ((presentFlags & FilterPassed_Present) != 0) { + filterPassed = WritableUtils.readVInt(in) == 1; + } + if ((presentFlags & ControlNumber_Present) != 0) { + controlNumber = WritableUtils.readVInt(in); + } + if ((presentFlags & IndexSequence_Present) != 0) { + indexSequence = WritableUtils.readString(in); + } + } + + public void write(DataOutput out) throws IOException { + // TODO: reimplement with a serialization system (e.g. Avro) + + sequence.write(out); + quality.write(out); + + int presentFlags = 0; + if (instrument != null) { + presentFlags |= Instrument_Present; + } + if (runNumber != null) { + presentFlags |= RunNumber_Present; + } + if (flowcellId != null) { + presentFlags |= FlowcellId_Present; + } + if (lane != null) { + presentFlags |= Lane_Present; + } + if (tile != null) { + presentFlags |= Tile_Present; + } + if (xpos != null) { + presentFlags |= Xpos_Present; + } + if (ypos != null) { + presentFlags |= Ypos_Present; + } + if (read != null) { + presentFlags |= Read_Present; + } + if (filterPassed != null) { + presentFlags |= FilterPassed_Present; + } + if (controlNumber != null) { + presentFlags |= ControlNumber_Present; + } + if (indexSequence != null) { + presentFlags |= IndexSequence_Present; + } + + WritableUtils.writeVInt(out, presentFlags); + + if (instrument != null) { + WritableUtils.writeString(out, instrument); + } + if (runNumber != null) { + WritableUtils.writeVInt(out, runNumber); + } + if (flowcellId != null) { + WritableUtils.writeString(out, flowcellId); + } + if (lane != null) { + WritableUtils.writeVInt(out, lane); + } + if (tile != null) { + WritableUtils.writeVInt(out, tile); + } + if (xpos != null) { + WritableUtils.writeVInt(out, xpos); + } + if (ypos != null) { + WritableUtils.writeVInt(out, ypos); + } + if (read != null) { + WritableUtils.writeVInt(out, read); + } + if (filterPassed != null) { + WritableUtils.writeVInt(out, filterPassed ? 
1 : 0); + } + if (controlNumber != null) { + WritableUtils.writeVInt(out, controlNumber); + } + if (indexSequence != null) { + WritableUtils.writeString(out, indexSequence); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndex.java b/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndex.java index af72270..ffa91af 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndex.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndex.java @@ -25,131 +25,145 @@ import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.List; import java.util.NavigableSet; import java.util.TreeSet; -/** An index into BAM files, for {@link BAMInputFormat}. Reads files that are - * created by {@link SplittingBAMIndexer}. +/** + * An index into BAM files, for {@link BAMInputFormat}. Reads files that are created by {@link + * SplittingBAMIndexer}. * - *
<p>
Indexes the positions of individual BAM records in the file.
</p>
+ *
<p>
Indexes the positions of individual BAM records in the file. */ public final class SplittingBAMIndex { - private final NavigableSet virtualOffsets = new TreeSet(); - - public SplittingBAMIndex() {} - public SplittingBAMIndex(final File path) throws IOException { - this(new BufferedInputStream(new FileInputStream(path))); - } - public SplittingBAMIndex(final InputStream in) throws IOException { - readIndex(in); - } - - public void readIndex(final InputStream in) throws IOException { - virtualOffsets.clear(); - - final ByteBuffer bb = ByteBuffer.allocate(8); - - for (long prev = -1; in.read(bb.array()) == 8;) { - final long cur = bb.getLong(0); - if (prev > cur) - throw new IOException(String.format( - "Invalid splitting BAM index; offsets not in order: %#x > %#x", - prev, cur)); - - virtualOffsets.add(prev = cur); - } - in.close(); - - if (virtualOffsets.size() < 1) - throw new IOException( - "Invalid splitting BAM index: "+ - "should contain at least the file size"); - } - - public List getVirtualOffsets() { - return new ArrayList<>(virtualOffsets); - } - - public Long prevAlignment(final long filePos) { - return virtualOffsets.floor(filePos << 16); - } - public Long nextAlignment(final long filePos) { - return virtualOffsets.higher(filePos << 16); - } - - public int size() { return virtualOffsets.size(); } - - private long first() { return virtualOffsets.first(); } - private long last() { return prevAlignment(bamSize() - 1); } - long bamSize() { return virtualOffsets.last() >>> 16; } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - - SplittingBAMIndex that = (SplittingBAMIndex) o; - - return virtualOffsets != null ? virtualOffsets.equals(that.virtualOffsets) : that - .virtualOffsets == null; - - } - - @Override - public int hashCode() { - return virtualOffsets != null ? virtualOffsets.hashCode() : 0; - } - - @Override - public String toString() { - return virtualOffsets.toString(); - } - - /** Writes some statistics about each splitting BAM index file given as an - * argument. 
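As an illustrative aside (not part of the patch): the SplittingBAMIndex API shown above can be queried roughly as follows. The index file name and the byte offset are made-up values; the virtual-offset decoding mirrors the >>> 16 / & 0xffff convention used by the class's own main() method.

import java.io.File;
import java.io.IOException;
import org.seqdoop.hadoop_bam.SplittingBAMIndex;

public class SplittingBAMIndexQuerySketch {
  public static void main(String[] args) throws IOException {
    // Load an index previously written by SplittingBAMIndexer (hypothetical file name).
    SplittingBAMIndex index = new SplittingBAMIndex(new File("sample.bam.splitting-bai"));

    // Find the first indexed record at or after an arbitrary byte offset in the compressed BAM.
    long compressedFilePos = 64 * 1024 * 1024;
    Long virtualOffset = index.nextAlignment(compressedFilePos);
    if (virtualOffset == null) {
      System.out.println("no indexed record at or after that position");
    } else {
      // A BGZF virtual offset packs the compressed block address in the upper 48 bits
      // and the offset within the uncompressed block in the lower 16 bits.
      long blockAddress = virtualOffset >>> 16;
      int withinBlock = (int) (virtualOffset & 0xffff);
      System.out.printf(
          "next record: block %#x, offset %d, %d offsets in index%n",
          blockAddress, withinBlock, index.size());
    }
  }
}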
- */ - public static void main(String[] args) { - if (args.length == 0) { - System.out.println( - "Usage: SplittingBAMIndex [splitting BAM indices...]\n\n"+ - - "Writes a few statistics about each splitting BAM index."); - return; - } - - for (String arg : args) { - final File f = new File(arg); - if (f.isFile() && f.canRead()) { - try { - System.err.printf("%s:\n", f); - final SplittingBAMIndex bi = new SplittingBAMIndex(f); - if (bi.size() == 1) { - System.err.printf("\t0 alignments\n" + - "\tassociated BAM file size %d\n", bi.bamSize()); - } else { - final long first = bi.first(); - final long last = bi.last(); - System.err.printf( - "\t%d alignments\n" + - "\tfirst is at %#06x in BGZF block at %#014x\n" + - "\tlast is at %#06x in BGZF block at %#014x\n" + - "\tassociated BAM file size %d\n", - bi.size(), - first & 0xffff, first >>> 16, - last & 0xffff, last >>> 16, - bi.bamSize()); - } - } catch (IOException e) { - System.err.printf("Failed to read %s!\n", f); - e.printStackTrace(); - } - } else - System.err.printf("%s does not look like a readable file!\n", f); - } - } + + private final NavigableSet virtualOffsets = new TreeSet(); + + public SplittingBAMIndex() {} + + public SplittingBAMIndex(final File path) throws IOException { + this(new BufferedInputStream(new FileInputStream(path))); + } + + public SplittingBAMIndex(final InputStream in) throws IOException { + readIndex(in); + } + + /** Writes some statistics about each splitting BAM index file given as an argument. */ + public static void main(String[] args) { + if (args.length == 0) { + System.out.println( + "Usage: SplittingBAMIndex [splitting BAM indices...]\n\n" + + "Writes a few statistics about each splitting BAM index."); + return; + } + + for (String arg : args) { + final File f = new File(arg); + if (f.isFile() && f.canRead()) { + try { + System.err.printf("%s:\n", f); + final SplittingBAMIndex bi = new SplittingBAMIndex(f); + if (bi.size() == 1) { + System.err.printf("\t0 alignments\n" + "\tassociated BAM file size %d\n", bi.bamSize()); + } else { + final long first = bi.first(); + final long last = bi.last(); + System.err.printf( + "\t%d alignments\n" + + "\tfirst is at %#06x in BGZF block at %#014x\n" + + "\tlast is at %#06x in BGZF block at %#014x\n" + + "\tassociated BAM file size %d\n", + bi.size(), first & 0xffff, first >>> 16, last & 0xffff, last >>> 16, bi.bamSize()); + } + } catch (IOException e) { + System.err.printf("Failed to read %s!\n", f); + e.printStackTrace(); + } + } else { + System.err.printf("%s does not look like a readable file!\n", f); + } + } + } + + public void readIndex(final InputStream in) throws IOException { + virtualOffsets.clear(); + + final ByteBuffer bb = ByteBuffer.allocate(8); + + for (long prev = -1; in.read(bb.array()) == 8; ) { + final long cur = bb.getLong(0); + if (prev > cur) { + throw new IOException( + String.format( + "Invalid splitting BAM index; offsets not in order: %#x > %#x", prev, cur)); + } + + virtualOffsets.add(prev = cur); + } + in.close(); + + if (virtualOffsets.size() < 1) { + throw new IOException( + "Invalid splitting BAM index: " + "should contain at least the file size"); + } + } + + public List getVirtualOffsets() { + return new ArrayList<>(virtualOffsets); + } + + public Long prevAlignment(final long filePos) { + return virtualOffsets.floor(filePos << 16); + } + + public Long nextAlignment(final long filePos) { + return virtualOffsets.higher(filePos << 16); + } + + public int size() { + return virtualOffsets.size(); + } + + private long first() { + return 
virtualOffsets.first(); + } + + private long last() { + return prevAlignment(bamSize() - 1); + } + + long bamSize() { + return virtualOffsets.last() >>> 16; + } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + + SplittingBAMIndex that = (SplittingBAMIndex) o; + + return virtualOffsets != null + ? virtualOffsets.equals(that.virtualOffsets) + : that.virtualOffsets == null; + } + + @Override + public int hashCode() { + return virtualOffsets != null ? virtualOffsets.hashCode() : 0; + } + + @Override + public String toString() { + return virtualOffsets.toString(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndexer.java b/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndexer.java index 59b27c3..939469e 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndexer.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SplittingBAMIndexer.java @@ -25,12 +25,13 @@ import htsjdk.samtools.SAMFileSource; import htsjdk.samtools.SAMFileSpan; import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.util.BlockCompressedInputStream; import java.io.BufferedOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.lang.reflect.InvocationTargetException; import java.lang.reflect.Method; @@ -38,340 +39,335 @@ import java.nio.ByteOrder; import java.nio.LongBuffer; import java.util.Arrays; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; -import htsjdk.samtools.util.BlockCompressedInputStream; - /** * An indexing tool and API for BAM files, making them palatable to {@link - * org.seqdoop.hadoop_bam.BAMInputFormat}. Writes splitting BAM indices as - * understood by {@link org.seqdoop.hadoop_bam.SplittingBAMIndex}. + * org.seqdoop.hadoop_bam.BAMInputFormat}. Writes splitting BAM indices as understood by {@link + * org.seqdoop.hadoop_bam.SplittingBAMIndex}. * - * There are two ways of using this class: - * 1) Building a splitting BAM index from an existing BAM file - * 2) Building a splitting BAM index while building the BAM file + *
<p>
There are two ways of using this class: 1) Building a splitting BAM index from an existing BAM + * file 2) Building a splitting BAM index while building the BAM file * - * For 1), use the static {@link #index(InputStream, OutputStream, long, int)} method, - * which takes the input BAM and output stream to write the index to. + *
<p>
For 1), use the static {@link #index(InputStream, OutputStream, long, int)} method, which + * takes the input BAM and output stream to write the index to. * - * For 2), use one of the constructors that takes an output stream, then pass {@link - * SAMRecord} objects via the {@link #processAlignment} method, and then call {@link - * #finish(long)} to complete writing the index. + *
<p>
For 2), use one of the constructors that takes an output stream, then pass {@link SAMRecord} + * objects via the {@link #processAlignment} method, and then call {@link #finish(long)} to complete + * writing the index. */ public final class SplittingBAMIndexer { - public static final String OUTPUT_FILE_EXTENSION = ".splitting-bai"; - - // Default to a granularity level of 4096. This is generally sufficient - // for very large BAM files, relative to a maximum heap size in the - // gigabyte range. - public static final int DEFAULT_GRANULARITY = 4096; - - public static void main(String[] args) { - if (args.length <= 1) { - System.out.println( - "Usage: SplittingBAMIndexer GRANULARITY [BAM files...]\n\n"+ - - "Writes, for each GRANULARITY alignments in a BAM file, its "+ - "virtual file offset\nas a big-endian 64-bit integer into "+ - "[filename].splitting-bai. The file is\nterminated by the BAM "+ - "file's length, in the same format."); - return; - } - - int granularity; - try { - granularity = Integer.parseInt(args[0]); - } catch (NumberFormatException e) { - granularity = 0; - } - if (granularity <= 0) { - System.err.printf( - "Granularity must be a positive integer, not '%s'!\n", args[0]); - return; - } - - for (final String arg : Arrays.asList(args).subList(1, args.length)) { - final File f = new File(arg); - System.out.printf("Indexing %s...", f); - try { - SplittingBAMIndexer.index( - new FileInputStream(f), - new BufferedOutputStream(new FileOutputStream(f + OUTPUT_FILE_EXTENSION)), - f.length(), granularity); - System.out.println(" done."); - } catch (IOException e) { - System.out.println(" FAILED!"); - e.printStackTrace(); - } - } - } - - /** - * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link - * org.apache.hadoop.conf.Configuration} object instead of a supplied - * argument list - * - * @throws java.lang.IllegalArgumentException if the "input" property is not - * in the Configuration - */ - public static void run(final Configuration conf) throws IOException { - final String inputString = conf.get("input"); - if (inputString == null) - throw new IllegalArgumentException( - "String property \"input\" path not found in given Configuration"); - - final FileSystem fs = FileSystem.get(conf); - - final Path input = new Path(inputString); - - SplittingBAMIndexer.index( - fs.open(input), - fs.create(input.suffix(OUTPUT_FILE_EXTENSION)), - fs.getFileStatus(input).getLen(), - conf.getInt("granularity", DEFAULT_GRANULARITY)); - } - - private final OutputStream out; - private final ByteBuffer byteBuffer = ByteBuffer.allocate(8); - private final int granularity; - private final LongBuffer lb; - private long count; - private Method getFirstOffset; - - private static final int PRINT_EVERY = 500*1024*1024; - - /** - * Prepare to index a BAM file. - * @param out the stream to write the index to - */ - public SplittingBAMIndexer(final OutputStream out) { - this(out, SplittingBAMIndexer.DEFAULT_GRANULARITY); - } - - /** - * Prepare to index a BAM file. - * @param out the stream to write the index to - * @param granularity write the offset of every n-th alignment to the index - */ - public SplittingBAMIndexer(final OutputStream out, final int granularity) { - this.out = out; - this.lb = byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); - this.granularity = granularity; - } - - /** - * Process the given record for the index. 
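As a sketch only (not part of the patch), the two usage modes described in the class javadoc above look roughly like this. The file names are placeholders; mode 2 assumes each SAMRecord carries file-source information (rec.getFileSource() non-null), which processAlignment relies on, and a List stands in for whatever loop produces the records.

import htsjdk.samtools.SAMRecord;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.util.List;
import org.seqdoop.hadoop_bam.SplittingBAMIndexer;

public class SplittingBAMIndexerSketch {

  /** Mode 1: index an existing BAM file in one call. */
  static void indexExistingBam(File bam) throws IOException {
    OutputStream out =
        new BufferedOutputStream(
            new FileOutputStream(bam + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION));
    // index() reads the whole BAM, writes one offset per DEFAULT_GRANULARITY records,
    // appends the file length, and closes both streams itself.
    SplittingBAMIndexer.index(
        new FileInputStream(bam), out, bam.length(), SplittingBAMIndexer.DEFAULT_GRANULARITY);
  }

  /** Mode 2: build the index while the BAM records are being processed elsewhere. */
  static void indexWhileProcessing(List<SAMRecord> records, OutputStream indexOut, long bamSize)
      throws IOException {
    SplittingBAMIndexer indexer = new SplittingBAMIndexer(indexOut);
    for (SAMRecord rec : records) {
      indexer.processAlignment(rec); // assumes rec.getFileSource() is populated
    }
    indexer.finish(bamSize); // bamSize: final size of the BAM file in bytes
  }
}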
- * @param rec the record from the file being indexed - * @throws IOException - */ - public void processAlignment(final SAMRecord rec) throws IOException { - // write an offset for the first record and for the g-th record thereafter (where - // g is the granularity), to be consistent with the index method - if (count == 0 || (count + 1) % granularity == 0) { - SAMFileSource fileSource = rec.getFileSource(); - SAMFileSpan filePointer = fileSource.getFilePointer(); - writeVirtualOffset(getPos(filePointer)); - } - count++; - } - - void processAlignment(final long virtualOffset) throws IOException { - if (count == 0 || (count + 1) % granularity == 0) { - writeVirtualOffset(virtualOffset); - } - count++; - } - - private long getPos(SAMFileSpan filePointer) { - // Use reflection since BAMFileSpan is package private in htsjdk 1.141. Note that - // Hadoop-BAM cannot use a later version of htsjdk since it requires Java 8. - if (getFirstOffset == null) { - try { - getFirstOffset = filePointer.getClass().getDeclaredMethod("getFirstOffset"); - getFirstOffset.setAccessible(true); - } catch (NoSuchMethodException e) { - throw new IllegalStateException(e); - } - } - try { - return (Long) getFirstOffset.invoke(filePointer); - } catch (IllegalAccessException e) { - throw new IllegalStateException(e); - } catch (InvocationTargetException e) { - throw new IllegalStateException(e); - } - } - - /** - * Write the given virtual offset to the index. This method is for internal use only. - * @param virtualOffset virtual file pointer - * @throws IOException - */ - public void writeVirtualOffset(long virtualOffset) throws IOException { - lb.put(0, virtualOffset); - out.write(byteBuffer.array()); - } - - /** - * Complete the index by writing the input BAM file size to the index, and closing - * the output stream. - * @param inputSize the size of the input BAM file - * @throws IOException - */ - public void finish(long inputSize) throws IOException { - writeVirtualOffset(inputSize << 16); - out.close(); - } - - /** - * Perform indexing on the given BAM file, at the granularity level specified. 
- */ - public static void index( - final InputStream rawIn, final OutputStream out, final long inputSize, - final int granularity) - throws IOException - { - final BlockCompressedInputStream in = - new BlockCompressedInputStream(rawIn); - - final ByteBuffer byteBuffer = ByteBuffer.allocate(8); // Enough to fit a long - final LongBuffer lb = - byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); - - skipToAlignmentList(byteBuffer, in); - - // Always write the first one to make sure it's not skipped - lb.put(0, in.getFilePointer()); - out.write(byteBuffer.array()); - - long prevPrint = in.getFilePointer() >> 16; - - for (int i = 0;;) { - final PtrSkipPair pair = readAlignment(byteBuffer, in); - if (pair == null) - break; - - if (++i == granularity) { - i = 0; - lb.put(0, pair.ptr); - out.write(byteBuffer.array()); - - final long filePos = pair.ptr >> 16; - if (filePos - prevPrint >= PRINT_EVERY) { - System.out.print("-"); - prevPrint = filePos; - } - } - fullySkip(in, pair.skip); - } - lb.put(0, inputSize << 16); - out.write(byteBuffer.array()); - out.close(); - in.close(); - } - - private static void skipToAlignmentList(final ByteBuffer byteBuffer, final InputStream in) - throws IOException { - // Check magic number - if (!readExactlyBytes(byteBuffer, in, 4)) - ioError("Invalid BAM header: too short, no magic"); - - final int magic = byteBuffer.order(ByteOrder.BIG_ENDIAN).getInt(0); - if (magic != 0x42414d01) - ioError("Invalid BAM header: bad magic %#x != 0x42414d01", magic); - - // Skip the SAM header - if (!readExactlyBytes(byteBuffer, in, 4)) - ioError("Invalid BAM header: too short, no SAM header length"); - - byteBuffer.order(ByteOrder.LITTLE_ENDIAN); - - final int samLen = byteBuffer.getInt(0); - if (samLen < 0) - ioError("Invalid BAM header: negative SAM header length %d", samLen); - - fullySkip(in, samLen); - - // Get the number of reference sequences - if (!readExactlyBytes(byteBuffer, in, 4)) - ioError("Invalid BAM header: too short, no reference sequence count"); - - final int referenceSeqs = byteBuffer.getInt(0); - - // Skip over each reference sequence datum individually - for (int s = 0; s < referenceSeqs; ++s) { - if (!readExactlyBytes(byteBuffer, in, 4)) - ioError("Invalid reference list: EOF before reference %d", s+1); - - // Skip over the name + the int giving the sequence length - fullySkip(in, byteBuffer.getInt(0) + 4); - } - } - - private static final class PtrSkipPair { - public long ptr; - public int skip; - - public PtrSkipPair(long p, int s) { - ptr = p; - skip = s; - } - } - - private static PtrSkipPair readAlignment(final ByteBuffer byteBuffer, - final BlockCompressedInputStream in) throws IOException - { - final long ptr = in.getFilePointer(); - final int read = readBytes(byteBuffer, in, 4); - if (read != 4) { - if (read == 0) - return null; - ioError( - "Invalid alignment at virtual offset %#x: "+ - "less than 4 bytes long", in.getFilePointer()); - } - return new PtrSkipPair(ptr, byteBuffer.getInt(0)); - } - - private static void fullySkip(final InputStream in, final int skip) - throws IOException - { - // Skip repeatedly until we're either done skipping or can't skip any - // more, in case some kind of IO error is temporarily preventing it. That - // kind of situation might not necessarily be possible; the docs are - // rather vague about the whole thing. 
- for (int s = skip; s > 0;) { - final long skipped = in.skip(s); - if (skipped == 0) - throw new IOException("Skip failed"); - s -= skipped; - } - } - - private static int readBytes(final ByteBuffer byteBuffer, final InputStream in, - final int n) throws IOException - { - assert n <= byteBuffer.capacity(); - - int read = 0; - while (read < n) { - final int readNow = in.read(byteBuffer.array(), read, n - read); - if (readNow <= 0) - break; - read += readNow; - } - return read; - } - private static boolean readExactlyBytes(final ByteBuffer byteBuffer, - final InputStream in, final int n) throws IOException - { - return readBytes(byteBuffer, in, n) == n; - } - - private static void ioError(String s, Object... va) throws IOException { - throw new IOException(String.format(s, va)); - } + + public static final String OUTPUT_FILE_EXTENSION = ".splitting-bai"; + + // Default to a granularity level of 4096. This is generally sufficient + // for very large BAM files, relative to a maximum heap size in the + // gigabyte range. + public static final int DEFAULT_GRANULARITY = 4096; + private static final int PRINT_EVERY = 500 * 1024 * 1024; + private final OutputStream out; + private final ByteBuffer byteBuffer = ByteBuffer.allocate(8); + private final int granularity; + private final LongBuffer lb; + private long count; + private Method getFirstOffset; + /** + * Prepare to index a BAM file. + * + * @param out the stream to write the index to + */ + public SplittingBAMIndexer(final OutputStream out) { + this(out, SplittingBAMIndexer.DEFAULT_GRANULARITY); + } + + /** + * Prepare to index a BAM file. + * + * @param out the stream to write the index to + * @param granularity write the offset of every n-th alignment to the index + */ + public SplittingBAMIndexer(final OutputStream out, final int granularity) { + this.out = out; + this.lb = byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); + this.granularity = granularity; + } + + public static void main(String[] args) { + if (args.length <= 1) { + System.out.println( + "Usage: SplittingBAMIndexer GRANULARITY [BAM files...]\n\n" + + "Writes, for each GRANULARITY alignments in a BAM file, its " + + "virtual file offset\nas a big-endian 64-bit integer into " + + "[filename].splitting-bai. 
The file is\nterminated by the BAM " + + "file's length, in the same format."); + return; + } + + int granularity; + try { + granularity = Integer.parseInt(args[0]); + } catch (NumberFormatException e) { + granularity = 0; + } + if (granularity <= 0) { + System.err.printf("Granularity must be a positive integer, not '%s'!\n", args[0]); + return; + } + + for (final String arg : Arrays.asList(args).subList(1, args.length)) { + final File f = new File(arg); + System.out.printf("Indexing %s...", f); + try { + SplittingBAMIndexer.index( + new FileInputStream(f), + new BufferedOutputStream(new FileOutputStream(f + OUTPUT_FILE_EXTENSION)), + f.length(), + granularity); + System.out.println(" done."); + } catch (IOException e) { + System.out.println(" FAILED!"); + e.printStackTrace(); + } + } + } + + /** + * Invoke a new SplittingBAMIndexer object, operating on the supplied {@link + * org.apache.hadoop.conf.Configuration} object instead of a supplied argument list + * + * @throws java.lang.IllegalArgumentException if the "input" property is not in the Configuration + */ + public static void run(final Configuration conf) throws IOException { + final String inputString = conf.get("input"); + if (inputString == null) { + throw new IllegalArgumentException( + "String property \"input\" path not found in given Configuration"); + } + + final FileSystem fs = FileSystem.get(conf); + + final Path input = new Path(inputString); + + SplittingBAMIndexer.index( + fs.open(input), + fs.create(input.suffix(OUTPUT_FILE_EXTENSION)), + fs.getFileStatus(input).getLen(), + conf.getInt("granularity", DEFAULT_GRANULARITY)); + } + + /** Perform indexing on the given BAM file, at the granularity level specified. */ + public static void index( + final InputStream rawIn, final OutputStream out, final long inputSize, final int granularity) + throws IOException { + final BlockCompressedInputStream in = new BlockCompressedInputStream(rawIn); + + final ByteBuffer byteBuffer = ByteBuffer.allocate(8); // Enough to fit a long + final LongBuffer lb = byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); + + skipToAlignmentList(byteBuffer, in); + + // Always write the first one to make sure it's not skipped + lb.put(0, in.getFilePointer()); + out.write(byteBuffer.array()); + + long prevPrint = in.getFilePointer() >> 16; + + for (int i = 0; ; ) { + final PtrSkipPair pair = readAlignment(byteBuffer, in); + if (pair == null) { + break; + } + + if (++i == granularity) { + i = 0; + lb.put(0, pair.ptr); + out.write(byteBuffer.array()); + + final long filePos = pair.ptr >> 16; + if (filePos - prevPrint >= PRINT_EVERY) { + System.out.print("-"); + prevPrint = filePos; + } + } + fullySkip(in, pair.skip); + } + lb.put(0, inputSize << 16); + out.write(byteBuffer.array()); + out.close(); + in.close(); + } + + private static void skipToAlignmentList(final ByteBuffer byteBuffer, final InputStream in) + throws IOException { + // Check magic number + if (!readExactlyBytes(byteBuffer, in, 4)) { + ioError("Invalid BAM header: too short, no magic"); + } + + final int magic = byteBuffer.order(ByteOrder.BIG_ENDIAN).getInt(0); + if (magic != 0x42414d01) { + ioError("Invalid BAM header: bad magic %#x != 0x42414d01", magic); + } + + // Skip the SAM header + if (!readExactlyBytes(byteBuffer, in, 4)) { + ioError("Invalid BAM header: too short, no SAM header length"); + } + + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + final int samLen = byteBuffer.getInt(0); + if (samLen < 0) { + ioError("Invalid BAM header: negative SAM header length %d", samLen); + } + 
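For illustration only (not part of the patch), the run(Configuration) entry point above can be driven along these lines; the input path is a placeholder resolved against the default Hadoop filesystem, and 4096 simply restates the default granularity.

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.seqdoop.hadoop_bam.SplittingBAMIndexer;

public class RunIndexerFromConfiguration {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();
    // "input" is required; "granularity" is optional and defaults to DEFAULT_GRANULARITY.
    conf.set("input", "/data/sample.bam"); // placeholder path on the default filesystem
    conf.setInt("granularity", 4096);
    // Writes /data/sample.bam.splitting-bai next to the input.
    SplittingBAMIndexer.run(conf);
  }
}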
+ fullySkip(in, samLen); + + // Get the number of reference sequences + if (!readExactlyBytes(byteBuffer, in, 4)) { + ioError("Invalid BAM header: too short, no reference sequence count"); + } + + final int referenceSeqs = byteBuffer.getInt(0); + + // Skip over each reference sequence datum individually + for (int s = 0; s < referenceSeqs; ++s) { + if (!readExactlyBytes(byteBuffer, in, 4)) { + ioError("Invalid reference list: EOF before reference %d", s + 1); + } + + // Skip over the name + the int giving the sequence length + fullySkip(in, byteBuffer.getInt(0) + 4); + } + } + + private static PtrSkipPair readAlignment( + final ByteBuffer byteBuffer, final BlockCompressedInputStream in) throws IOException { + final long ptr = in.getFilePointer(); + final int read = readBytes(byteBuffer, in, 4); + if (read != 4) { + if (read == 0) { + return null; + } + ioError( + "Invalid alignment at virtual offset %#x: " + "less than 4 bytes long", + in.getFilePointer()); + } + return new PtrSkipPair(ptr, byteBuffer.getInt(0)); + } + + private static void fullySkip(final InputStream in, final int skip) throws IOException { + // Skip repeatedly until we're either done skipping or can't skip any + // more, in case some kind of IO error is temporarily preventing it. That + // kind of situation might not necessarily be possible; the docs are + // rather vague about the whole thing. + for (int s = skip; s > 0; ) { + final long skipped = in.skip(s); + if (skipped == 0) { + throw new IOException("Skip failed"); + } + s -= skipped; + } + } + + private static int readBytes(final ByteBuffer byteBuffer, final InputStream in, final int n) + throws IOException { + assert n <= byteBuffer.capacity(); + + int read = 0; + while (read < n) { + final int readNow = in.read(byteBuffer.array(), read, n - read); + if (readNow <= 0) { + break; + } + read += readNow; + } + return read; + } + + private static boolean readExactlyBytes( + final ByteBuffer byteBuffer, final InputStream in, final int n) throws IOException { + return readBytes(byteBuffer, in, n) == n; + } + + private static void ioError(String s, Object... va) throws IOException { + throw new IOException(String.format(s, va)); + } + + /** + * Process the given record for the index. + * + * @param rec the record from the file being indexed + */ + public void processAlignment(final SAMRecord rec) throws IOException { + // write an offset for the first record and for the g-th record thereafter (where + // g is the granularity), to be consistent with the index method + if (count == 0 || (count + 1) % granularity == 0) { + SAMFileSource fileSource = rec.getFileSource(); + SAMFileSpan filePointer = fileSource.getFilePointer(); + writeVirtualOffset(getPos(filePointer)); + } + count++; + } + + void processAlignment(final long virtualOffset) throws IOException { + if (count == 0 || (count + 1) % granularity == 0) { + writeVirtualOffset(virtualOffset); + } + count++; + } + + private long getPos(SAMFileSpan filePointer) { + // Use reflection since BAMFileSpan is package private in htsjdk 1.141. Note that + // Hadoop-BAM cannot use a later version of htsjdk since it requires Java 8. 
+ if (getFirstOffset == null) { + try { + getFirstOffset = filePointer.getClass().getDeclaredMethod("getFirstOffset"); + getFirstOffset.setAccessible(true); + } catch (NoSuchMethodException e) { + throw new IllegalStateException(e); + } + } + try { + return (Long) getFirstOffset.invoke(filePointer); + } catch (IllegalAccessException e) { + throw new IllegalStateException(e); + } catch (InvocationTargetException e) { + throw new IllegalStateException(e); + } + } + + /** + * Write the given virtual offset to the index. This method is for internal use only. + * + * @param virtualOffset virtual file pointer + */ + public void writeVirtualOffset(long virtualOffset) throws IOException { + lb.put(0, virtualOffset); + out.write(byteBuffer.array()); + } + + /** + * Complete the index by writing the input BAM file size to the index, and closing the output + * stream. + * + * @param inputSize the size of the input BAM file + */ + public void finish(long inputSize) throws IOException { + writeVirtualOffset(inputSize << 16); + out.close(); + } + + private static final class PtrSkipPair { + + public long ptr; + public int skip; + + public PtrSkipPair(long p, int s) { + ptr = p; + skip = s; + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VCFFormat.java b/src/main/java/org/seqdoop/hadoop_bam/VCFFormat.java index 890ce33..d5112f0 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VCFFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VCFFormat.java @@ -22,63 +22,69 @@ package org.seqdoop.hadoop_bam; -import htsjdk.samtools.util.BlockCompressedInputStream; import java.io.BufferedInputStream; -import java.io.InputStream; import java.io.IOException; - +import java.io.InputStream; import java.util.zip.GZIPInputStream; import org.apache.hadoop.fs.Path; /** Describes a VCF format. */ public enum VCFFormat { - VCF, BCF; - - /** Infers the VCF format by looking at the filename of the given path. - * - * @see #inferFromFilePath(String) - */ - public static VCFFormat inferFromFilePath(final Path path) { - return inferFromFilePath(path.getName()); - } + VCF, + BCF; - /** Infers the VCF format by looking at the extension of the given file - * name. *.vcf is recognized as {@link #VCF} and - * *.bcf as {@link #BCF}. - */ - public static VCFFormat inferFromFilePath(final String name) { - if (name.endsWith(".bcf")) return BCF; - if (name.endsWith(".vcf")) return VCF; - if (name.endsWith(".gz")) return VCF; - if (name.endsWith(".bgz")) return VCF; - return null; - } + /** + * Infers the VCF format by looking at the filename of the given path. + * + * @see #inferFromFilePath(String) + */ + public static VCFFormat inferFromFilePath(final Path path) { + return inferFromFilePath(path.getName()); + } - /** Infers the VCF format by looking at the first few bytes of the input. - */ - public static VCFFormat inferFromData(final InputStream in) throws IOException { - BufferedInputStream bis = new BufferedInputStream(in); // so mark/reset is supported - return inferFromUncompressedData(isGzip(bis) ? new GZIPInputStream(bis) : bis); - } + /** + * Infers the VCF format by looking at the extension of the given file name. *.vcf is + * recognized as {@link #VCF} and *.bcf as {@link #BCF}. 
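As a brief aside (not part of the patch), the two inference paths of VCFFormat shown here can be exercised like this; the file names are placeholders.

import java.io.FileInputStream;
import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.seqdoop.hadoop_bam.VCFFormat;

public class VCFFormatSniffSketch {
  public static void main(String[] args) throws IOException {
    // By name: .vcf, .gz and .bgz map to VCF, .bcf to BCF, anything else to null.
    VCFFormat byName = VCFFormat.inferFromFilePath(new Path("calls.vcf.gz"));
    // By content: unwraps gzip/BGZF if needed and looks at the first byte
    // ('#' for VCF, 'B' for BCF); the stream is consumed and closed.
    VCFFormat byData = VCFFormat.inferFromData(new FileInputStream("calls.unknown"));
    System.out.println(byName + " / " + byData);
  }
}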
+ */ + public static VCFFormat inferFromFilePath(final String name) { + if (name.endsWith(".bcf")) { + return BCF; + } + if (name.endsWith(".vcf")) { + return VCF; + } + if (name.endsWith(".gz")) { + return VCF; + } + if (name.endsWith(".bgz")) { + return VCF; + } + return null; + } - private static VCFFormat inferFromUncompressedData(final InputStream in) throws IOException { - final byte b = (byte)in.read(); - in.close(); - switch (b) { - case 'B': return BCF; - case '#': return VCF; - } - return null; - } + /** Infers the VCF format by looking at the first few bytes of the input. */ + public static VCFFormat inferFromData(final InputStream in) throws IOException { + BufferedInputStream bis = new BufferedInputStream(in); // so mark/reset is supported + return inferFromUncompressedData(isGzip(bis) ? new GZIPInputStream(bis) : bis); + } - /** - * @return true if the stream is compressed with gzip (or BGZF) - */ - public static boolean isGzip(final InputStream in) throws IOException { - in.mark(1); - final byte b = (byte)in.read(); - in.reset(); - return b == 0x1f; - } + private static VCFFormat inferFromUncompressedData(final InputStream in) throws IOException { + final byte b = (byte) in.read(); + in.close(); + switch (b) { + case 'B': + return BCF; + case '#': + return VCF; + } + return null; + } + /** @return true if the stream is compressed with gzip (or BGZF) */ + public static boolean isGzip(final InputStream in) throws IOException { + in.mark(1); + final byte b = (byte) in.read(); + in.reset(); + return b == 0x1f; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VCFInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/VCFInputFormat.java index 846ec6d..c45c85f 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VCFInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VCFInputFormat.java @@ -22,6 +22,7 @@ package org.seqdoop.hadoop_bam; +import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.Interval; import htsjdk.samtools.util.Locatable; @@ -39,7 +40,6 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; - import java.util.Set; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; @@ -50,429 +50,431 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory; import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.compress.SplittableCompressionCodec; -import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.RecordReader; import org.apache.hadoop.mapreduce.TaskAttemptContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; - -import htsjdk.samtools.seekablestream.SeekableStream; - -import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec; import org.seqdoop.hadoop_bam.util.BGZFCodec; +import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec; import org.seqdoop.hadoop_bam.util.IntervalUtil; import org.seqdoop.hadoop_bam.util.WrapSeekable; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** An {@link org.apache.hadoop.mapreduce.InputFormat} for VCF files. Values - * are the individual records; see {@link VCFRecordReader} for the meaning of - * the key. +/** + * An {@link org.apache.hadoop.mapreduce.InputFormat} for VCF files. 
Values are the individual + * records; see {@link VCFRecordReader} for the meaning of the key. */ -public class VCFInputFormat - extends FileInputFormat -{ - private static final Logger logger = LoggerFactory.getLogger(VCFInputFormat.class); - - /** Whether file extensions are to be trusted, defaults to true. - * - * @see VCFFormat#inferFromFilePath - */ - - public static final String TRUST_EXTS_PROPERTY = - "hadoopbam.vcf.trust-exts"; - - /** - * Filter by region, like -L in SAMtools. Takes a comma-separated - * list of intervals, e.g. chr1:1-20000,chr2:12000-20000. For - * programmatic use {@link #setIntervals(Configuration, List)} should be preferred. - */ - public static final String INTERVALS_PROPERTY = "hadoopbam.vcf.intervals"; - - public static void setIntervals(Configuration conf, - List intervals) { - StringBuilder sb = new StringBuilder(); - for (Iterator it = intervals.iterator(); it.hasNext(); ) { - Locatable l = it.next(); - sb.append(String.format("%s:%d-%d", l.getContig(), l.getStart(), l.getEnd())); - if (it.hasNext()) { - sb.append(","); - } - } - conf.set(INTERVALS_PROPERTY, sb.toString()); - } - - static List getIntervals(Configuration conf) { - return IntervalUtil.getIntervals(conf, INTERVALS_PROPERTY); - } - - private final Map formatMap; - private final boolean givenMap; - - private Configuration conf; - private boolean trustExts; - - /** Creates a new input format, which will use the - * Configuration from the first public method called. Thus this - * will behave as though constructed with a Configuration - * directly, but only after it has received it in - * createRecordReader (via the TaskAttemptContext) - * or isSplitable or getSplits (via the - * JobContext). Until then, other methods will throw an {@link - * IllegalStateException}. - * - * This constructor exists mainly as a convenience, e.g. so that - * VCFInputFormat can be used directly in - * Job.setInputFormatClass. - */ - public VCFInputFormat() { - this.formatMap = new HashMap(); - this.givenMap = false; - this.conf = null; - } - - /** Creates a new input format, reading {@link #TRUST_EXTS_PROPERTY} from - * the given Configuration. - */ - public VCFInputFormat(Configuration conf) { - this.formatMap = new HashMap(); - this.conf = conf; - this.trustExts = conf.getBoolean(TRUST_EXTS_PROPERTY, true); - this.givenMap = false; - } - - /** Creates a new input format, trusting the given Map to - * define the file-to-format associations. Neither file paths nor their - * contents are looked at, only the Map is used. - * - *
<p>
The Map is not copied, so it should not be modified while - * this input format is in use!
</p>
- * */ - public VCFInputFormat(Map formatMap) { - this.formatMap = formatMap; - this.givenMap = true; - - // Arbitrary values. - this.conf = null; - this.trustExts = false; - } - - /** Returns the {@link VCFFormat} corresponding to the given path. Returns - * null if it cannot be determined even based on the file - * contents (unless future VCF/BCF formats are very different, this means - * that the path does not refer to a VCF or BCF file). - * - *
<p>
If this input format was constructed using a given - * Map<Path,VCFFormat> and the path is not contained - * within that map, throws an {@link IllegalArgumentException}.
</p>
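To make the contract just described concrete, here is a small sketch (not part of the patch) of the map-based constructor; the paths are placeholders.

import java.util.HashMap;
import java.util.Map;
import org.apache.hadoop.fs.Path;
import org.seqdoop.hadoop_bam.VCFFormat;
import org.seqdoop.hadoop_bam.VCFInputFormat;

public class ExplicitFormatMapSketch {
  public static void main(String[] args) {
    // With this constructor neither extensions nor file contents are consulted,
    // only the supplied map (paths are placeholders).
    Map<Path, VCFFormat> formats = new HashMap<>();
    formats.put(new Path("/data/a.data"), VCFFormat.VCF);
    formats.put(new Path("/data/b.data"), VCFFormat.BCF);

    VCFInputFormat inputFormat = new VCFInputFormat(formats);
    System.out.println(inputFormat.getFormat(new Path("/data/a.data"))); // VCF
    // Asking for a path missing from the map throws IllegalArgumentException, as noted above.
  }
}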
- */ - public VCFFormat getFormat(final Path path) { - VCFFormat fmt = formatMap.get(path); - if (fmt != null || formatMap.containsKey(path)) - return fmt; - - if (givenMap) - throw new IllegalArgumentException( - "VCF format for '"+path+"' not in given map"); - - if (this.conf == null) - throw new IllegalStateException("Don't have a Configuration yet"); - - if (trustExts) { - final VCFFormat f = VCFFormat.inferFromFilePath(path); - if (f != null) { - - formatMap.put(path, f); - return f; - } - } - - try(InputStream is = path.getFileSystem(conf).open(path)) { - fmt = VCFFormat.inferFromData(is); - } catch (IOException e) {} - - formatMap.put(path, fmt); - return fmt; - } - - @Override - protected boolean isSplitable(JobContext context, Path filename) { - Configuration conf = context.getConfiguration(); - final CompressionCodec codec = - new CompressionCodecFactory(context.getConfiguration()).getCodec(filename); - if (codec == null) { - return true; - } - if (codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec) { - boolean splittable; - try { - try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) { - splittable = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in)); - } - } catch (IOException e) { - // can't determine if BGZF or GZIP, conservatively assume latter - splittable = false; - } - if (!splittable) { - logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename); - } - return splittable; - } else if (codec instanceof GzipCodec) { - logger.warn("Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec."); - } - return codec instanceof SplittableCompressionCodec; - } - - /** Returns a {@link BCFRecordReader} or {@link VCFRecordReader} as - * appropriate, initialized with the given parameters. - * - *
<p>
Throws {@link IllegalArgumentException} if the given input split is - * not a {@link FileVirtualSplit} or a {@link FileSplit}, or if the path - * referred to is not recognized as a VCF or BCF file (see {@link - * #getFormat}).
</p>
- */ - @Override public RecordReader - createRecordReader(InputSplit split, TaskAttemptContext ctx) - throws InterruptedException, IOException - { - final Path path; - if (split instanceof FileSplit) - path = ((FileSplit)split).getPath(); - else if (split instanceof FileVirtualSplit) - path = ((FileVirtualSplit)split).getPath(); - else - throw new IllegalArgumentException( - "split '"+split+"' has unknown type: cannot extract path"); - - if (this.conf == null) - this.conf = ctx.getConfiguration(); - - final VCFFormat fmt = getFormat(path); - if (fmt == null) - throw new IllegalArgumentException( - "unknown VCF format, cannot create RecordReader: "+path); - - final RecordReader rr; - - switch (fmt) { - case VCF: rr = new VCFRecordReader(); break; - case BCF: rr = new BCFRecordReader(); break; - default: assert false; return null; - } - - rr.initialize(split, ctx); - return rr; - } - - /** Defers to {@link BCFSplitGuesser} as appropriate for each individual - * path. VCF paths do not require special handling, so their splits are left - * unchanged. - */ - @Override public List getSplits(JobContext job) - throws IOException - { - if (this.conf == null) - this.conf = job.getConfiguration(); - - final List origSplits = super.getSplits(job); - - // We have to partition the splits by input format and hand the BCF ones - // over to getBCFSplits(). - - final List - bcfOrigSplits = new ArrayList(origSplits.size()); - final List - newSplits = new ArrayList(origSplits.size()); - - for (final InputSplit iSplit : origSplits) { - final FileSplit split = (FileSplit)iSplit; - - if (VCFFormat.BCF.equals(getFormat(split.getPath()))) - bcfOrigSplits.add(split); - else - newSplits.add(split); - } - fixBCFSplits(bcfOrigSplits, newSplits); - return filterByInterval(newSplits, conf); - } - - // The given FileSplits should all be for BCF files. Adds InputSplits - // aligned to record boundaries. Compressed BCF results in - // FileVirtualSplits, uncompressed in FileSplits. - private void fixBCFSplits( - List splits, List newSplits) - throws IOException - { - // addGuessedSplits() requires the given splits to be sorted by file - // path, so do so. Although FileInputFormat.getSplits() does, at the time - // of writing this, generate them in that order, we shouldn't rely on it. - Collections.sort(splits, new Comparator() { - public int compare(FileSplit a, FileSplit b) { - return a.getPath().compareTo(b.getPath()); - } - }); - - for (int i = 0; i < splits.size();) - i = addGuessedSplits(splits, i, newSplits); - } - - // Handles all the splits that share the Path of the one at index i, - // returning the next index to be used. - private int addGuessedSplits( - List splits, int i, List newSplits) - throws IOException - { - final Path path = splits.get(i).getPath(); - final SeekableStream sin = WrapSeekable.openPath(conf, path); - - final BCFSplitGuesser guesser = new BCFSplitGuesser(sin); - - final boolean isBGZF = guesser.isBGZF(); - - InputSplit prevSplit = null; - - for (; i < splits.size(); ++i) { - final FileSplit fspl = splits.get(i); - if (!fspl.getPath().equals(path)) - break; - - final String[] locs = fspl.getLocations(); - - final long beg = fspl.getStart(); - final long end = beg + fspl.getLength(); - - final long alignBeg = guesser.guessNextBCFRecordStart(beg, end); - - // As the guesser goes to the next BGZF block before looking for BCF - // records, the ending BGZF blocks have to always be traversed fully. - // Hence force the length to be 0xffff, the maximum possible. - final long alignEnd = isBGZF ? 
end << 16 | 0xffff : end; - - final long length = alignEnd - alignBeg; - - if (alignBeg == end) { - // No records detected in this split: merge it to the previous one. - // This could legitimately happen e.g. if we have a split that is - // so small that it only contains the middle part of a BGZF block. - // - // Of course, if it's the first split, then this is simply not a - // valid BCF file. - // - // FIXME: In theory, any number of splits could only contain parts - // of the BCF header before we start to see splits that contain BCF - // records. For now, we require that the split size is at least as - // big as the header and don't handle that case. - if (prevSplit == null) - throw new IOException("'" + path + "': no records in first "+ - "split: bad BCF file or tiny split size?"); - - if (isBGZF) { - ((FileVirtualSplit)prevSplit).setEndVirtualOffset(alignEnd); - continue; - } - prevSplit = new FileSplit(path, alignBeg, length, locs); - newSplits.remove(newSplits.size() - 1); - } else { - prevSplit = - isBGZF ? new FileVirtualSplit(path, alignBeg, alignEnd, locs) - : new FileSplit (path, alignBeg, length, locs); - } - newSplits.add(prevSplit); - } - - sin.close(); - return i; - } - - private List filterByInterval(List splits, Configuration conf) - throws IOException { - List intervals = getIntervals(conf); - if (intervals == null) { - return splits; - } - List blocks = new ArrayList<>(); - Set vcfFiles = new LinkedHashSet(); - for (InputSplit split : splits) { - if (split instanceof FileSplit) { - vcfFiles.add(((FileSplit) split).getPath()); - } else if (split instanceof FileVirtualSplit) { - vcfFiles.add(((FileVirtualSplit) split).getPath()); - } else { - throw new IllegalArgumentException( - "split '"+split+"' has unknown type: cannot extract path"); - } - } - for (Path vcfFile : vcfFiles) { - Path indexFile = vcfFile.suffix(TabixUtils.STANDARD_INDEX_EXTENSION); - FileSystem fs = vcfFile.getFileSystem(conf); - if (!fs.exists(indexFile)) { - logger.warn( - "No tabix index file found for {}, splits will not be filtered, which may be very inefficient", - indexFile); - return splits; - } - - try (InputStream in = new BlockCompressedInputStream(fs.open(indexFile))) { - TabixIndex index = new TabixIndex(in); - for (Locatable interval : intervals) { - String contig = interval.getContig(); - int intervalStart = interval.getStart(); - int intervalEnd = interval.getEnd(); - blocks.addAll(index.getBlocks(contig, intervalStart, intervalEnd)); - } - } - } - - // Use the blocks to filter the splits - List filteredSplits = new ArrayList(); - for (InputSplit split : splits) { - if (split instanceof FileSplit) { - FileSplit fileSplit = (FileSplit) split; - long splitStart = fileSplit.getStart() << 16; - long splitEnd = (fileSplit.getStart() + fileSplit.getLength()) << 16; - // if any block overlaps with the split, keep the split, but don't adjust its size - // as the BGZF block decompression is handled by BGZFCodec, not by the reader - // directly - for (Block block : blocks) { - long blockStart = block.getStartPosition(); - long blockEnd = block.getEndPosition(); - if (overlaps(splitStart, splitEnd, blockStart, blockEnd)) { - filteredSplits.add(split); - break; - } - } - } else { - FileVirtualSplit virtualSplit = (FileVirtualSplit) split; - long splitStart = virtualSplit.getStartVirtualOffset(); - long splitEnd = virtualSplit.getEndVirtualOffset(); - // if any block overlaps with the split, keep the split, but adjust the start and - // end to the maximally overlapping portion for all blocks that 
overlap - long newStart = Long.MAX_VALUE; - long newEnd = Long.MIN_VALUE; - boolean overlaps = false; - for (Block block : blocks) { - long blockStart = block.getStartPosition(); - long blockEnd = block.getEndPosition(); - if (overlaps(splitStart, splitEnd, blockStart, blockEnd)) { - long overlapStart = Math.max(splitStart, blockStart); - long overlapEnd = Math.min(splitEnd, blockEnd); - newStart = Math.min(newStart, overlapStart); - newEnd = Math.max(newEnd, overlapEnd); - overlaps = true; - } - } - if (overlaps) { - filteredSplits.add(new FileVirtualSplit(virtualSplit.getPath(), newStart, newEnd, - virtualSplit.getLocations())); - } - } - } - return filteredSplits; - } - - private static boolean overlaps(long start, long end, long start2, long end2) { - return (start2 >= start && start2 <= end) || (end2 >=start && end2 <= end) || - (start >= start2 && end <= end2); - } +public class VCFInputFormat extends FileInputFormat { + + /** + * Whether file extensions are to be trusted, defaults to true. + * + * @see VCFFormat#inferFromFilePath + */ + public static final String TRUST_EXTS_PROPERTY = "hadoopbam.vcf.trust-exts"; + /** + * Filter by region, like -L in SAMtools. Takes a comma-separated list of intervals, + * e.g. chr1:1-20000,chr2:12000-20000. For programmatic use {@link + * #setIntervals(Configuration, List)} should be preferred. + */ + public static final String INTERVALS_PROPERTY = "hadoopbam.vcf.intervals"; + + private static final Logger logger = LoggerFactory.getLogger(VCFInputFormat.class); + private final Map formatMap; + private final boolean givenMap; + private Configuration conf; + private boolean trustExts; + + /** + * Creates a new input format, which will use the Configuration from the first public + * method called. Thus this will behave as though constructed with a Configuration + * directly, but only after it has received it in createRecordReader (via the + * TaskAttemptContext) or isSplitable or getSplits (via the + * JobContext). Until then, other methods will throw an {@link + * IllegalStateException}. + * + *
<p>
This constructor exists mainly as a convenience, e.g. so that VCFInputFormat + * can be used directly in Job.setInputFormatClass. + */ + public VCFInputFormat() { + this.formatMap = new HashMap(); + this.givenMap = false; + this.conf = null; + } + /** + * Creates a new input format, reading {@link #TRUST_EXTS_PROPERTY} from the given + * Configuration. + */ + public VCFInputFormat(Configuration conf) { + this.formatMap = new HashMap(); + this.conf = conf; + this.trustExts = conf.getBoolean(TRUST_EXTS_PROPERTY, true); + this.givenMap = false; + } + + /** + * Creates a new input format, trusting the given Map to define the file-to-format + * associations. Neither file paths nor their contents are looked at, only the Map is + * used. + * + *
<p>
The Map is not copied, so it should not be modified while this input format is + * in use! + */ + public VCFInputFormat(Map formatMap) { + this.formatMap = formatMap; + this.givenMap = true; + + // Arbitrary values. + this.conf = null; + this.trustExts = false; + } + + public static void setIntervals(Configuration conf, List intervals) { + StringBuilder sb = new StringBuilder(); + for (Iterator it = intervals.iterator(); it.hasNext(); ) { + Locatable l = it.next(); + sb.append(String.format("%s:%d-%d", l.getContig(), l.getStart(), l.getEnd())); + if (it.hasNext()) { + sb.append(","); + } + } + conf.set(INTERVALS_PROPERTY, sb.toString()); + } + + static List getIntervals(Configuration conf) { + return IntervalUtil.getIntervals(conf, INTERVALS_PROPERTY); + } + + private static boolean overlaps(long start, long end, long start2, long end2) { + return (start2 >= start && start2 <= end) + || (end2 >= start && end2 <= end) + || (start >= start2 && end <= end2); + } + + /** + * Returns the {@link VCFFormat} corresponding to the given path. Returns null if it + * cannot be determined even based on the file contents (unless future VCF/BCF formats are very + * different, this means that the path does not refer to a VCF or BCF file). + * + *
<p>
If this input format was constructed using a given Map<Path,VCFFormat> + * and the path is not contained within that map, throws an {@link IllegalArgumentException}. + */ + public VCFFormat getFormat(final Path path) { + VCFFormat fmt = formatMap.get(path); + if (fmt != null || formatMap.containsKey(path)) { + return fmt; + } + + if (givenMap) { + throw new IllegalArgumentException("VCF format for '" + path + "' not in given map"); + } + + if (this.conf == null) { + throw new IllegalStateException("Don't have a Configuration yet"); + } + + if (trustExts) { + final VCFFormat f = VCFFormat.inferFromFilePath(path); + if (f != null) { + + formatMap.put(path, f); + return f; + } + } + + try (InputStream is = path.getFileSystem(conf).open(path)) { + fmt = VCFFormat.inferFromData(is); + } catch (IOException e) { + } + + formatMap.put(path, fmt); + return fmt; + } + + @Override + protected boolean isSplitable(JobContext context, Path filename) { + Configuration conf = context.getConfiguration(); + final CompressionCodec codec = + new CompressionCodecFactory(context.getConfiguration()).getCodec(filename); + if (codec == null) { + return true; + } + if (codec instanceof BGZFCodec || codec instanceof BGZFEnhancedGzipCodec) { + boolean splittable; + try { + try (FSDataInputStream in = filename.getFileSystem(conf).open(filename)) { + splittable = BlockCompressedInputStream.isValidFile(new BufferedInputStream(in)); + } + } catch (IOException e) { + // can't determine if BGZF or GZIP, conservatively assume latter + splittable = false; + } + if (!splittable) { + logger.warn("{} is not splittable, consider using block-compressed gzip (BGZF)", filename); + } + return splittable; + } else if (codec instanceof GzipCodec) { + logger.warn( + "Using GzipCodec, which is not splittable, consider using block compressed gzip (BGZF) and BGZFCodec/BGZFEnhancedGzipCodec."); + } + return codec instanceof SplittableCompressionCodec; + } + + /** + * Returns a {@link BCFRecordReader} or {@link VCFRecordReader} as appropriate, initialized with + * the given parameters. + * + *
<p>
Throws {@link IllegalArgumentException} if the given input split is not a {@link + * FileVirtualSplit} or a {@link FileSplit}, or if the path referred to is not recognized as a VCF + * or BCF file (see {@link #getFormat}). + */ + @Override + public RecordReader createRecordReader( + InputSplit split, TaskAttemptContext ctx) throws InterruptedException, IOException { + final Path path; + if (split instanceof FileSplit) { + path = ((FileSplit) split).getPath(); + } else if (split instanceof FileVirtualSplit) { + path = ((FileVirtualSplit) split).getPath(); + } else { + throw new IllegalArgumentException( + "split '" + split + "' has unknown type: cannot extract path"); + } + + if (this.conf == null) { + this.conf = ctx.getConfiguration(); + } + + final VCFFormat fmt = getFormat(path); + if (fmt == null) { + throw new IllegalArgumentException("unknown VCF format, cannot create RecordReader: " + path); + } + + final RecordReader rr; + + switch (fmt) { + case VCF: + rr = new VCFRecordReader(); + break; + case BCF: + rr = new BCFRecordReader(); + break; + default: + assert false; + return null; + } + + rr.initialize(split, ctx); + return rr; + } + + /** + * Defers to {@link BCFSplitGuesser} as appropriate for each individual path. VCF paths do not + * require special handling, so their splits are left unchanged. + */ + @Override + public List getSplits(JobContext job) throws IOException { + if (this.conf == null) { + this.conf = job.getConfiguration(); + } + + final List origSplits = super.getSplits(job); + + // We have to partition the splits by input format and hand the BCF ones + // over to getBCFSplits(). + + final List bcfOrigSplits = new ArrayList(origSplits.size()); + final List newSplits = new ArrayList(origSplits.size()); + + for (final InputSplit iSplit : origSplits) { + final FileSplit split = (FileSplit) iSplit; + + if (VCFFormat.BCF.equals(getFormat(split.getPath()))) { + bcfOrigSplits.add(split); + } else { + newSplits.add(split); + } + } + fixBCFSplits(bcfOrigSplits, newSplits); + return filterByInterval(newSplits, conf); + } + + // The given FileSplits should all be for BCF files. Adds InputSplits + // aligned to record boundaries. Compressed BCF results in + // FileVirtualSplits, uncompressed in FileSplits. + private void fixBCFSplits(List splits, List newSplits) throws IOException { + // addGuessedSplits() requires the given splits to be sorted by file + // path, so do so. Although FileInputFormat.getSplits() does, at the time + // of writing this, generate them in that order, we shouldn't rely on it. + Collections.sort( + splits, + new Comparator() { + public int compare(FileSplit a, FileSplit b) { + return a.getPath().compareTo(b.getPath()); + } + }); + + for (int i = 0; i < splits.size(); ) { + i = addGuessedSplits(splits, i, newSplits); + } + } + + // Handles all the splits that share the Path of the one at index i, + // returning the next index to be used. 
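To tie the public knobs of VCFInputFormat together, a hypothetical job-setup sketch (not part of the patch). The paths and the contig name are placeholders, the exact generic signature of setIntervals is elided in the reformatted text above (the sketch assumes any htsjdk Locatable such as Interval is accepted), and how the BGZF codecs get registered may differ in your deployment.

import htsjdk.samtools.util.Interval;
import htsjdk.samtools.util.Locatable;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.seqdoop.hadoop_bam.VCFInputFormat;
import org.seqdoop.hadoop_bam.util.BGZFCodec;
import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec;

public class VCFJobSetupSketch {
  public static void main(String[] args) throws IOException {
    Configuration conf = new Configuration();

    // Trust file extensions when deciding between VCF and BCF (the default behaviour).
    conf.setBoolean(VCFInputFormat.TRUST_EXTS_PROPERTY, true);

    // Restrict reading to the given regions; with a tabix index present this also
    // prunes splits, as getSplits/filterByInterval above shows.
    List<Locatable> intervals = Arrays.<Locatable>asList(new Interval("20", 1, 100000));
    VCFInputFormat.setIntervals(conf, intervals);

    // Registering the BGZF codecs is how block-compressed .gz/.bgz inputs become
    // splittable (see isSplitable above); exact codec configuration may vary per cluster.
    conf.set(
        "io.compression.codecs",
        BGZFCodec.class.getCanonicalName() + "," + BGZFEnhancedGzipCodec.class.getCanonicalName());

    Job job = Job.getInstance(conf, "read-vcf");
    job.setInputFormatClass(VCFInputFormat.class);
    FileInputFormat.addInputPath(job, new Path("/data/calls.vcf.gz")); // hypothetical input
    // ... set mapper/reducer and output as usual, then submit the job.
  }
}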
+ private int addGuessedSplits(List splits, int i, List newSplits) + throws IOException { + final Path path = splits.get(i).getPath(); + final SeekableStream sin = WrapSeekable.openPath(conf, path); + + final BCFSplitGuesser guesser = new BCFSplitGuesser(sin); + + final boolean isBGZF = guesser.isBGZF(); + + InputSplit prevSplit = null; + + for (; i < splits.size(); ++i) { + final FileSplit fspl = splits.get(i); + if (!fspl.getPath().equals(path)) { + break; + } + + final String[] locs = fspl.getLocations(); + + final long beg = fspl.getStart(); + final long end = beg + fspl.getLength(); + + final long alignBeg = guesser.guessNextBCFRecordStart(beg, end); + + // As the guesser goes to the next BGZF block before looking for BCF + // records, the ending BGZF blocks have to always be traversed fully. + // Hence force the length to be 0xffff, the maximum possible. + final long alignEnd = isBGZF ? end << 16 | 0xffff : end; + + final long length = alignEnd - alignBeg; + + if (alignBeg == end) { + // No records detected in this split: merge it to the previous one. + // This could legitimately happen e.g. if we have a split that is + // so small that it only contains the middle part of a BGZF block. + // + // Of course, if it's the first split, then this is simply not a + // valid BCF file. + // + // FIXME: In theory, any number of splits could only contain parts + // of the BCF header before we start to see splits that contain BCF + // records. For now, we require that the split size is at least as + // big as the header and don't handle that case. + if (prevSplit == null) { + throw new IOException( + "'" + path + "': no records in first " + "split: bad BCF file or tiny split size?"); + } + + if (isBGZF) { + ((FileVirtualSplit) prevSplit).setEndVirtualOffset(alignEnd); + continue; + } + prevSplit = new FileSplit(path, alignBeg, length, locs); + newSplits.remove(newSplits.size() - 1); + } else { + prevSplit = + isBGZF + ? 
new FileVirtualSplit(path, alignBeg, alignEnd, locs) + : new FileSplit(path, alignBeg, length, locs); + } + newSplits.add(prevSplit); + } + + sin.close(); + return i; + } + + private List filterByInterval(List splits, Configuration conf) + throws IOException { + List intervals = getIntervals(conf); + if (intervals == null) { + return splits; + } + List blocks = new ArrayList<>(); + Set vcfFiles = new LinkedHashSet(); + for (InputSplit split : splits) { + if (split instanceof FileSplit) { + vcfFiles.add(((FileSplit) split).getPath()); + } else if (split instanceof FileVirtualSplit) { + vcfFiles.add(((FileVirtualSplit) split).getPath()); + } else { + throw new IllegalArgumentException( + "split '" + split + "' has unknown type: cannot extract path"); + } + } + for (Path vcfFile : vcfFiles) { + Path indexFile = vcfFile.suffix(TabixUtils.STANDARD_INDEX_EXTENSION); + FileSystem fs = vcfFile.getFileSystem(conf); + if (!fs.exists(indexFile)) { + logger.warn( + "No tabix index file found for {}, splits will not be filtered, which may be very inefficient", + indexFile); + return splits; + } + + try (InputStream in = new BlockCompressedInputStream(fs.open(indexFile))) { + TabixIndex index = new TabixIndex(in); + for (Locatable interval : intervals) { + String contig = interval.getContig(); + int intervalStart = interval.getStart(); + int intervalEnd = interval.getEnd(); + blocks.addAll(index.getBlocks(contig, intervalStart, intervalEnd)); + } + } + } + + // Use the blocks to filter the splits + List filteredSplits = new ArrayList(); + for (InputSplit split : splits) { + if (split instanceof FileSplit) { + FileSplit fileSplit = (FileSplit) split; + long splitStart = fileSplit.getStart() << 16; + long splitEnd = (fileSplit.getStart() + fileSplit.getLength()) << 16; + // if any block overlaps with the split, keep the split, but don't adjust its size + // as the BGZF block decompression is handled by BGZFCodec, not by the reader + // directly + for (Block block : blocks) { + long blockStart = block.getStartPosition(); + long blockEnd = block.getEndPosition(); + if (overlaps(splitStart, splitEnd, blockStart, blockEnd)) { + filteredSplits.add(split); + break; + } + } + } else { + FileVirtualSplit virtualSplit = (FileVirtualSplit) split; + long splitStart = virtualSplit.getStartVirtualOffset(); + long splitEnd = virtualSplit.getEndVirtualOffset(); + // if any block overlaps with the split, keep the split, but adjust the start and + // end to the maximally overlapping portion for all blocks that overlap + long newStart = Long.MAX_VALUE; + long newEnd = Long.MIN_VALUE; + boolean overlaps = false; + for (Block block : blocks) { + long blockStart = block.getStartPosition(); + long blockEnd = block.getEndPosition(); + if (overlaps(splitStart, splitEnd, blockStart, blockEnd)) { + long overlapStart = Math.max(splitStart, blockStart); + long overlapEnd = Math.min(splitEnd, blockEnd); + newStart = Math.min(newStart, overlapStart); + newEnd = Math.max(newEnd, overlapEnd); + overlaps = true; + } + } + if (overlaps) { + filteredSplits.add( + new FileVirtualSplit( + virtualSplit.getPath(), newStart, newEnd, virtualSplit.getLocations())); + } + } + } + return filteredSplits; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VCFOutputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/VCFOutputFormat.java index 36768f9..ef407e8 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VCFOutputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VCFOutputFormat.java @@ -25,34 +25,35 @@ import 
org.apache.hadoop.conf.Configuration; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -/** An abstract {@link org.apache.hadoop.mapreduce.OutputFormat} for VCF and - * BCF files. Only locks down the value type and stores the output format - * requested. +/** + * An abstract {@link org.apache.hadoop.mapreduce.OutputFormat} for VCF and BCF files. Only locks + * down the value type and stores the output format requested. */ -public abstract class VCFOutputFormat - extends FileOutputFormat -{ - /** A string property defining the output format to use. The value is read - * directly by {@link VCFFormat#valueOf}. - */ - public static final String OUTPUT_VCF_FORMAT_PROPERTY = - "hadoopbam.vcf.output-format"; - - protected VCFFormat format; - - /** Creates a new output format, reading {@link #OUTPUT_VCF_FORMAT_PROPERTY} - * from the given Configuration. - */ - protected VCFOutputFormat(Configuration conf) { - final String fmtStr = conf.get(OUTPUT_VCF_FORMAT_PROPERTY); - - format = fmtStr == null ? null : VCFFormat.valueOf(fmtStr); - } - - /** Creates a new output format for the given VCF format. */ - protected VCFOutputFormat(VCFFormat fmt) { - if (fmt == null) - throw new IllegalArgumentException("null VCFFormat"); - format = fmt; - } +public abstract class VCFOutputFormat extends FileOutputFormat { + + /** + * A string property defining the output format to use. The value is read directly by {@link + * VCFFormat#valueOf}. + */ + public static final String OUTPUT_VCF_FORMAT_PROPERTY = "hadoopbam.vcf.output-format"; + + protected VCFFormat format; + + /** + * Creates a new output format, reading {@link #OUTPUT_VCF_FORMAT_PROPERTY} from the given + * Configuration. + */ + protected VCFOutputFormat(Configuration conf) { + final String fmtStr = conf.get(OUTPUT_VCF_FORMAT_PROPERTY); + + format = fmtStr == null ? null : VCFFormat.valueOf(fmtStr); + } + + /** Creates a new output format for the given VCF format. */ + protected VCFOutputFormat(VCFFormat fmt) { + if (fmt == null) { + throw new IllegalArgumentException("null VCFFormat"); + } + format = fmt; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VCFRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/VCFRecordReader.java index d21d70f..cacba28 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VCFRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VCFRecordReader.java @@ -34,7 +34,6 @@ import htsjdk.variant.vcf.VCFContigHeaderLine; import htsjdk.variant.vcf.VCFHeader; import java.io.IOException; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -57,162 +56,161 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** The key is the bitwise OR of the chromosome index in the upper 32 bits - * and the 0-based leftmost coordinate in the lower. +/** + * The key is the bitwise OR of the chromosome index in the upper 32 bits and the 0-based leftmost + * coordinate in the lower. * - * The chromosome index is based on the ordering of the contig lines in the VCF - * header. If a chromosome name that cannot be found in the contig lines is - * used, that name is instead hashed to form the upper part of the key. + *

The chromosome index is based on the ordering of the contig lines in the VCF header. If a + * chromosome name that cannot be found in the contig lines is used, that name is instead hashed to + * form the upper part of the key. */ -public class VCFRecordReader - extends RecordReader -{ - - private static final Logger logger = LoggerFactory.getLogger(VCFRecordReader.class); - - /** A String property corresponding to a ValidationStringency - * value. If set, the given stringency is used when any part of the - * Hadoop-BAM library reads VCF. - */ - public static final String VALIDATION_STRINGENCY_PROPERTY = - "hadoopbam.vcfrecordreader.validation-stringency"; - - static ValidationStringency getValidationStringency( - final Configuration conf) - { - final String p = conf.get(VALIDATION_STRINGENCY_PROPERTY); - return p == null ? ValidationStringency.STRICT : ValidationStringency.valueOf(p); - } - - public static void setValidationStringency( - final Configuration conf, - final ValidationStringency stringency) - { - conf.set(VALIDATION_STRINGENCY_PROPERTY, stringency.toString()); - } - - - private final LongWritable key = new LongWritable(); - private final VariantContextWritable vc = new VariantContextWritable(); - - private VCFCodec codec = new VCFCodec(); - private LineRecordReader lineRecordReader = new LineRecordReader(); - - private VCFHeader header; - - private final Map contigDict = - new HashMap(); - - private List intervals; - private OverlapDetector overlapDetector; - - private ValidationStringency stringency; - - @Override public void initialize(InputSplit spl, TaskAttemptContext ctx) - throws IOException - { - final FileSplit split = (FileSplit)spl; - - final Path file = split.getPath(); - final FileSystem fs = file.getFileSystem(ctx.getConfiguration()); - - final FSDataInputStream ins = fs.open(file); - - CompressionCodec compressionCodec = - new CompressionCodecFactory(ctx.getConfiguration()).getCodec(file); - AsciiLineReader reader; - if (compressionCodec == null) { - reader = new AsciiLineReader(ins); - } else { - Decompressor decompressor = CodecPool.getDecompressor(compressionCodec); - CompressionInputStream in = compressionCodec.createInputStream(ins, - decompressor); - reader = new AsciiLineReader(in); - } - - AsciiLineReaderIterator it = new AsciiLineReaderIterator(reader); - - final FeatureCodecHeader h = codec.readHeader(it); - if (h == null || !(h.getHeaderValue() instanceof VCFHeader)) - throw new IOException("No VCF header found in "+ file); - - header = (VCFHeader) h.getHeaderValue(); - - contigDict.clear(); - int i = 0; - for (final VCFContigHeaderLine contig : header.getContigLines()) - contigDict.put(contig.getID(), i++); - - lineRecordReader.initialize(spl, ctx); - - intervals = VCFInputFormat.getIntervals(ctx.getConfiguration()); - if (intervals != null) { - overlapDetector = OverlapDetector.create(intervals); - } - - stringency = VCFRecordReader.getValidationStringency(ctx.getConfiguration()); - } - @Override public void close() throws IOException { lineRecordReader.close(); } - - @Override public float getProgress() throws IOException { - return lineRecordReader.getProgress(); - } - - @Override public LongWritable getCurrentKey () { return key; } - @Override public VariantContextWritable getCurrentValue() { return vc; } - - @Override public boolean nextKeyValue() throws IOException { - while (true) { - String line; - while (true) { - if (!lineRecordReader.nextKeyValue()) { - return false; - } - line = lineRecordReader.getCurrentValue().toString(); - if 
(!line.startsWith("#")) { - break; - } - } - - final VariantContext v; - try { - v = codec.decode(line); - } catch (TribbleException e) { - if (stringency == ValidationStringency.STRICT) { - if (logger.isErrorEnabled()) { - logger.error("Parsing line {} failed with {}.", line, e); - } - throw e; - } else { - if (stringency == ValidationStringency.LENIENT && - logger.isWarnEnabled()) { - logger.warn("Parsing line {} failed with {}. Skipping...", - line, e); - } - continue; - } - } - - if (!overlaps(v)) { - continue; - } - - Integer chromIdx = contigDict.get(v.getContig()); - if (chromIdx == null) - chromIdx = (int) MurmurHash3.murmurhash3(v.getContig(), 0); - - key.set((long) chromIdx << 32 | (long) (v.getStart() - 1)); - vc.set(v, header); - - return true; - } - } - - private boolean overlaps(VariantContext v) { - if (intervals == null) { - return true; - } - final Interval interval = new Interval(v.getContig(), v.getStart(), v.getEnd()); - return overlapDetector.overlapsAny(interval); - } +public class VCFRecordReader extends RecordReader { + + /** + * A String property corresponding to a ValidationStringency value. If set, the given stringency + * is used when any part of the Hadoop-BAM library reads VCF. + */ + public static final String VALIDATION_STRINGENCY_PROPERTY = + "hadoopbam.vcfrecordreader.validation-stringency"; + + private static final Logger logger = LoggerFactory.getLogger(VCFRecordReader.class); + private final LongWritable key = new LongWritable(); + private final VariantContextWritable vc = new VariantContextWritable(); + private final Map contigDict = new HashMap(); + private VCFCodec codec = new VCFCodec(); + private LineRecordReader lineRecordReader = new LineRecordReader(); + private VCFHeader header; + private List intervals; + private OverlapDetector overlapDetector; + private ValidationStringency stringency; + + static ValidationStringency getValidationStringency(final Configuration conf) { + final String p = conf.get(VALIDATION_STRINGENCY_PROPERTY); + return p == null ? 
ValidationStringency.STRICT : ValidationStringency.valueOf(p); + } + + public static void setValidationStringency( + final Configuration conf, final ValidationStringency stringency) { + conf.set(VALIDATION_STRINGENCY_PROPERTY, stringency.toString()); + } + + @Override + public void initialize(InputSplit spl, TaskAttemptContext ctx) throws IOException { + final FileSplit split = (FileSplit) spl; + + final Path file = split.getPath(); + final FileSystem fs = file.getFileSystem(ctx.getConfiguration()); + + final FSDataInputStream ins = fs.open(file); + + CompressionCodec compressionCodec = + new CompressionCodecFactory(ctx.getConfiguration()).getCodec(file); + AsciiLineReader reader; + if (compressionCodec == null) { + reader = new AsciiLineReader(ins); + } else { + Decompressor decompressor = CodecPool.getDecompressor(compressionCodec); + CompressionInputStream in = compressionCodec.createInputStream(ins, decompressor); + reader = new AsciiLineReader(in); + } + + AsciiLineReaderIterator it = new AsciiLineReaderIterator(reader); + + final FeatureCodecHeader h = codec.readHeader(it); + if (h == null || !(h.getHeaderValue() instanceof VCFHeader)) { + throw new IOException("No VCF header found in " + file); + } + + header = (VCFHeader) h.getHeaderValue(); + + contigDict.clear(); + int i = 0; + for (final VCFContigHeaderLine contig : header.getContigLines()) { + contigDict.put(contig.getID(), i++); + } + + lineRecordReader.initialize(spl, ctx); + + intervals = VCFInputFormat.getIntervals(ctx.getConfiguration()); + if (intervals != null) { + overlapDetector = OverlapDetector.create(intervals); + } + + stringency = VCFRecordReader.getValidationStringency(ctx.getConfiguration()); + } + + @Override + public void close() throws IOException { + lineRecordReader.close(); + } + + @Override + public float getProgress() throws IOException { + return lineRecordReader.getProgress(); + } + + @Override + public LongWritable getCurrentKey() { + return key; + } + + @Override + public VariantContextWritable getCurrentValue() { + return vc; + } + + @Override + public boolean nextKeyValue() throws IOException { + while (true) { + String line; + while (true) { + if (!lineRecordReader.nextKeyValue()) { + return false; + } + line = lineRecordReader.getCurrentValue().toString(); + if (!line.startsWith("#")) { + break; + } + } + + final VariantContext v; + try { + v = codec.decode(line); + } catch (TribbleException e) { + if (stringency == ValidationStringency.STRICT) { + if (logger.isErrorEnabled()) { + logger.error("Parsing line {} failed with {}.", line, e); + } + throw e; + } else { + if (stringency == ValidationStringency.LENIENT && logger.isWarnEnabled()) { + logger.warn("Parsing line {} failed with {}. 
Skipping...", line, e); + } + continue; + } + } + + if (!overlaps(v)) { + continue; + } + + Integer chromIdx = contigDict.get(v.getContig()); + if (chromIdx == null) { + chromIdx = (int) MurmurHash3.murmurhash3(v.getContig(), 0); + } + + key.set((long) chromIdx << 32 | (long) (v.getStart() - 1)); + vc.set(v, header); + + return true; + } + } + + private boolean overlaps(VariantContext v) { + if (intervals == null) { + return true; + } + final Interval interval = new Interval(v.getContig(), v.getStart(), v.getEnd()); + return overlapDetector.overlapsAny(interval); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VCFRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/VCFRecordWriter.java index 0155096..0c12d43 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VCFRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VCFRecordWriter.java @@ -23,130 +23,114 @@ package org.seqdoop.hadoop_bam; import htsjdk.tribble.FeatureCodecHeader; +import htsjdk.tribble.readers.AsciiLineReader; +import htsjdk.tribble.readers.AsciiLineReaderIterator; +import htsjdk.variant.variantcontext.GenotypesContext; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.writer.VariantContextWriter; import htsjdk.variant.variantcontext.writer.VariantContextWriterBuilder; +import htsjdk.variant.vcf.VCFCodec; +import htsjdk.variant.vcf.VCFHeader; import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; -import java.io.StringWriter; -import java.io.Writer; - import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; -import htsjdk.tribble.readers.AsciiLineReader; -import htsjdk.tribble.readers.AsciiLineReaderIterator; -import htsjdk.variant.vcf.VCFCodec; -import htsjdk.variant.vcf.VCFHeader; -import htsjdk.variant.variantcontext.GenotypesContext; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.writer.VariantContextWriter; -/** A base {@link RecordWriter} for VCF. +/** + * A base {@link RecordWriter} for VCF. * - *

<p>Handles the output stream, writing the header if requested, and provides
- * the {@link #writeRecord} function for subclasses.</p>
+ * <p>
Handles the output stream, writing the header if requested, and provides the {@link + * #writeRecord} function for subclasses. */ -public abstract class VCFRecordWriter - extends RecordWriter -{ - private VCFCodec codec = new VCFCodec(); - private VariantContextWriter writer; - - private LazyVCFGenotypesContext.HeaderDataCache vcfHeaderDataCache = - new LazyVCFGenotypesContext.HeaderDataCache(); - private LazyBCFGenotypesContext.HeaderDataCache bcfHeaderDataCache = - new LazyBCFGenotypesContext.HeaderDataCache(); - - /** A VCFHeader is read from the input Path. */ - public VCFRecordWriter( - Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) - throws IOException - { - final AsciiLineReader r = new AsciiLineReader( - input.getFileSystem(ctx.getConfiguration()).open(input)); - - final FeatureCodecHeader h = codec.readHeader(new AsciiLineReaderIterator(r)); - if (h == null || !(h.getHeaderValue() instanceof VCFHeader)) - throw new IOException("No VCF header found in "+ input); - - r.close(); - - init(output, (VCFHeader) h.getHeaderValue(), writeHeader, ctx); - } - public VCFRecordWriter( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader, ctx); - } - public VCFRecordWriter( - OutputStream output, VCFHeader header, boolean writeHeader) - throws IOException - { - init(output, header, writeHeader, null); - } - - // Working around not being able to call a constructor other than as the - // first statement... - private void init( - Path output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - init( - output.getFileSystem(ctx.getConfiguration()).create(output), - header, writeHeader, ctx); - } - private void init( - OutputStream output, VCFHeader header, boolean writeHeader, - TaskAttemptContext ctx) - throws IOException - { - final StoppableOutputStream stopOut = - new StoppableOutputStream(!writeHeader, output); - - writer = createVariantContextWriter(ctx == null ? null : ctx.getConfiguration(), - stopOut); - - writer.writeHeader(header); - stopOut.stopped = false; - - setInputHeader(header); - } - - protected VariantContextWriter createVariantContextWriter(Configuration conf, - OutputStream out) { - return new VariantContextWriterBuilder().clearOptions() - .setOutputStream(out).build(); - } - - @Override public void close(TaskAttemptContext ctx) throws IOException { - writer.close(); - } - - /** Used for lazy decoding of genotype data. Of course, each input record - * may have a different header, but we currently only support one header - * here... This is in part due to the fact that it's not clear what the best - * solution is. */ - public void setInputHeader(VCFHeader header) { - vcfHeaderDataCache.setHeader(header); - bcfHeaderDataCache.setHeader(header); - } - - protected void writeRecord(VariantContext vc) { - final GenotypesContext gc = vc.getGenotypes(); - if (gc instanceof LazyParsingGenotypesContext) - ((LazyParsingGenotypesContext)gc).getParser().setHeaderDataCache( - gc instanceof LazyVCFGenotypesContext ? 
vcfHeaderDataCache - : bcfHeaderDataCache); - - writer.add(vc); - } +public abstract class VCFRecordWriter extends RecordWriter { + + private VCFCodec codec = new VCFCodec(); + private VariantContextWriter writer; + + private LazyVCFGenotypesContext.HeaderDataCache vcfHeaderDataCache = + new LazyVCFGenotypesContext.HeaderDataCache(); + private LazyBCFGenotypesContext.HeaderDataCache bcfHeaderDataCache = + new LazyBCFGenotypesContext.HeaderDataCache(); + + /** A VCFHeader is read from the input Path. */ + public VCFRecordWriter(Path output, Path input, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + final AsciiLineReader r = + new AsciiLineReader(input.getFileSystem(ctx.getConfiguration()).open(input)); + + final FeatureCodecHeader h = codec.readHeader(new AsciiLineReaderIterator(r)); + if (h == null || !(h.getHeaderValue() instanceof VCFHeader)) { + throw new IOException("No VCF header found in " + input); + } + + r.close(); + + init(output, (VCFHeader) h.getHeaderValue(), writeHeader, ctx); + } + + public VCFRecordWriter(Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader, ctx); + } + + public VCFRecordWriter(OutputStream output, VCFHeader header, boolean writeHeader) + throws IOException { + init(output, header, writeHeader, null); + } + + // Working around not being able to call a constructor other than as the + // first statement... + private void init(Path output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + init(output.getFileSystem(ctx.getConfiguration()).create(output), header, writeHeader, ctx); + } + + private void init( + OutputStream output, VCFHeader header, boolean writeHeader, TaskAttemptContext ctx) + throws IOException { + final StoppableOutputStream stopOut = new StoppableOutputStream(!writeHeader, output); + + writer = createVariantContextWriter(ctx == null ? null : ctx.getConfiguration(), stopOut); + + writer.writeHeader(header); + stopOut.stopped = false; + + setInputHeader(header); + } + + protected VariantContextWriter createVariantContextWriter(Configuration conf, OutputStream out) { + return new VariantContextWriterBuilder().clearOptions().setOutputStream(out).build(); + } + + @Override + public void close(TaskAttemptContext ctx) throws IOException { + writer.close(); + } + + /** + * Used for lazy decoding of genotype data. Of course, each input record may have a different + * header, but we currently only support one header here... This is in part due to the fact that + * it's not clear what the best solution is. + */ + public void setInputHeader(VCFHeader header) { + vcfHeaderDataCache.setHeader(header); + bcfHeaderDataCache.setHeader(header); + } + + protected void writeRecord(VariantContext vc) { + final GenotypesContext gc = vc.getGenotypes(); + if (gc instanceof LazyParsingGenotypesContext) { + ((LazyParsingGenotypesContext) gc) + .getParser() + .setHeaderDataCache( + gc instanceof LazyVCFGenotypesContext ? vcfHeaderDataCache : bcfHeaderDataCache); + } + + writer.add(vc); + } } // We must always call writer.writeHeader() because the writer requires @@ -158,20 +142,32 @@ protected void writeRecord(VariantContext vc) { // does any buffering of its own and doesn't flush after writing the // header, this isn't as easy as this. 
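// A minimal usage sketch (hypothetical helper, mirroring init() above) of the pattern that
// StoppableOutputStream makes possible: the writer insists on writeHeader() being called, so the
// header is written while the stream is "stopped" and only record bytes reach the real output.
//
//   static VariantContextWriter headerSuppressingWriter(OutputStream raw, VCFHeader header) {
//     final StoppableOutputStream out = new StoppableOutputStream(true, raw); // discard header
//     final VariantContextWriter w =
//         new VariantContextWriterBuilder().clearOptions().setOutputStream(out).build();
//     w.writeHeader(header); // swallowed while out.stopped == true
//     out.stopped = false;   // subsequent records written through w reach 'raw'
//     return w;
//   }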
final class StoppableOutputStream extends FilterOutputStream { - public boolean stopped; - - public StoppableOutputStream(boolean startStopped, OutputStream out) { - super(out); - stopped = startStopped; - } - - @Override public void write(int b) throws IOException { - if (!stopped) super.write(b); - } - @Override public void write(byte[] b) throws IOException { - if (!stopped) super.write(b); - } - @Override public void write(byte[] b, int off, int len) throws IOException { - if (!stopped) super.write(b, off, len); - } + + public boolean stopped; + + public StoppableOutputStream(boolean startStopped, OutputStream out) { + super(out); + stopped = startStopped; + } + + @Override + public void write(int b) throws IOException { + if (!stopped) { + super.write(b); + } + } + + @Override + public void write(byte[] b) throws IOException { + if (!stopped) { + super.write(b); + } + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (!stopped) { + super.write(b, off, len); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VariantContextCodec.java b/src/main/java/org/seqdoop/hadoop_bam/VariantContextCodec.java index 5e71315..e1299ed 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VariantContextCodec.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VariantContextCodec.java @@ -22,315 +22,342 @@ package org.seqdoop.hadoop_bam; -import java.io.DataOutput; +import htsjdk.tribble.util.ParsingUtils; +import htsjdk.variant.bcf2.BCF2Codec; +import htsjdk.variant.variantcontext.Allele; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.variantcontext.VariantContextBuilder; +import htsjdk.variant.vcf.VCFConstants; +import htsjdk.variant.vcf.VCFEncoder; +import htsjdk.variant.vcf.VCFHeader; import java.io.DataInput; +import java.io.DataOutput; import java.io.IOException; +import java.lang.reflect.Array; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; -import java.lang.reflect.Array; - -import htsjdk.tribble.util.ParsingUtils; -import htsjdk.variant.variantcontext.Allele; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.variantcontext.VariantContextBuilder; -import htsjdk.variant.bcf2.BCF2Codec; -import htsjdk.variant.vcf.VCFConstants; -import htsjdk.variant.vcf.VCFEncoder; -import htsjdk.variant.vcf.VCFHeader; // See the comment in VariantContextWritable explaining what this is used for. public final class VariantContextCodec { - public static void write(final DataOutput out, final VariantContext vc) - throws IOException - { - Object genotypesData; - int numGenotypes; - if (vc.getGenotypes().isLazyWithData()) { - final htsjdk.variant.variantcontext.LazyGenotypesContext gc = - (htsjdk.variant.variantcontext.LazyGenotypesContext) - vc.getGenotypes(); - - genotypesData = gc.getUnparsedGenotypeData(); - numGenotypes = gc.size(); - } - else if (vc instanceof VariantContextWithHeader) { - final VCFHeader header = ((VariantContextWithHeader)vc).getHeader(); - - if (header == null) { - throw new IllegalStateException( "Header not set inside VariantContextWithHeader" ); - } - - final List genotypeAttributeKeys = vc.calcVCFGenotypeKeys(header); - final StringBuilder builder = new StringBuilder(); - if ( ! genotypeAttributeKeys.isEmpty()) { - // TODO: the VCFEncoder equivalent of this code checks for missing header fields here. do we care? 
- - final String genotypeFormatString = ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); - - builder.append(VCFConstants.FIELD_SEPARATOR); - builder.append(genotypeFormatString); - - final VCFEncoder encoder = new VCFEncoder(header, true, false); - final Map alleleStrings = encoder.buildAlleleStrings(vc); - encoder.addGenotypeData(vc, alleleStrings, genotypeAttributeKeys, builder); - } - genotypesData = builder.toString(); - numGenotypes = vc.getGenotypes().size(); + public static void write(final DataOutput out, final VariantContext vc) throws IOException { + Object genotypesData; + int numGenotypes; + if (vc.getGenotypes().isLazyWithData()) { + final htsjdk.variant.variantcontext.LazyGenotypesContext gc = + (htsjdk.variant.variantcontext.LazyGenotypesContext) vc.getGenotypes(); + + genotypesData = gc.getUnparsedGenotypeData(); + numGenotypes = gc.size(); + } else if (vc instanceof VariantContextWithHeader) { + + final VCFHeader header = ((VariantContextWithHeader) vc).getHeader(); + + if (header == null) { + throw new IllegalStateException("Header not set inside VariantContextWithHeader"); + } + + final List genotypeAttributeKeys = vc.calcVCFGenotypeKeys(header); + final StringBuilder builder = new StringBuilder(); + if (!genotypeAttributeKeys.isEmpty()) { + // TODO: the VCFEncoder equivalent of this code checks for missing header fields here. do + // we care? + + final String genotypeFormatString = + ParsingUtils.join(VCFConstants.GENOTYPE_FIELD_SEPARATOR, genotypeAttributeKeys); + + builder.append(VCFConstants.FIELD_SEPARATOR); + builder.append(genotypeFormatString); + + final VCFEncoder encoder = new VCFEncoder(header, true, false); + final Map alleleStrings = encoder.buildAlleleStrings(vc); + encoder.addGenotypeData(vc, alleleStrings, genotypeAttributeKeys, builder); + } + genotypesData = builder.toString(); + numGenotypes = vc.getGenotypes().size(); + } else { + throw new IllegalStateException( + "Cannot write fully decoded VariantContext: need lazy genotypes or VCF Header"); + } + + if (!(genotypesData instanceof String || genotypesData instanceof BCF2Codec.LazyData)) { + throw new IllegalStateException( + "Unrecognized unparsed genotype data, expected String or " + + "BCF2Codec.LazyData: " + + genotypesData.getClass()); + } + + final byte[] chrom = vc.getContig().getBytes("UTF-8"); + out.writeInt(chrom.length); + out.write(chrom); + + out.writeInt(vc.getStart()); + out.writeInt(vc.getEnd()); + + final byte[] id = vc.getID().getBytes("UTF-8"); + out.writeInt(id.length); + out.write(id); + + final List alleles = vc.getAlleles(); + out.writeInt(alleles.size()); + for (final Allele allele : alleles) { + final byte[] b = allele.getDisplayBases(); + out.writeInt(b.length); + out.write(b); + } + + if (vc.hasLog10PError()) { + out.writeFloat((float) vc.getLog10PError()); + } else { + // The "missing value" used in BCF2, a signaling NaN. + out.writeInt(0x7f800001); + } + + if (vc.isFiltered()) { + final Set filters = vc.getFilters(); + out.writeInt(filters.size()); + for (final String s : filters) { + final byte[] b = s.getBytes("UTF-8"); + out.writeInt(b.length); + out.write(b); + } + } else { + out.writeInt(vc.filtersWereApplied() ? 
-1 : -2); + } + + final Map attrs = vc.getAttributes(); + out.writeInt(attrs.size()); + for (final Map.Entry ent : attrs.entrySet()) { + final byte[] k = ent.getKey().getBytes("UTF-8"); + out.writeInt(k.length); + out.write(k); + + encodeAttrVal(out, ent.getValue()); + } + + out.writeInt(numGenotypes); + + if (genotypesData instanceof String) { + out.writeByte(0); + final byte[] genob = ((String) genotypesData).getBytes("UTF-8"); + out.writeInt(genob.length); + out.write(genob); + } else { + assert genotypesData instanceof BCF2Codec.LazyData; + final BCF2Codec.LazyData data = (BCF2Codec.LazyData) genotypesData; + out.writeByte(1); + out.writeInt(data.bytes.length); + out.write(data.bytes); + out.writeInt(data.nGenotypeFields); + } + } + + public static VariantContext read(final DataInput in) throws IOException { + final VariantContextBuilder builder = new VariantContextBuilder(); + + int count, len; + byte[] b; + + len = in.readInt(); + b = new byte[len]; + in.readFully(b); + final String chrom = new String(b, "UTF-8"); + builder.chr(chrom); + + final int start = in.readInt(); + builder.start(start); + builder.stop(in.readInt()); + + len = in.readInt(); + if (len == 0) { + builder.noID(); + } else { + if (len > b.length) { + b = new byte[len]; + } + in.readFully(b, 0, len); + builder.id(new String(b, 0, len, "UTF-8")); + } + + count = in.readInt(); + final List alleles = new ArrayList(count); + for (int i = 0; i < count; ++i) { + len = in.readInt(); + if (len > b.length) { + b = new byte[len]; + } + in.readFully(b, 0, len); + alleles.add(Allele.create(Arrays.copyOf(b, len), i == 0)); + } + builder.alleles(alleles); + + final int qualInt = in.readInt(); + builder.log10PError( + qualInt == 0x7f800001 ? VariantContext.NO_LOG10_PERROR : Float.intBitsToFloat(qualInt)); + + count = in.readInt(); + switch (count) { + case -2: + builder.unfiltered(); + break; + case -1: + builder.passFilters(); + break; + default: + while (count-- > 0) { + len = in.readInt(); + if (len > b.length) { + b = new byte[len]; + } + in.readFully(b, 0, len); + builder.filter(new String(b, 0, len, "UTF-8")); } - else { - throw new IllegalStateException( "Cannot write fully decoded VariantContext: need lazy genotypes or VCF Header" ); + break; + } + + count = in.readInt(); + final Map attrs = new HashMap(count, 1); + while (count-- > 0) { + len = in.readInt(); + if (len > b.length) { + b = new byte[len]; + } + in.readFully(b, 0, len); + attrs.put(new String(b, 0, len, "UTF-8"), decodeAttrVal(in)); + } + builder.attributes(attrs); + + count = in.readInt(); + final byte genoType = in.readByte(); + len = in.readInt(); + + // Resize b even if it's already big enough, minimizing the amount of + // memory LazyGenotypesContext hangs on to. 
+ b = new byte[len]; + in.readFully(b); + + switch (genoType) { + case 0: + builder.genotypesNoValidation(new LazyVCFGenotypesContext(alleles, chrom, start, b, count)); + break; + + case 1: + builder.genotypesNoValidation(new LazyBCFGenotypesContext(alleles, in.readInt(), b, count)); + break; + + default: + throw new IOException("Invalid genotypes type identifier: cannot decode"); + } + + return builder.make(); + } + + private static void encodeAttrVal(final DataOutput out, final Object v) throws IOException { + if (v instanceof Integer) { + out.writeByte(AttrType.INT.toByte()); + out.writeInt((Integer) v); + } else if (v instanceof Float) { + out.writeByte(AttrType.FLOAT.toByte()); + out.writeFloat((Float) v); + } else if (v instanceof Double) { + out.writeByte(AttrType.DOUBLE.toByte()); + out.writeDouble((Double) v); + } else if (v instanceof Boolean) { + out.writeByte(AttrType.BOOL.toByte()); + out.writeBoolean((Boolean) v); + } else if (v instanceof Character) { + out.writeByte(AttrType.CHAR.toByte()); + out.writeChar((Character) v); + + } else if (v instanceof List) { + encodeAttrVal(out, ((List) v).toArray()); + + } else if (v != null && v.getClass().isArray()) { + out.writeByte(AttrType.ARRAY.toByte()); + final int length = Array.getLength(v); + out.writeInt(length); + for (int i = 0; i < length; ++i) { + encodeAttrVal(out, Array.get(v, i)); + } + + } else { + out.writeByte(AttrType.STRING.toByte()); + if (v == null) { + out.writeInt(0); + } else { + final byte[] b = v.toString().getBytes("UTF-8"); + out.writeInt(b.length); + out.write(b); + } + } + } + + private static Object decodeAttrVal(final DataInput in) throws IOException { + switch (AttrType.fromByte(in.readByte())) { + case INT: + return in.readInt(); + case FLOAT: + return in.readFloat(); + case DOUBLE: + return in.readDouble(); + case BOOL: + return in.readBoolean(); + case CHAR: + return in.readChar(); + case ARRAY: + { + // VariantContext.fullyDecodeAttributes() checks for "instanceof + // List" so we have to return a List, not an array, here. + int len = in.readInt(); + final List os = new ArrayList(len); + while (len-- > 0) { + os.add(decodeAttrVal(in)); + } + return os; } - - if (!(genotypesData instanceof String || genotypesData instanceof BCF2Codec.LazyData)) - throw new IllegalStateException( - "Unrecognized unparsed genotype data, expected String or "+ - "BCF2Codec.LazyData: "+ genotypesData.getClass()); - - final byte[] chrom = vc.getContig().getBytes("UTF-8"); - out.writeInt(chrom.length); - out.write (chrom); - - out.writeInt(vc.getStart()); - out.writeInt(vc.getEnd()); - - final byte[] id = vc.getID().getBytes("UTF-8"); - out.writeInt(id.length); - out.write (id); - - final List alleles = vc.getAlleles(); - out.writeInt(alleles.size()); - for (final Allele allele : alleles) { - final byte[] b = allele.getDisplayBases(); - out.writeInt(b.length); - out.write (b); - } - - if (vc.hasLog10PError()) - out.writeFloat((float)vc.getLog10PError()); - else { - // The "missing value" used in BCF2, a signaling NaN. - out.writeInt(0x7f800001); - } - - if (vc.isFiltered()) { - final Set filters = vc.getFilters(); - out.writeInt(filters.size()); - for (final String s : filters) { - final byte[] b = s.getBytes("UTF-8"); - out.writeInt(b.length); - out.write (b); - } - } else - out.writeInt(vc.filtersWereApplied() ? 
-1 : -2); - - final Map attrs = vc.getAttributes(); - out.writeInt(attrs.size()); - for (final Map.Entry ent : attrs.entrySet()) { - final byte[] k = ent.getKey().getBytes("UTF-8"); - out.writeInt(k.length); - out.write (k); - - encodeAttrVal(out, ent.getValue()); - } - - out.writeInt(numGenotypes); - - if (genotypesData instanceof String) { - out.writeByte(0); - final byte[] genob = ((String)genotypesData).getBytes("UTF-8"); - out.writeInt(genob.length); - out.write (genob); - } else { - assert genotypesData instanceof BCF2Codec.LazyData; - final BCF2Codec.LazyData data = (BCF2Codec.LazyData)genotypesData; - out.writeByte(1); - out.writeInt(data.bytes.length); - out.write (data.bytes); - out.writeInt(data.nGenotypeFields); - } - } - - public static VariantContext read(final DataInput in) throws IOException { - final VariantContextBuilder builder = new VariantContextBuilder(); - - int count, len; - byte[] b; - - len = in.readInt(); - b = new byte[len]; - in.readFully(b); - final String chrom = new String(b, "UTF-8"); - builder.chr(chrom); - - final int start = in.readInt(); - builder.start(start); - builder.stop (in.readInt()); - - len = in.readInt(); - if (len == 0) - builder.noID(); - else { - if (len > b.length) b = new byte[len]; - in.readFully(b, 0, len); - builder.id(new String(b, 0, len, "UTF-8")); - } - - count = in.readInt(); - final List alleles = new ArrayList(count); - for (int i = 0; i < count; ++i) { - len = in.readInt(); - if (len > b.length) b = new byte[len]; - in.readFully(b, 0, len); - alleles.add(Allele.create(Arrays.copyOf(b, len), i == 0)); - } - builder.alleles(alleles); - - final int qualInt = in.readInt(); - builder.log10PError( - qualInt == 0x7f800001 - ? VariantContext.NO_LOG10_PERROR - : Float.intBitsToFloat(qualInt)); - - count = in.readInt(); - switch (count) { - case -2: builder.unfiltered(); break; - case -1: builder.passFilters(); break; - default: - while (count-- > 0) { - len = in.readInt(); - if (len > b.length) b = new byte[len]; - in.readFully(b, 0, len); - builder.filter(new String(b, 0, len, "UTF-8")); - } - break; - } - - count = in.readInt(); - final Map attrs = new HashMap(count, 1); - while (count-- > 0) { - len = in.readInt(); - if (len > b.length) b = new byte[len]; - in.readFully(b, 0, len); - attrs.put(new String(b, 0, len, "UTF-8"), decodeAttrVal(in)); - } - builder.attributes(attrs); - - count = in.readInt(); - final byte genoType = in.readByte(); - len = in.readInt(); - - // Resize b even if it's already big enough, minimizing the amount of - // memory LazyGenotypesContext hangs on to. - b = new byte[len]; - in.readFully(b); - - switch (genoType) { - case 0: - builder.genotypesNoValidation( - new LazyVCFGenotypesContext(alleles, chrom, start, b, count)); - break; - - case 1: - builder.genotypesNoValidation( - new LazyBCFGenotypesContext(alleles, in.readInt(), b, count)); - break; - - default: - throw new IOException( - "Invalid genotypes type identifier: cannot decode"); - } - - return builder.make(); - } - - // The VCF 4.1 spec says: "Integer, Float, Flag, Character, and String". But - // there can be many, so we also have ARRAY. - // - // In addition, VariantContext seems to represent some/all floats as doubles - // at least when reading from BCF, and at least BCF2FieldEncoder assumes - // them to be of class Double so we have to preserve doubles and thus must - // have DOUBLE. 
- private enum AttrType { - INT, FLOAT, BOOL, CHAR, STRING, ARRAY, DOUBLE; - - public byte toByte() { return (byte)ordinal(); } - - private static final AttrType[] values = values(); - public static AttrType fromByte(byte b) { return values[b]; } - } - - private static void encodeAttrVal(final DataOutput out, final Object v) - throws IOException - { - if (v instanceof Integer) { - out.writeByte(AttrType.INT.toByte()); - out.writeInt ((Integer)v); - } else if (v instanceof Float) { - out.writeByte (AttrType.FLOAT.toByte()); - out.writeFloat((Float)v); - } else if (v instanceof Double) { - out.writeByte (AttrType.DOUBLE.toByte()); - out.writeDouble((Double)v); - } else if (v instanceof Boolean) { - out.writeByte (AttrType.BOOL.toByte()); - out.writeBoolean((Boolean)v); - } else if (v instanceof Character) { - out.writeByte(AttrType.CHAR.toByte()); - out.writeChar((Character)v); - - } else if (v instanceof List) { - encodeAttrVal(out, ((List)v).toArray()); - - } else if (v != null && v.getClass().isArray()) { - out.writeByte(AttrType.ARRAY.toByte()); - final int length = Array.getLength(v); - out.writeInt(length); - for (int i = 0; i < length; ++i) - encodeAttrVal(out, Array.get(v, i)); - - } else { - out.writeByte(AttrType.STRING.toByte()); - if (v == null) - out.writeInt(0); - else { - final byte[] b = v.toString().getBytes("UTF-8"); - out.writeInt(b.length); - out.write (b); - } - } - } - - private static Object decodeAttrVal(final DataInput in) throws IOException { - switch (AttrType.fromByte(in.readByte())) { - case INT: return in.readInt(); - case FLOAT: return in.readFloat(); - case DOUBLE: return in.readDouble(); - case BOOL: return in.readBoolean(); - case CHAR: return in.readChar(); - case ARRAY: { - // VariantContext.fullyDecodeAttributes() checks for "instanceof - // List" so we have to return a List, not an array, here. - int len = in.readInt(); - final List os = new ArrayList(len); - while (len-- > 0) - os.add(decodeAttrVal(in)); - return os; - } - case STRING: { - final int len = in.readInt(); - if (len == 0) - return null; - final byte[] b = new byte[len]; - in.readFully(b); - return new String(b, "UTF-8"); - } - } - assert (false); - throw new IOException("Invalid type identifier: cannot decode"); - } + case STRING: + { + final int len = in.readInt(); + if (len == 0) { + return null; + } + final byte[] b = new byte[len]; + in.readFully(b); + return new String(b, "UTF-8"); + } + } + assert (false); + throw new IOException("Invalid type identifier: cannot decode"); + } + + // The VCF 4.1 spec says: "Integer, Float, Flag, Character, and String". But + // there can be many, so we also have ARRAY. + // + // In addition, VariantContext seems to represent some/all floats as doubles + // at least when reading from BCF, and at least BCF2FieldEncoder assumes + // them to be of class Double so we have to preserve doubles and thus must + // have DOUBLE. 
+ private enum AttrType { + INT, + FLOAT, + BOOL, + CHAR, + STRING, + ARRAY, + DOUBLE; + + private static final AttrType[] values = values(); + + public static AttrType fromByte(byte b) { + return values[b]; + } + + public byte toByte() { + return (byte) ordinal(); + } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VariantContextWithHeader.java b/src/main/java/org/seqdoop/hadoop_bam/VariantContextWithHeader.java index 461585b..f67a143 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VariantContextWithHeader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VariantContextWithHeader.java @@ -24,14 +24,15 @@ import htsjdk.variant.vcf.VCFHeader; public class VariantContextWithHeader extends VariantContext { - private final VCFHeader header; - public VariantContextWithHeader(VariantContext context, VCFHeader header) { - super(context); - this.header = header; - } + private final VCFHeader header; - public VCFHeader getHeader() { - return header; - } + public VariantContextWithHeader(VariantContext context, VCFHeader header) { + super(context); + this.header = header; + } + + public VCFHeader getHeader() { + return header; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/VariantContextWritable.java b/src/main/java/org/seqdoop/hadoop_bam/VariantContextWritable.java index 02c921d..e48ebf6 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/VariantContextWritable.java +++ b/src/main/java/org/seqdoop/hadoop_bam/VariantContextWritable.java @@ -22,39 +22,50 @@ package org.seqdoop.hadoop_bam; +import htsjdk.variant.variantcontext.VariantContext; +import htsjdk.variant.vcf.VCFHeader; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; - import org.apache.hadoop.io.Writable; -import htsjdk.variant.variantcontext.VariantContext; -import htsjdk.variant.vcf.VCFHeader; -/** VariantContexts read here have LazyGenotypesContexts, which need to have a - * header set before the genotype data in the VariantContexts can be decoded. - * See the LazyGenotypesContext class. +/** + * VariantContexts read here have LazyGenotypesContexts, which need to have a header set before the + * genotype data in the VariantContexts can be decoded. See the LazyGenotypesContext class. */ public class VariantContextWritable implements Writable { - private VariantContext vc; - - public VariantContext get() { return vc; } - public void set(VariantContext vc) { this.vc = vc; } - public void set(VariantContext vc, VCFHeader header) { this.vc = new VariantContextWithHeader(vc, header); } - - // XXX: Unfortunately there's no simple way to just pass a BCF record - // through. Contrasting to BAM, there's no equivalent of the BAMRecord - // subclass of SAMRecord that saves the original BAM fields --- a - // VariantContext only saves the decoded info, so it's impossible to encode - // one to BCF without the header. - // - // VCF is also unusable because VCFWriter defensively refuses to write - // anything without a header, throwing IllegalStateException if attempted. - // - // Thus, we have a custom encoding. 
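  // A rough round-trip sketch (hypothetical helper and in-memory streams, not part of this
  // change) of the custom encoding referred to above: VariantContextCodec lets a record plus its
  // header survive Writable serialization even though the serialized bytes carry no VCF header.
  static VariantContext roundTripExample(VariantContext vc, VCFHeader header) throws IOException {
    final java.io.ByteArrayOutputStream buffer = new java.io.ByteArrayOutputStream();
    final VariantContextWritable written = new VariantContextWritable();
    written.set(vc, header); // the header is kept so lazy genotypes can be decoded later
    written.write(new java.io.DataOutputStream(buffer));

    final VariantContextWritable restored = new VariantContextWritable();
    restored.readFields(
        new java.io.DataInputStream(new java.io.ByteArrayInputStream(buffer.toByteArray())));
    return restored.get();
  }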
- @Override public void write(final DataOutput out) throws IOException { - VariantContextCodec.write(out, vc); - } - @Override public void readFields(final DataInput in) throws IOException { - vc = VariantContextCodec.read(in); - } + + private VariantContext vc; + + public VariantContext get() { + return vc; + } + + public void set(VariantContext vc) { + this.vc = vc; + } + + public void set(VariantContext vc, VCFHeader header) { + this.vc = new VariantContextWithHeader(vc, header); + } + + // XXX: Unfortunately there's no simple way to just pass a BCF record + // through. Contrasting to BAM, there's no equivalent of the BAMRecord + // subclass of SAMRecord that saves the original BAM fields --- a + // VariantContext only saves the decoded info, so it's impossible to encode + // one to BCF without the header. + // + // VCF is also unusable because VCFWriter defensively refuses to write + // anything without a header, throwing IllegalStateException if attempted. + // + // Thus, we have a custom encoding. + @Override + public void write(final DataOutput out) throws IOException { + VariantContextCodec.write(out, vc); + } + + @Override + public void readFields(final DataInput in) throws IOException { + vc = VariantContextCodec.read(in); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndex.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndex.java index 164965a..650dae7 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndex.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndex.java @@ -25,97 +25,109 @@ import java.io.BufferedInputStream; import java.io.File; import java.io.FileInputStream; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.NavigableSet; import java.util.TreeSet; -/** An index into BGZF-compressed files, for {@link BGZFSplitFileInputFormat}. - * Reads files that are created by {@link BGZFBlockIndexer}. +/** + * An index into BGZF-compressed files, for {@link BGZFSplitFileInputFormat}. Reads files that are + * created by {@link BGZFBlockIndexer}. * - *

<p>Indexes the positions of individual gzip blocks in the file.</p>
+ * <p>
Indexes the positions of individual gzip blocks in the file. */ public final class BGZFBlockIndex { - private final NavigableSet offsets = new TreeSet(); - - public BGZFBlockIndex() {} - public BGZFBlockIndex(final File path) throws IOException { - this(new BufferedInputStream(new FileInputStream(path))); - } - public BGZFBlockIndex(final InputStream in) throws IOException { - readIndex(in); - } - - public void readIndex(final InputStream in) throws IOException { - offsets.clear(); - - final ByteBuffer bb = ByteBuffer.allocate(8); - - for (long prev = -1; in.read(bb.array(), 2, 6) == 6;) { - final long cur = bb.getLong(0); - if (prev > cur) - throw new IOException(String.format( - "Invalid BGZF block index; offsets not in order: %#x > %#x", - prev, cur)); - - offsets.add(prev = cur); - } - in.close(); - - if (offsets.size() < 1) - throw new IOException( - "Invalid BGZF block index: should contain at least the file size"); - - offsets.add(0L); - } - - public Long prevBlock(final long filePos) { - return offsets.floor(filePos); - } - public Long nextBlock(final long filePos) { - return offsets.higher(filePos); - } - - public int size() { return offsets.size(); } - - private long secondBlock() { return nextBlock(0); } - private long lastBlock() { return prevBlock(fileSize() - 1); } - private long fileSize() { return offsets.last(); } - - /** Writes some statistics about each BGZF block index file given as an - * argument. - */ - public static void main(String[] args) { - if (args.length == 0) { - System.out.println( - "Usage: BGZFBlockIndex [BGZF block indices...]\n\n"+ - - "Writes a few statistics about each BGZF block index."); - return; - } - - for (String arg : args) { - final File f = new File(arg); - if (f.isFile() && f.canRead()) { - try { - System.err.printf("%s:\n", f); - final BGZFBlockIndex bi = new BGZFBlockIndex(f); - final long second = bi.secondBlock(); - final long last = bi.lastBlock(); - System.err.printf( - "\t%d blocks\n" + - "\tfirst after 0 is at %#014x\n" + - "\tlast is at %#014x\n" + - "\tassociated BGZF file size %d\n", - bi.size()-1, - bi.secondBlock(), bi.lastBlock(), bi.fileSize()); - } catch (IOException e) { - System.err.printf("Failed to read %s!\n", f); - e.printStackTrace(); - } - } else - System.err.printf("%s does not look like a readable file!\n", f); - } - } + + private final NavigableSet offsets = new TreeSet(); + + public BGZFBlockIndex() {} + + public BGZFBlockIndex(final File path) throws IOException { + this(new BufferedInputStream(new FileInputStream(path))); + } + + public BGZFBlockIndex(final InputStream in) throws IOException { + readIndex(in); + } + + /** Writes some statistics about each BGZF block index file given as an argument. 
*/ + public static void main(String[] args) { + if (args.length == 0) { + System.out.println( + "Usage: BGZFBlockIndex [BGZF block indices...]\n\n" + + "Writes a few statistics about each BGZF block index."); + return; + } + + for (String arg : args) { + final File f = new File(arg); + if (f.isFile() && f.canRead()) { + try { + System.err.printf("%s:\n", f); + final BGZFBlockIndex bi = new BGZFBlockIndex(f); + final long second = bi.secondBlock(); + final long last = bi.lastBlock(); + System.err.printf( + "\t%d blocks\n" + + "\tfirst after 0 is at %#014x\n" + + "\tlast is at %#014x\n" + + "\tassociated BGZF file size %d\n", + bi.size() - 1, bi.secondBlock(), bi.lastBlock(), bi.fileSize()); + } catch (IOException e) { + System.err.printf("Failed to read %s!\n", f); + e.printStackTrace(); + } + } else { + System.err.printf("%s does not look like a readable file!\n", f); + } + } + } + + public void readIndex(final InputStream in) throws IOException { + offsets.clear(); + + final ByteBuffer bb = ByteBuffer.allocate(8); + + for (long prev = -1; in.read(bb.array(), 2, 6) == 6; ) { + final long cur = bb.getLong(0); + if (prev > cur) { + throw new IOException( + String.format("Invalid BGZF block index; offsets not in order: %#x > %#x", prev, cur)); + } + + offsets.add(prev = cur); + } + in.close(); + + if (offsets.size() < 1) { + throw new IOException("Invalid BGZF block index: should contain at least the file size"); + } + + offsets.add(0L); + } + + public Long prevBlock(final long filePos) { + return offsets.floor(filePos); + } + + public Long nextBlock(final long filePos) { + return offsets.higher(filePos); + } + + public int size() { + return offsets.size(); + } + + private long secondBlock() { + return nextBlock(0); + } + + private long lastBlock() { + return prevBlock(fileSize() - 1); + } + + private long fileSize() { + return offsets.last(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndexer.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndexer.java index b7fe928..e59fb7b 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndexer.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFBlockIndexer.java @@ -26,200 +26,199 @@ import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.nio.LongBuffer; import java.util.Arrays; -/** An indexing tool for BGZF-compressed files, making them palatable to {@link - * BGZFSplitFileInputFormat}. Writes BGZF block indices as understood by {@link - * BGZFBlockIndex}. +/** + * An indexing tool for BGZF-compressed files, making them palatable to {@link + * BGZFSplitFileInputFormat}. Writes BGZF block indices as understood by {@link BGZFBlockIndex}. */ public final class BGZFBlockIndexer { - public static void main(String[] args) { - if (args.length <= 0) { - System.out.println( - "Usage: BGZFBlockIndexer GRANULARITY [BGZF files...]\n\n"+ - - "Writes, for each GRANULARITY gzip blocks in a BGZF file, its "+ - "virtual file offset\nas a big-endian 48-bit integer into "+ - "[filename].bgzfi. 
The file is terminated by\nthe BGZF file's "+ - "length, in the same format."); - return; - } - - int granularity; - try { - granularity = Integer.parseInt(args[0]); - } catch (NumberFormatException e) { - granularity = 0; - } - if (granularity <= 0) { - System.err.printf( - "Granularity must be a positive integer, not '%s'!\n", args[0]); - return; - } - - final BGZFBlockIndexer indexer = new BGZFBlockIndexer(granularity); - - for (final String arg : Arrays.asList(args).subList(1, args.length)) { - final File f = new File(arg); - if (f.isFile() && f.canRead()) { - System.out.printf("Indexing %s...", f); - try { - indexer.index(f); - System.out.println(" done."); - } catch (IOException e) { - System.out.println(" FAILED!"); - e.printStackTrace(); - } - } else - System.err.printf( - "%s does not look like a file, won't index!\n", f); - } - } - - private final ByteBuffer byteBuffer; - private final int granularity; - - private int pos = 0; - - private static final int PRINT_EVERY = 500*1024*1024; - - public BGZFBlockIndexer(int g) { - granularity = g; - byteBuffer = ByteBuffer.allocate(8); // Enough to fit a long - } - - private void index(final File file) throws IOException { - final InputStream in = new FileInputStream(file); - - final OutputStream out = new BufferedOutputStream( - new FileOutputStream(file.getPath() + ".bgzfi")); - - final LongBuffer lb = - byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); - - long prevPrint = 0; - pos = 0; - - for (int i = 0;;) { - if (!skipBlock(in)) - break; - - if (++i == granularity) { - i = 0; - lb.put(0, pos); - out.write(byteBuffer.array(), 2, 6); - - if (pos - prevPrint >= PRINT_EVERY) { - System.out.print("-"); - prevPrint = pos; - } - } - } - lb.put(0, file.length()); - out.write(byteBuffer.array(), 2, 6); - out.close(); - in.close(); - } - - private boolean skipBlock(final InputStream in) throws IOException { - - // Check magic number - final int read = readBytes(in, 4); - if (read != 4) { - if (read == 0) - return false; - ioError("Invalid gzip header: too short, no ID/CM/FLG"); - } - - final int magic = byteBuffer.order(ByteOrder.BIG_ENDIAN).getInt(0); - if (magic != 0x1f8b0804) - ioError( - "Invalid gzip header: bad ID/CM/FLG %#x != 0x1f8b0804", magic); - - // Skip to extra-length - if (!readExactlyBytes(in, 8)) - ioError("Invalid gzip header: too short, no XLEN"); - - byteBuffer.order(ByteOrder.LITTLE_ENDIAN); - - final int xlen = getUshort(6); - - // Skip over each subfield until finding the one we care about - for (int offset = 0; offset < xlen;) { - if (!readExactlyBytes(in, 4)) - ioError("Invalid subfields: EOF after %d subfield bytes", offset); - offset += 4; - - byteBuffer.order(ByteOrder.BIG_ENDIAN); - final int siAndSlen = byteBuffer.getInt(0); - byteBuffer.order(ByteOrder.LITTLE_ENDIAN); - - if ((siAndSlen & ~0xff) == 0x42430200) { - if (!readExactlyBytes(in, 2)) - ioError("Invalid BGZF subfield: missing BSIZE"); - offset += 2; - - final int bsize = getUshort(0); - - // Skip over: rest of header + compressed data + rest of gzip block - fullySkip(in, (xlen - offset) + (bsize - xlen - 19) + 8); - pos += bsize + 1; - return true; - } else { - final int slen = getUshort(2); - fullySkip(in, slen); - offset += slen; - } - } - throw new IOException("Invalid BGZF file: block without BGZF subfield"); - } - - private int getUshort(final int idx) { - return (int)byteBuffer.getShort(idx) & 0xffff; - } - - private void fullySkip(final InputStream in, final int skip) - throws IOException - { - // Skip repeatedly until we're either done 
skipping or can't skip any - // more, in case some kind of IO error is temporarily preventing it. That - // kind of situation might not necessarily be possible; the docs are - // rather vague about the whole thing. - for (int s = skip; s > 0;) { - final long skipped = in.skip(s); - if (skipped == 0) - throw new IOException("Skip failed"); - s -= skipped; - } - } - - private int readBytes(final InputStream in, final int n) - throws IOException - { - assert n <= byteBuffer.capacity(); - - int read = 0; - while (read < n) { - final int readNow = in.read(byteBuffer.array(), read, n - read); - if (readNow <= 0) - break; - read += readNow; - } - return read; - } - private boolean readExactlyBytes(final InputStream in, final int n) - throws IOException - { - return readBytes(in, n) == n; - } - - private void ioError(String s, Object... va) throws IOException { - throw new IOException(String.format(s, va)); - } + + private static final int PRINT_EVERY = 500 * 1024 * 1024; + private final ByteBuffer byteBuffer; + private final int granularity; + + private int pos = 0; + + public BGZFBlockIndexer(int g) { + granularity = g; + byteBuffer = ByteBuffer.allocate(8); // Enough to fit a long + } + + public static void main(String[] args) { + if (args.length <= 0) { + System.out.println( + "Usage: BGZFBlockIndexer GRANULARITY [BGZF files...]\n\n" + + "Writes, for each GRANULARITY gzip blocks in a BGZF file, its " + + "virtual file offset\nas a big-endian 48-bit integer into " + + "[filename].bgzfi. The file is terminated by\nthe BGZF file's " + + "length, in the same format."); + return; + } + + int granularity; + try { + granularity = Integer.parseInt(args[0]); + } catch (NumberFormatException e) { + granularity = 0; + } + if (granularity <= 0) { + System.err.printf("Granularity must be a positive integer, not '%s'!\n", args[0]); + return; + } + + final BGZFBlockIndexer indexer = new BGZFBlockIndexer(granularity); + + for (final String arg : Arrays.asList(args).subList(1, args.length)) { + final File f = new File(arg); + if (f.isFile() && f.canRead()) { + System.out.printf("Indexing %s...", f); + try { + indexer.index(f); + System.out.println(" done."); + } catch (IOException e) { + System.out.println(" FAILED!"); + e.printStackTrace(); + } + } else { + System.err.printf("%s does not look like a file, won't index!\n", f); + } + } + } + + private void index(final File file) throws IOException { + final InputStream in = new FileInputStream(file); + + final OutputStream out = + new BufferedOutputStream(new FileOutputStream(file.getPath() + ".bgzfi")); + + final LongBuffer lb = byteBuffer.order(ByteOrder.BIG_ENDIAN).asLongBuffer(); + + long prevPrint = 0; + pos = 0; + + for (int i = 0; ; ) { + if (!skipBlock(in)) { + break; + } + + if (++i == granularity) { + i = 0; + lb.put(0, pos); + out.write(byteBuffer.array(), 2, 6); + + if (pos - prevPrint >= PRINT_EVERY) { + System.out.print("-"); + prevPrint = pos; + } + } + } + lb.put(0, file.length()); + out.write(byteBuffer.array(), 2, 6); + out.close(); + in.close(); + } + + private boolean skipBlock(final InputStream in) throws IOException { + + // Check magic number + final int read = readBytes(in, 4); + if (read != 4) { + if (read == 0) { + return false; + } + ioError("Invalid gzip header: too short, no ID/CM/FLG"); + } + + final int magic = byteBuffer.order(ByteOrder.BIG_ENDIAN).getInt(0); + if (magic != 0x1f8b0804) { + ioError("Invalid gzip header: bad ID/CM/FLG %#x != 0x1f8b0804", magic); + } + + // Skip to extra-length + if (!readExactlyBytes(in, 8)) { + 
ioError("Invalid gzip header: too short, no XLEN"); + } + + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + final int xlen = getUshort(6); + + // Skip over each subfield until finding the one we care about + for (int offset = 0; offset < xlen; ) { + if (!readExactlyBytes(in, 4)) { + ioError("Invalid subfields: EOF after %d subfield bytes", offset); + } + offset += 4; + + byteBuffer.order(ByteOrder.BIG_ENDIAN); + final int siAndSlen = byteBuffer.getInt(0); + byteBuffer.order(ByteOrder.LITTLE_ENDIAN); + + if ((siAndSlen & ~0xff) == 0x42430200) { + if (!readExactlyBytes(in, 2)) { + ioError("Invalid BGZF subfield: missing BSIZE"); + } + offset += 2; + + final int bsize = getUshort(0); + + // Skip over: rest of header + compressed data + rest of gzip block + fullySkip(in, (xlen - offset) + (bsize - xlen - 19) + 8); + pos += bsize + 1; + return true; + } else { + final int slen = getUshort(2); + fullySkip(in, slen); + offset += slen; + } + } + throw new IOException("Invalid BGZF file: block without BGZF subfield"); + } + + private int getUshort(final int idx) { + return (int) byteBuffer.getShort(idx) & 0xffff; + } + + private void fullySkip(final InputStream in, final int skip) throws IOException { + // Skip repeatedly until we're either done skipping or can't skip any + // more, in case some kind of IO error is temporarily preventing it. That + // kind of situation might not necessarily be possible; the docs are + // rather vague about the whole thing. + for (int s = skip; s > 0; ) { + final long skipped = in.skip(s); + if (skipped == 0) { + throw new IOException("Skip failed"); + } + s -= skipped; + } + } + + private int readBytes(final InputStream in, final int n) throws IOException { + assert n <= byteBuffer.capacity(); + + int read = 0; + while (read < n) { + final int readNow = in.read(byteBuffer.array(), read, n - read); + if (readNow <= 0) { + break; + } + read += readNow; + } + return read; + } + + private boolean readExactlyBytes(final InputStream in, final int n) throws IOException { + return readBytes(in, n) == n; + } + + private void ioError(String s, Object... va) throws IOException { + throw new IOException(String.format(s, va)); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCodec.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCodec.java index 669e2ac..c245205 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCodec.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCodec.java @@ -13,17 +13,15 @@ import org.apache.hadoop.io.compress.SplittableCompressionCodec; /** - * A Hadoop {@link CompressionCodec} for the - * BGZF compression format, - * which reads and writes files with a .bgz suffix. There is no standard - * suffix for BGZF-compressed files, and in fact .gz is commonly used, in - * which case {@link BGZFEnhancedGzipCodec} should be used instead of this class. - *

- * To use BGZFCodec, set it on the configuration object as follows. - *

- * {@code - * conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName()) - * } + * A Hadoop {@link CompressionCodec} for the BGZF compression format, which reads + * and writes files with a .bgz suffix. There is no standard suffix for BGZF-compressed + * files, and in fact .gz is commonly used, in which case {@link BGZFEnhancedGzipCodec} + * should be used instead of this class. + * + *

To use BGZFCodec, set it on the configuration object as follows. {@code + * conf.set("io.compression.codecs", BGZFCodec.class.getCanonicalName()) } + * * @see BGZFEnhancedGzipCodec */ public class BGZFCodec extends GzipCodec implements SplittableCompressionCodec { @@ -38,8 +36,8 @@ public CompressionOutputStream createOutputStream(OutputStream out) throws IOExc // compressors are not used, so ignore/return null @Override - public CompressionOutputStream createOutputStream(OutputStream out, - Compressor compressor) throws IOException { + public CompressionOutputStream createOutputStream(OutputStream out, Compressor compressor) + throws IOException { return createOutputStream(out); // compressors are not used, so ignore } @@ -54,11 +52,12 @@ public Compressor createCompressor() { } @Override - public SplitCompressionInputStream createInputStream(InputStream seekableIn, - Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException { + public SplitCompressionInputStream createInputStream( + InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) + throws IOException { BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn); long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end); - ((Seekable)seekableIn).seek(adjustedStart); + ((Seekable) seekableIn).seek(adjustedStart); return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end); } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCompressionOutputStream.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCompressionOutputStream.java index 639dbec..b4a0862 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCompressionOutputStream.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFCompressionOutputStream.java @@ -6,19 +6,17 @@ import org.apache.hadoop.io.compress.CompressionOutputStream; /** - * An implementation of {@code CompressionOutputStream} for BGZF, using - * {@link BlockCompressedOutputStream} from htsjdk. Note that unlike - * {@link BlockCompressedOutputStream}, an empty gzip block file terminator is - * not written at the end of the stream. This is because in Hadoop, multiple - * headerless files are often written in parallel, and merged afterwards into a single - * file, and it's during the merge process the header and terminator are added. + * An implementation of {@code CompressionOutputStream} for BGZF, using {@link + * BlockCompressedOutputStream} from htsjdk. Note that unlike {@link BlockCompressedOutputStream}, + * an empty gzip block file terminator is not written at the end of the stream. This is + * because in Hadoop, multiple headerless files are often written in parallel, and merged afterwards + * into a single file, and it's during the merge process the header and terminator are added. 
*/ class BGZFCompressionOutputStream extends CompressionOutputStream { private BlockCompressedOutputStream output; - public BGZFCompressionOutputStream(OutputStream out) - throws IOException { + public BGZFCompressionOutputStream(OutputStream out) throws IOException { super(out); this.output = new BlockCompressedOutputStream(out, null); } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFEnhancedGzipCodec.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFEnhancedGzipCodec.java index 04112a7..cefd553 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFEnhancedGzipCodec.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFEnhancedGzipCodec.java @@ -13,53 +13,51 @@ import org.apache.hadoop.io.compress.SplittableCompressionCodec; /** - * A Hadoop {@link CompressionCodec} for the - * BGZF compression format, - * which reads and writes files with a .gz suffix. - *

- * BGZF is a splittable extension of gzip, which means that all BGZF files are standard - * gzip files, however the reverse is not necessarily the case. BGZF files often have the - * standard .gz suffix (such as those produced by the - * bcftools command), - * which causes a difficulty since it is not immediately apparent from the filename alone - * whether a file is a BGZF file, or merely a regular gzip file. BGZFEnhancedGzipCodec - * will read the start of the file to look for BGZF headers to detect the type of - * compression. - *

- *

- * BGZFEnhancedGzipCodec will read BGZF or gzip files, but currently always writes regular gzip files. - *

- *

- * To use BGZFEnhancedGzipCodec, set it on the configuration object as follows. This will - * override the built-in GzipCodec that is mapped to the .gz suffix. - *

- * {@code - * conf.set("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName()) - * } + * A Hadoop {@link CompressionCodec} for the BGZF compression format, which reads + * and writes files with a .gz suffix. + * + *

BGZF is a splittable extension of gzip, which means that all BGZF files are standard gzip
+ * files; however, the reverse is not necessarily the case. BGZF files often have the standard
+ * .gz suffix (such as those produced by the bcftools command), which is problematic since it is
+ * not immediately apparent from the filename alone whether a file is a BGZF file or merely a
+ * regular gzip file. BGZFEnhancedGzipCodec will read the start of the file to look for BGZF
+ * headers to detect the type of compression.
+ *
+ *
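+ * <p>For example, a bgzipped VCF with a hypothetical name such as {@code variants.vcf.gz} is
+ * recognized as BGZF and can be split, whereas a file compressed with plain {@code gzip} is
+ * decompressed from the beginning as a single, unsplit stream.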

BGZFEnhancedGzipCodec will read BGZF or gzip files, but currently always writes regular gzip + * files. + * + *

To use BGZFEnhancedGzipCodec, set it on the configuration object as follows. This will + * override the built-in GzipCodec that is mapped to the .gz suffix. {@code + * conf.set("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName()) } + * * @see BGZFCodec */ public class BGZFEnhancedGzipCodec extends GzipCodec implements SplittableCompressionCodec { @Override - public SplitCompressionInputStream createInputStream(InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) throws IOException { + public SplitCompressionInputStream createInputStream( + InputStream seekableIn, Decompressor decompressor, long start, long end, READ_MODE readMode) + throws IOException { if (!(seekableIn instanceof Seekable)) { - throw new IOException("seekableIn must be an instance of " + - Seekable.class.getName()); + throw new IOException("seekableIn must be an instance of " + Seekable.class.getName()); } if (!BlockCompressedInputStream.isValidFile(new BufferedInputStream(seekableIn))) { // data is regular gzip, not BGZF - ((Seekable)seekableIn).seek(0); - final CompressionInputStream compressionInputStream = createInputStream(seekableIn, - decompressor); + ((Seekable) seekableIn).seek(0); + final CompressionInputStream compressionInputStream = + createInputStream(seekableIn, decompressor); return new SplitCompressionInputStream(compressionInputStream, start, end) { @Override public int read(byte[] b, int off, int len) throws IOException { return compressionInputStream.read(b, off, len); } + @Override public void resetState() throws IOException { compressionInputStream.resetState(); } + @Override public int read() throws IOException { return compressionInputStream.read(); @@ -68,8 +66,7 @@ public int read() throws IOException { } BGZFSplitGuesser splitGuesser = new BGZFSplitGuesser(seekableIn); long adjustedStart = splitGuesser.guessNextBGZFBlockStart(start, end); - ((Seekable)seekableIn).seek(adjustedStart); + ((Seekable) seekableIn).seek(adjustedStart); return new BGZFSplitCompressionInputStream(seekableIn, adjustedStart, end); } - } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitCompressionInputStream.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitCompressionInputStream.java index 8786757..a200689 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitCompressionInputStream.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitCompressionInputStream.java @@ -7,26 +7,22 @@ import org.apache.hadoop.io.compress.SplitCompressionInputStream; /** - * An implementation of {@code SplitCompressionInputStream} for BGZF, based on - * {@code BZip2CompressionInputStream} and {@code CBZip2InputStream} from Hadoop. - * (BZip2 is the only splittable compression codec in Hadoop.) + * An implementation of {@code SplitCompressionInputStream} for BGZF, based on {@code + * BZip2CompressionInputStream} and {@code CBZip2InputStream} from Hadoop. (BZip2 is the only + * splittable compression codec in Hadoop.) 
*/ class BGZFSplitCompressionInputStream extends SplitCompressionInputStream { + private static final int END_OF_BLOCK = -2; private final BlockCompressedInputStream input; - private BufferedInputStream bufferedIn; - private long startingPos = 0L; - private long processedPosition; - - private enum POS_ADVERTISEMENT_STATE_MACHINE { - HOLD, ADVERTISE - }; - POS_ADVERTISEMENT_STATE_MACHINE posSM = POS_ADVERTISEMENT_STATE_MACHINE.HOLD; long compressedStreamPosition = 0; + private BufferedInputStream bufferedIn; + private long startingPos = 0L;; + + private long processedPosition; - public BGZFSplitCompressionInputStream(InputStream in, long start, long end) - throws IOException { + public BGZFSplitCompressionInputStream(InputStream in, long start, long end) throws IOException { super(in, start, end); bufferedIn = new BufferedInputStream(super.in); this.startingPos = super.getPos(); @@ -61,11 +57,12 @@ public int read(byte[] b, int off, int len) throws IOException { /** * Read up to len bytes from the stream, but no further than the end of the - * compressed block. If at the end of the block then no bytes will be read and a return - * value of -2 will be returned; on the next call to read, bytes from the next block - * will be returned. This is the same contract as CBZip2InputStream in Hadoop. - * @return int The return value greater than 0 are the bytes read. A value - * of -1 means end of stream while -2 represents end of block. + * compressed block. If at the end of the block then no bytes will be read and a return value of + * -2 will be returned; on the next call to read, bytes from the next block will be returned. This + * is the same contract as CBZip2InputStream in Hadoop. + * + * @return int The return value greater than 0 are the bytes read. A value of -1 means end of + * stream while -2 represents end of block. */ private int readWithinBlock(byte[] b, int off, int len) throws IOException { if (input.endOfBlock()) { @@ -102,4 +99,9 @@ private void updatePos(boolean shouldAddOn) { public void close() throws IOException { input.close(); } + + private enum POS_ADVERTISEMENT_STATE_MACHINE { + HOLD, + ADVERTISE + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitFileInputFormat.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitFileInputFormat.java index 09eedbb..f78461e 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitFileInputFormat.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitFileInputFormat.java @@ -20,14 +20,11 @@ package org.seqdoop.hadoop_bam.util; - - import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Comparator; import java.util.List; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; @@ -36,124 +33,120 @@ import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.input.FileSplit; -/** An {@link org.apache.hadoop.mapreduce.InputFormat} for BGZF-compressed - * files. +/** + * An {@link org.apache.hadoop.mapreduce.InputFormat} for BGZF-compressed files. * - *

A {@link BGZFBlockIndex} for each Path used is required, or an - * IOException is thrown out of {@link #getSplits}.

+ *

A {@link BGZFBlockIndex} for each Path used is required, or an IOException is + * thrown out of {@link #getSplits}. */ -public abstract class BGZFSplitFileInputFormat - extends FileInputFormat -{ - private Path getIdxPath(Path path) { return path.suffix(".bgzfi"); } - - /** The splits returned are FileSplits. */ - @Override public List getSplits(JobContext job) - throws IOException - { - final List splits = super.getSplits(job); - - // Align the splits so that they don't cross blocks - - // addIndexedSplits() requires the given splits to be sorted by file - // path, so do so. Although FileInputFormat.getSplits() does, at the time - // of writing this, generate them in that order, we shouldn't rely on it. - Collections.sort(splits, new Comparator() { - public int compare(InputSplit a, InputSplit b) { - FileSplit fa = (FileSplit)a, fb = (FileSplit)b; - return fa.getPath().compareTo(fb.getPath()); - } - }); - - final List newSplits = - new ArrayList(splits.size()); - - final Configuration cfg = job.getConfiguration(); - - for (int i = 0; i < splits.size();) { - try { - i = addIndexedSplits (splits, i, newSplits, cfg); - } catch (IOException e) { - i = addProbabilisticSplits(splits, i, newSplits, cfg); - } - } - return newSplits; - } - - // Handles all the splits that share the Path of the one at index i, - // returning the next index to be used. - private int addIndexedSplits( - List splits, int i, List newSplits, - Configuration cfg) - throws IOException - { - final Path file = ((FileSplit)splits.get(i)).getPath(); - - final BGZFBlockIndex idx = new BGZFBlockIndex( - file.getFileSystem(cfg).open(getIdxPath(file))); - - int splitsEnd = splits.size(); - for (int j = i; j < splitsEnd; ++j) - if (!file.equals(((FileSplit)splits.get(j)).getPath())) - splitsEnd = j; - - for (int j = i; j < splitsEnd; ++j) { - final FileSplit fileSplit = (FileSplit)splits.get(j); - - final long start = fileSplit.getStart(); - final long end = start + fileSplit.getLength(); - - final Long blockStart = idx.prevBlock(start); - final Long blockEnd = j == splitsEnd-1 ? idx.prevBlock(end) - : idx.nextBlock(end); - - if (blockStart == null) - throw new RuntimeException( - "Internal error or invalid index: no block start for " +start); - - if (blockEnd == null) - throw new RuntimeException( - "Internal error or invalid index: no block end for " +end); - - newSplits.add(new FileSplit( - file, blockStart, blockEnd - blockStart, - fileSplit.getLocations())); - } - return splitsEnd; - } - - // Works the same way as addIndexedSplits, to avoid having to reopen the - // file repeatedly and checking addIndexedSplits for an index repeatedly. 
- private int addProbabilisticSplits( - List splits, int i, List newSplits, - Configuration cfg) - throws IOException - { - final Path path = ((FileSplit)splits.get(i)).getPath(); - final FSDataInputStream in = path.getFileSystem(cfg).open(path); - - final BGZFSplitGuesser guesser = new BGZFSplitGuesser(in); - - FileSplit fspl; - do { - fspl = (FileSplit)splits.get(i); - - final long beg = fspl.getStart(); - final long end = beg + fspl.getLength(); - - final long alignedBeg = guesser.guessNextBGZFBlockStart(beg, end); - - newSplits.add(new FileSplit( - path, alignedBeg, end - alignedBeg, fspl.getLocations())); - - ++i; - } while (i < splits.size() && fspl.getPath().equals(path)); - - in.close(); - return i; - } - - @Override public boolean isSplitable(JobContext job, Path path) { - return true; - } +public abstract class BGZFSplitFileInputFormat extends FileInputFormat { + + private Path getIdxPath(Path path) { + return path.suffix(".bgzfi"); + } + + /** The splits returned are FileSplits. */ + @Override + public List getSplits(JobContext job) throws IOException { + final List splits = super.getSplits(job); + + // Align the splits so that they don't cross blocks + + // addIndexedSplits() requires the given splits to be sorted by file + // path, so do so. Although FileInputFormat.getSplits() does, at the time + // of writing this, generate them in that order, we shouldn't rely on it. + Collections.sort( + splits, + new Comparator() { + public int compare(InputSplit a, InputSplit b) { + FileSplit fa = (FileSplit) a, fb = (FileSplit) b; + return fa.getPath().compareTo(fb.getPath()); + } + }); + + final List newSplits = new ArrayList(splits.size()); + + final Configuration cfg = job.getConfiguration(); + + for (int i = 0; i < splits.size(); ) { + try { + i = addIndexedSplits(splits, i, newSplits, cfg); + } catch (IOException e) { + i = addProbabilisticSplits(splits, i, newSplits, cfg); + } + } + return newSplits; + } + + // Handles all the splits that share the Path of the one at index i, + // returning the next index to be used. + private int addIndexedSplits( + List splits, int i, List newSplits, Configuration cfg) + throws IOException { + final Path file = ((FileSplit) splits.get(i)).getPath(); + + final BGZFBlockIndex idx = new BGZFBlockIndex(file.getFileSystem(cfg).open(getIdxPath(file))); + + int splitsEnd = splits.size(); + for (int j = i; j < splitsEnd; ++j) { + if (!file.equals(((FileSplit) splits.get(j)).getPath())) { + splitsEnd = j; + } + } + + for (int j = i; j < splitsEnd; ++j) { + final FileSplit fileSplit = (FileSplit) splits.get(j); + + final long start = fileSplit.getStart(); + final long end = start + fileSplit.getLength(); + + final Long blockStart = idx.prevBlock(start); + final Long blockEnd = j == splitsEnd - 1 ? idx.prevBlock(end) : idx.nextBlock(end); + + if (blockStart == null) { + throw new RuntimeException("Internal error or invalid index: no block start for " + start); + } + + if (blockEnd == null) { + throw new RuntimeException("Internal error or invalid index: no block end for " + end); + } + + newSplits.add( + new FileSplit(file, blockStart, blockEnd - blockStart, fileSplit.getLocations())); + } + return splitsEnd; + } + + // Works the same way as addIndexedSplits, to avoid having to reopen the + // file repeatedly and checking addIndexedSplits for an index repeatedly. 
+ private int addProbabilisticSplits( + List splits, int i, List newSplits, Configuration cfg) + throws IOException { + final Path path = ((FileSplit) splits.get(i)).getPath(); + final FSDataInputStream in = path.getFileSystem(cfg).open(path); + + final BGZFSplitGuesser guesser = new BGZFSplitGuesser(in); + + FileSplit fspl; + do { + fspl = (FileSplit) splits.get(i); + + final long beg = fspl.getStart(); + final long end = beg + fspl.getLength(); + + final long alignedBeg = guesser.guessNextBGZFBlockStart(beg, end); + + newSplits.add(new FileSplit(path, alignedBeg, end - alignedBeg, fspl.getLocations())); + + ++i; + } while (i < splits.size() && fspl.getPath().equals(path)); + + in.close(); + return i; + } + + @Override + public boolean isSplitable(JobContext job, Path path) { + return true; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitGuesser.java b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitGuesser.java index 9835ff5..d2aad3e 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitGuesser.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/BGZFSplitGuesser.java @@ -23,151 +23,149 @@ package org.seqdoop.hadoop_bam.util; import htsjdk.samtools.seekablestream.ByteArraySeekableStream; +import htsjdk.samtools.util.BlockCompressedInputStream; import java.io.IOException; import java.io.InputStream; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.Arrays; - -import htsjdk.samtools.util.BlockCompressedInputStream; - import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Seekable; public class BGZFSplitGuesser { - private InputStream inFile; - private Seekable seekableInFile; - private ByteArraySeekableStream in; - private final ByteBuffer buf; - - private final static int BGZF_MAGIC = 0x04088b1f; - private final static int BGZF_MAGIC_SUB = 0x00024342; - private final static int BGZF_SUB_SIZE = 4 + 2; - - public BGZFSplitGuesser(InputStream is) { - inFile = is; - seekableInFile = (Seekable) is; - - buf = ByteBuffer.allocate(8); - buf.order(ByteOrder.LITTLE_ENDIAN); - } - - public BGZFSplitGuesser(FSDataInputStream is) { - inFile = is; - seekableInFile = is; - - buf = ByteBuffer.allocate(8); - buf.order(ByteOrder.LITTLE_ENDIAN); - } - - /// Looks in the range [beg,end). Returns end if no BAM record was found. - public long guessNextBGZFBlockStart(long beg, long end) - throws IOException - { - // Buffer what we need to go through. Since the max size of a BGZF block - // is 0xffff (64K), and we might be just one byte off from the start of - // the previous one, we need 0xfffe bytes for the start, and then 0xffff - // for the block we're looking for. - - byte[] arr = new byte[2*0xffff - 1]; - - this.seekableInFile.seek(beg); - int totalRead = 0; - for (int left = Math.min((int)(end - beg), arr.length); left > 0;) { - final int r = inFile.read(arr, totalRead, left); - if (r < 0) - break; - totalRead += r; - left -= r; - } - arr = Arrays.copyOf(arr, totalRead); - - this.in = new ByteArraySeekableStream(arr); - - final BlockCompressedInputStream bgzf = - new BlockCompressedInputStream(this.in); - bgzf.setCheckCrcs(true); - - final int firstBGZFEnd = Math.min((int)(end - beg), 0xffff); - - for (int pos = 0;;) { - pos = guessNextBGZFPos(pos, firstBGZFEnd); - if (pos < 0) - return end; - - try { - // Seek in order to trigger decompression of the block and a CRC - // check. - bgzf.seek((long)pos << 16); - - // This has to catch Throwable, because it's possible to get an - // OutOfMemoryError due to an overly large size. 
- } catch (Throwable e) { - // Guessed BGZF position incorrectly: try the next guess. - ++pos; - continue; - } - return beg + pos; - } - } - - // Returns a negative number if it doesn't find anything. - private int guessNextBGZFPos(int p, int end) - throws IOException - { - for (;;) { - for (;;) { - in.seek(p); - in.read(buf.array(), 0, 4); - int n = buf.getInt(0); - - if (n == BGZF_MAGIC) - break; - - // Skip ahead a bit more than 1 byte if you can. - if (n >>> 8 == BGZF_MAGIC << 8 >>> 8) - ++p; - else if (n >>> 16 == BGZF_MAGIC << 16 >>> 16) - p += 2; - else - p += 3; - - if (p >= end) - return -1; - } - // Found what looks like a gzip block header: now get XLEN and - // search for the BGZF subfield. - final int p0 = p; - p += 10; - in.seek(p); - in.read(buf.array(), 0, 2); - p += 2; - final int xlen = getUShort(0); - final int subEnd = p + xlen; - - while (p < subEnd) { - in.read(buf.array(), 0, 4); - - if (buf.getInt(0) != BGZF_MAGIC_SUB) { - p += 4 + getUShort(2); - in.seek(p); - continue; - } - - // Found it: this is close enough to a BGZF block, make it - // our guess. - return p0; - } - // No luck: look for the next gzip block header. Start right after - // where we last saw the identifiers, although we could probably - // safely skip further ahead. (If we find the correct one right - // now, the previous block contained 0x1f8b0804 bytes of data: that - // seems... unlikely.) - p = p0 + 4; - } - } - - private int getUShort(final int idx) { - return (int)buf.getShort(idx) & 0xffff; - } + + private static final int BGZF_MAGIC = 0x04088b1f; + private static final int BGZF_MAGIC_SUB = 0x00024342; + private static final int BGZF_SUB_SIZE = 4 + 2; + private final ByteBuffer buf; + private InputStream inFile; + private Seekable seekableInFile; + private ByteArraySeekableStream in; + + public BGZFSplitGuesser(InputStream is) { + inFile = is; + seekableInFile = (Seekable) is; + + buf = ByteBuffer.allocate(8); + buf.order(ByteOrder.LITTLE_ENDIAN); + } + + public BGZFSplitGuesser(FSDataInputStream is) { + inFile = is; + seekableInFile = is; + + buf = ByteBuffer.allocate(8); + buf.order(ByteOrder.LITTLE_ENDIAN); + } + + /// Looks in the range [beg,end). Returns end if no BAM record was found. + public long guessNextBGZFBlockStart(long beg, long end) throws IOException { + // Buffer what we need to go through. Since the max size of a BGZF block + // is 0xffff (64K), and we might be just one byte off from the start of + // the previous one, we need 0xfffe bytes for the start, and then 0xffff + // for the block we're looking for. + + byte[] arr = new byte[2 * 0xffff - 1]; + + this.seekableInFile.seek(beg); + int totalRead = 0; + for (int left = Math.min((int) (end - beg), arr.length); left > 0; ) { + final int r = inFile.read(arr, totalRead, left); + if (r < 0) { + break; + } + totalRead += r; + left -= r; + } + arr = Arrays.copyOf(arr, totalRead); + + this.in = new ByteArraySeekableStream(arr); + + final BlockCompressedInputStream bgzf = new BlockCompressedInputStream(this.in); + bgzf.setCheckCrcs(true); + + final int firstBGZFEnd = Math.min((int) (end - beg), 0xffff); + + for (int pos = 0; ; ) { + pos = guessNextBGZFPos(pos, firstBGZFEnd); + if (pos < 0) { + return end; + } + + try { + // Seek in order to trigger decompression of the block and a CRC + // check. + bgzf.seek((long) pos << 16); + + // This has to catch Throwable, because it's possible to get an + // OutOfMemoryError due to an overly large size. + } catch (Throwable e) { + // Guessed BGZF position incorrectly: try the next guess. 
+ ++pos; + continue; + } + return beg + pos; + } + } + + // Returns a negative number if it doesn't find anything. + private int guessNextBGZFPos(int p, int end) throws IOException { + for (; ; ) { + for (; ; ) { + in.seek(p); + in.read(buf.array(), 0, 4); + int n = buf.getInt(0); + + if (n == BGZF_MAGIC) { + break; + } + + // Skip ahead a bit more than 1 byte if you can. + if (n >>> 8 == BGZF_MAGIC << 8 >>> 8) { + ++p; + } else if (n >>> 16 == BGZF_MAGIC << 16 >>> 16) { + p += 2; + } else { + p += 3; + } + + if (p >= end) { + return -1; + } + } + // Found what looks like a gzip block header: now get XLEN and + // search for the BGZF subfield. + final int p0 = p; + p += 10; + in.seek(p); + in.read(buf.array(), 0, 2); + p += 2; + final int xlen = getUShort(0); + final int subEnd = p + xlen; + + while (p < subEnd) { + in.read(buf.array(), 0, 4); + + if (buf.getInt(0) != BGZF_MAGIC_SUB) { + p += 4 + getUShort(2); + in.seek(p); + continue; + } + + // Found it: this is close enough to a BGZF block, make it + // our guess. + return p0; + } + // No luck: look for the next gzip block header. Start right after + // where we last saw the identifiers, although we could probably + // safely skip further ahead. (If we find the correct one right + // now, the previous block contained 0x1f8b0804 bytes of data: that + // seems... unlikely.) + p = p0 + 4; + } + } + + private int getUShort(final int idx) { + return (int) buf.getShort(idx) & 0xffff; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/ConfHelper.java b/src/main/java/org/seqdoop/hadoop_bam/util/ConfHelper.java index 1fb3be8..c3343e7 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/ConfHelper.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/ConfHelper.java @@ -24,47 +24,46 @@ import org.apache.hadoop.conf.Configuration; -public class ConfHelper -{ - /** - * Convert a string to a boolean. - * - * Accepted values: "yes", "true", "t", "y", "1" - * "no", "false", "f", "n", "0" - * All comparisons are case insensitive. - * - * If the value provided is null, defaultValue is returned. - * - * @exception IllegalArgumentException Thrown if value is not - * null and doesn't match any of the accepted strings. - */ - public static boolean parseBoolean(String value, boolean defaultValue) - { - if (value == null) - return defaultValue; +public class ConfHelper { - value = value.trim(); + /** + * Convert a string to a boolean. + * + *

Accepted true values: "yes", "true", "t", "y", "1"; accepted false values: "no", "false", "f",
+   * "n", "0". All comparisons are case insensitive.
+   *
+   *
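+   * <p>For example, {@code ConfHelper.parseBoolean("Yes", false)} returns {@code true}, while
+   * {@code ConfHelper.parseBoolean("maybe", false)} throws an {@link IllegalArgumentException}.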

If the value provided is null, defaultValue is returned. + * + * @throws IllegalArgumentException Thrown if value is not null and doesn't match any of the + * accepted strings. + */ + public static boolean parseBoolean(String value, boolean defaultValue) { + if (value == null) { + return defaultValue; + } - // any of the following will - final String[] acceptedTrue = new String[]{ "yes", "true", "t", "y", "1" }; - final String[] acceptedFalse = new String[]{ "no", "false", "f", "n", "0" }; + value = value.trim(); - for (String possible: acceptedTrue) - { - if (possible.equalsIgnoreCase(value)) - return true; - } - for (String possible: acceptedFalse) - { - if (possible.equalsIgnoreCase(value)) - return false; - } + // any of the following will + final String[] acceptedTrue = new String[] {"yes", "true", "t", "y", "1"}; + final String[] acceptedFalse = new String[] {"no", "false", "f", "n", "0"}; - throw new IllegalArgumentException("Unrecognized boolean value '" + value + "'"); - } + for (String possible : acceptedTrue) { + if (possible.equalsIgnoreCase(value)) { + return true; + } + } + for (String possible : acceptedFalse) { + if (possible.equalsIgnoreCase(value)) { + return false; + } + } - public static boolean parseBoolean(Configuration conf, String propertyName, boolean defaultValue) - { - return parseBoolean(conf.get(propertyName), defaultValue); - } + throw new IllegalArgumentException("Unrecognized boolean value '" + value + "'"); + } + + public static boolean parseBoolean( + Configuration conf, String propertyName, boolean defaultValue) { + return parseBoolean(conf.get(propertyName), defaultValue); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/DataInputWrapper.java b/src/main/java/org/seqdoop/hadoop_bam/util/DataInputWrapper.java index 915f533..7d4e8e6 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/DataInputWrapper.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/DataInputWrapper.java @@ -23,27 +23,36 @@ package org.seqdoop.hadoop_bam.util; import java.io.DataInput; -import java.io.InputStream; import java.io.IOException; +import java.io.InputStream; public class DataInputWrapper extends InputStream { - private final DataInput in; - - public DataInputWrapper(DataInput i) { in = i; } - - @Override public long skip(long n) throws IOException { - for (; n > Integer.MAX_VALUE; n -= Integer.MAX_VALUE) { - final int skipped = in.skipBytes(Integer.MAX_VALUE); - if (skipped < Integer.MAX_VALUE) - return skipped; - } - return in.skipBytes((int)n); - } - @Override public int read(byte[] b, int off, int len) throws IOException { - in.readFully(b, off, len); - return len; - } - @Override public int read() throws IOException { - return in.readByte(); - } + + private final DataInput in; + + public DataInputWrapper(DataInput i) { + in = i; + } + + @Override + public long skip(long n) throws IOException { + for (; n > Integer.MAX_VALUE; n -= Integer.MAX_VALUE) { + final int skipped = in.skipBytes(Integer.MAX_VALUE); + if (skipped < Integer.MAX_VALUE) { + return skipped; + } + } + return in.skipBytes((int) n); + } + + @Override + public int read(byte[] b, int off, int len) throws IOException { + in.readFully(b, off, len); + return len; + } + + @Override + public int read() throws IOException { + return in.readByte(); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/DataOutputWrapper.java b/src/main/java/org/seqdoop/hadoop_bam/util/DataOutputWrapper.java index 482623f..2603d1f 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/DataOutputWrapper.java +++ 
b/src/main/java/org/seqdoop/hadoop_bam/util/DataOutputWrapper.java @@ -27,14 +27,20 @@ import java.io.OutputStream; public class DataOutputWrapper extends OutputStream { - private final DataOutput out; - public DataOutputWrapper(DataOutput o) { out = o; } + private final DataOutput out; - @Override public void write(int b) throws IOException { - out.writeByte(b); - } - @Override public void write(byte[] b, int off, int len) throws IOException { - out.write(b, off, len); - } + public DataOutputWrapper(DataOutput o) { + out = o; + } + + @Override + public void write(int b) throws IOException { + out.writeByte(b); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/GetSortedBAMHeader.java b/src/main/java/org/seqdoop/hadoop_bam/util/GetSortedBAMHeader.java index a6083e2..bd1c076 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/GetSortedBAMHeader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/GetSortedBAMHeader.java @@ -22,36 +22,35 @@ package org.seqdoop.hadoop_bam.util; -import java.io.File; -import java.io.FileOutputStream; -import java.io.IOException; - import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; - +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; import org.seqdoop.hadoop_bam.SAMFormat; public final class GetSortedBAMHeader { - public static void main(String[] args) throws IOException { - if (args.length < 2) { - System.err.println( - "Usage: GetSortedBAMHeader input output\n\n"+ - - "Reads the BAM header from input (a standard BGZF-compressed BAM "+ - "file), and\nwrites it (BGZF-compressed, no terminator block) to "+ - "output. Sets the sort order\nindicated in the SAM header to "+ - "'coordinate'."); - System.exit(1); - } - - final SAMFileHeader h = - SamReaderFactory.makeDefault().validationStringency(ValidationStringency.SILENT) - .setUseAsyncIo(false) - .open(new File(args[0])).getFileHeader(); - h.setSortOrder(SAMFileHeader.SortOrder.coordinate); - new SAMOutputPreparer().prepareForRecords( - new FileOutputStream(args[1]), SAMFormat.BAM, h); - } + public static void main(String[] args) throws IOException { + if (args.length < 2) { + System.err.println( + "Usage: GetSortedBAMHeader input output\n\n" + + "Reads the BAM header from input (a standard BGZF-compressed BAM " + + "file), and\nwrites it (BGZF-compressed, no terminator block) to " + + "output. 
Sets the sort order\nindicated in the SAM header to " + + "'coordinate'."); + System.exit(1); + } + + final SAMFileHeader h = + SamReaderFactory.makeDefault() + .validationStringency(ValidationStringency.SILENT) + .setUseAsyncIo(false) + .open(new File(args[0])) + .getFileHeader(); + h.setSortOrder(SAMFileHeader.SortOrder.coordinate); + + new SAMOutputPreparer().prepareForRecords(new FileOutputStream(args[1]), SAMFormat.BAM, h); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/IntervalUtil.java b/src/main/java/org/seqdoop/hadoop_bam/util/IntervalUtil.java index 3229e47..acf9e9a 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/IntervalUtil.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/IntervalUtil.java @@ -2,61 +2,61 @@ import com.google.common.collect.ImmutableList; import htsjdk.samtools.util.Interval; -import org.apache.hadoop.conf.Configuration; -import org.seqdoop.hadoop_bam.FormatException; - import java.util.ArrayList; import java.util.List; -import java.util.function.Supplier; +import org.apache.hadoop.conf.Configuration; +import org.seqdoop.hadoop_bam.FormatException; -/** - * Common utilities across different file formats. - */ +/** Common utilities across different file formats. */ public final class IntervalUtil { - // declared to prevent instantiation. - private IntervalUtil() {} + // declared to prevent instantiation. + private IntervalUtil() {} - /** - * Returns the list of intervals found in a string configuration property separated by colons. - * @param conf the source configuration. - * @param intervalPropertyName the property name holding the intervals. - * @return {@code null} if there is no such a property in the configuration. - * @throws NullPointerException if either input is null. - */ - public static List getIntervals(final Configuration conf, final String intervalPropertyName) { - final String intervalsProperty = conf.get(intervalPropertyName); - if (intervalsProperty == null) { - return null; - } - if (intervalsProperty.isEmpty()) { - return ImmutableList.of(); - } - final List intervals = new ArrayList<>(); - for (final String s : intervalsProperty.split(",")) { - final int lastColonIdx = s.lastIndexOf(':'); - if (lastColonIdx < 0) { - throw new FormatException("no colon found in interval string: " + s); - } - final int hyphenIdx = s.indexOf('-', lastColonIdx + 1); - if (hyphenIdx < 0) { - throw new FormatException("no hyphen found after colon interval string: " + s); - } - final String sequence = s.substring(0, lastColonIdx); - final int start = parseIntOrThrowFormatException(s.substring(lastColonIdx + 1, hyphenIdx), - "invalid start position", s); - final int stop = parseIntOrThrowFormatException(s.substring(hyphenIdx + 1), - "invalid stop position", s); - intervals.add(new Interval(sequence, start, stop)); - } - return intervals; + /** + * Returns the list of intervals found in a string configuration property separated by colons. + * + * @param conf the source configuration. + * @param intervalPropertyName the property name holding the intervals. + * @return {@code null} if there is no such a property in the configuration. + * @throws NullPointerException if either input is null. 
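+   * <p>For example, a property value of {@code "chr1:100-200,chr2:5-10"} yields the two intervals
+   * chr1:100-200 and chr2:5-10, while an empty property value yields an empty list.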
+ */ + public static List getIntervals( + final Configuration conf, final String intervalPropertyName) { + final String intervalsProperty = conf.get(intervalPropertyName); + if (intervalsProperty == null) { + return null; + } + if (intervalsProperty.isEmpty()) { + return ImmutableList.of(); + } + final List intervals = new ArrayList<>(); + for (final String s : intervalsProperty.split(",")) { + final int lastColonIdx = s.lastIndexOf(':'); + if (lastColonIdx < 0) { + throw new FormatException("no colon found in interval string: " + s); + } + final int hyphenIdx = s.indexOf('-', lastColonIdx + 1); + if (hyphenIdx < 0) { + throw new FormatException("no hyphen found after colon interval string: " + s); + } + final String sequence = s.substring(0, lastColonIdx); + final int start = + parseIntOrThrowFormatException( + s.substring(lastColonIdx + 1, hyphenIdx), "invalid start position", s); + final int stop = + parseIntOrThrowFormatException(s.substring(hyphenIdx + 1), "invalid stop position", s); + intervals.add(new Interval(sequence, start, stop)); } + return intervals; + } - private static int parseIntOrThrowFormatException(final String str, final String error, final String input) { - try { - return Integer.parseInt(str); - } catch (final NumberFormatException ex) { - throw new FormatException(error + " in interval '" + input + "': '" + str + "'"); - } + private static int parseIntOrThrowFormatException( + final String str, final String error, final String input) { + try { + return Integer.parseInt(str); + } catch (final NumberFormatException ex) { + throw new FormatException(error + " in interval '" + input + "': '" + str + "'"); } + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/MurmurHash3.java b/src/main/java/org/seqdoop/hadoop_bam/util/MurmurHash3.java index 35e2be9..04413fa 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/MurmurHash3.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/MurmurHash3.java @@ -21,161 +21,221 @@ package org.seqdoop.hadoop_bam.util; import java.nio.ByteBuffer; -import java.nio.LongBuffer; import java.nio.ByteOrder; +import java.nio.LongBuffer; -/** This class implements a hash function giving the first 64 bits of the - * MurmurHash3_x64_128 hash. +/** + * This class implements a hash function giving the first 64 bits of the MurmurHash3_x64_128 hash. 
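+ *
+ * <p>For example, {@code MurmurHash3.murmurhash3("example", 0)} hashes the characters of the
+ * string directly (as UTF-16 code units) with seed 0 and returns the result as a {@code long}.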
*/ @SuppressWarnings("fallthrough") public final class MurmurHash3 { - public static long murmurhash3(byte[] key, int seed) { - - final ByteBuffer data = - ByteBuffer.wrap(key).order(ByteOrder.LITTLE_ENDIAN); - - final int len = key.length; - - final int nblocks = len / 16; - - long h1 = seed; - long h2 = seed; - - final long c1 = 0x87c37b91114253d5L; - final long c2 = 0x4cf5ad432745937fL; - - final LongBuffer blocks = data.asLongBuffer(); - - for (int i = 0; i < nblocks; ++i) { - long k1 = blocks.get(i*2 + 0); - long k2 = blocks.get(i*2 + 1); - - k1 *= c1; k1 = k1 << 31 | k1 >>> (64-31); k1 *= c2; h1 ^= k1; - - h1 = h1 << 27 | h1 >>> (64-27); h1 += h2; h1 = h1*5 + 0x52dce729; - - k2 *= c2; k2 = k2 << 33 | k2 >>> (64-33); k2 *= c1; h2 ^= k2; - - h2 = h2 << 31 | h1 >>> (64-31); h2 += h1; h2 = h2*5 + 0x38495ab5; - } - - data.position(nblocks * 16); - final ByteBuffer tail = data.slice(); - - long k1 = 0; - long k2 = 0; - - switch (len & 15) { - case 15: k2 ^= ((long)tail.get(14) & 0xff) << 48; - case 14: k2 ^= ((long)tail.get(13) & 0xff) << 40; - case 13: k2 ^= ((long)tail.get(12) & 0xff) << 32; - case 12: k2 ^= ((long)tail.get(11) & 0xff) << 24; - case 11: k2 ^= ((long)tail.get(10) & 0xff) << 16; - case 10: k2 ^= ((long)tail.get( 9) & 0xff) << 8; - case 9: k2 ^= ((long)tail.get( 8) & 0xff) << 0; - k2 *= c2; k2 = k2 << 33 | k2 >>> (64-33); k2 *= c1; h2 ^= k2; - - case 8: k1 ^= ((long)tail.get( 7) & 0xff) << 56; - case 7: k1 ^= ((long)tail.get( 6) & 0xff) << 48; - case 6: k1 ^= ((long)tail.get( 5) & 0xff) << 40; - case 5: k1 ^= ((long)tail.get( 4) & 0xff) << 32; - case 4: k1 ^= ((long)tail.get( 3) & 0xff) << 24; - case 3: k1 ^= ((long)tail.get( 2) & 0xff) << 16; - case 2: k1 ^= ((long)tail.get( 1) & 0xff) << 8; - case 1: k1 ^= ((long)tail.get( 0) & 0xff) << 0; - k1 *= c1; k1 = k1 << 31 | k1 >>> (64-31); k1 *= c2; h1 ^= k1; - case 0: break; - } - - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix(h1); - h2 = fmix(h2); - - h1 += h2; - // h2 += h1; - - return h1; - } - - /** This version hashes the characters directly. It is not equivalent to - * hashing chars.toString().getBytes(), as it hashes UTF-16 code units, but - * it is much faster. 
- */ - public static long murmurhash3(CharSequence chars, int seed) { - - final int len = chars.length(); - - final int nblocks = len / 8; - - long h1 = seed; - long h2 = seed; - - final long c1 = 0x87c37b91114253d5L; - final long c2 = 0x4cf5ad432745937fL; - - for (int i = 0; i < nblocks; ++i) { - int i0 = (i*2 + 0) * 4; - int i1 = (i*2 + 1) * 4; - - long k1 = (long)chars.charAt(i0) - | (long)chars.charAt(i0+1) << 16 - | (long)chars.charAt(i0+2) << 32 - | (long)chars.charAt(i0+3) << 48; - long k2 = (long)chars.charAt(i1) - | (long)chars.charAt(i1+1) << 16 - | (long)chars.charAt(i1+2) << 32 - | (long)chars.charAt(i1+3) << 48; - - k1 *= c1; k1 = k1 << 31 | k1 >>> (64-31); k1 *= c2; h1 ^= k1; - - h1 = h1 << 27 | h1 >>> (64-27); h1 += h2; h1 = h1*5 + 0x52dce729; - - k2 *= c2; k2 = k2 << 33 | k2 >>> (64-33); k2 *= c1; h2 ^= k2; - - h2 = h2 << 31 | h1 >>> (64-31); h2 += h1; h2 = h2*5 + 0x38495ab5; - } - - long k1 = 0; - long k2 = 0; - - switch (len & 7) { - case 7: k2 ^= (long)chars.charAt(6) << 32; - case 6: k2 ^= (long)chars.charAt(5) << 16; - case 5: k2 ^= (long)chars.charAt(4) << 0; - k2 *= c2; k2 = k2 << 33 | k2 >>> (64-33); k2 *= c1; h2 ^= k2; - - case 4: k1 ^= (long)chars.charAt(3) << 48; - case 3: k1 ^= (long)chars.charAt(2) << 32; - case 2: k1 ^= (long)chars.charAt(1) << 16; - case 1: k1 ^= (long)chars.charAt(0) << 0; - k1 *= c1; k1 = k1 << 31 | k1 >>> (64-31); k1 *= c2; h1 ^= k1; - case 0: break; - } - h1 ^= len; h2 ^= len; - - h1 += h2; - h2 += h1; - - h1 = fmix(h1); - h2 = fmix(h2); - - h1 += h2; - // h2 += h1; - - return h1; - } - - private static long fmix(long k) { - k ^= k >>> 33; - k *= 0xff51afd7ed558ccdL; - k ^= k >>> 33; - k *= 0xc4ceb9fe1a85ec53L; - k ^= k >>> 33; - return k; - } + public static long murmurhash3(byte[] key, int seed) { + + final ByteBuffer data = ByteBuffer.wrap(key).order(ByteOrder.LITTLE_ENDIAN); + + final int len = key.length; + + final int nblocks = len / 16; + + long h1 = seed; + long h2 = seed; + + final long c1 = 0x87c37b91114253d5L; + final long c2 = 0x4cf5ad432745937fL; + + final LongBuffer blocks = data.asLongBuffer(); + + for (int i = 0; i < nblocks; ++i) { + long k1 = blocks.get(i * 2 + 0); + long k2 = blocks.get(i * 2 + 1); + + k1 *= c1; + k1 = k1 << 31 | k1 >>> (64 - 31); + k1 *= c2; + h1 ^= k1; + + h1 = h1 << 27 | h1 >>> (64 - 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = k2 << 33 | k2 >>> (64 - 33); + k2 *= c1; + h2 ^= k2; + + h2 = h2 << 31 | h1 >>> (64 - 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + data.position(nblocks * 16); + final ByteBuffer tail = data.slice(); + + long k1 = 0; + long k2 = 0; + + switch (len & 15) { + case 15: + k2 ^= ((long) tail.get(14) & 0xff) << 48; + case 14: + k2 ^= ((long) tail.get(13) & 0xff) << 40; + case 13: + k2 ^= ((long) tail.get(12) & 0xff) << 32; + case 12: + k2 ^= ((long) tail.get(11) & 0xff) << 24; + case 11: + k2 ^= ((long) tail.get(10) & 0xff) << 16; + case 10: + k2 ^= ((long) tail.get(9) & 0xff) << 8; + case 9: + k2 ^= ((long) tail.get(8) & 0xff) << 0; + k2 *= c2; + k2 = k2 << 33 | k2 >>> (64 - 33); + k2 *= c1; + h2 ^= k2; + + case 8: + k1 ^= ((long) tail.get(7) & 0xff) << 56; + case 7: + k1 ^= ((long) tail.get(6) & 0xff) << 48; + case 6: + k1 ^= ((long) tail.get(5) & 0xff) << 40; + case 5: + k1 ^= ((long) tail.get(4) & 0xff) << 32; + case 4: + k1 ^= ((long) tail.get(3) & 0xff) << 24; + case 3: + k1 ^= ((long) tail.get(2) & 0xff) << 16; + case 2: + k1 ^= ((long) tail.get(1) & 0xff) << 8; + case 1: + k1 ^= ((long) tail.get(0) & 0xff) << 0; + k1 *= c1; + k1 = k1 << 31 | k1 
>>> (64 - 31); + k1 *= c2; + h1 ^= k1; + case 0: + break; + } + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix(h1); + h2 = fmix(h2); + + h1 += h2; + // h2 += h1; + + return h1; + } + + /** + * This version hashes the characters directly. It is not equivalent to hashing + * chars.toString().getBytes(), as it hashes UTF-16 code units, but it is much faster. + */ + public static long murmurhash3(CharSequence chars, int seed) { + + final int len = chars.length(); + + final int nblocks = len / 8; + + long h1 = seed; + long h2 = seed; + + final long c1 = 0x87c37b91114253d5L; + final long c2 = 0x4cf5ad432745937fL; + + for (int i = 0; i < nblocks; ++i) { + int i0 = (i * 2 + 0) * 4; + int i1 = (i * 2 + 1) * 4; + + long k1 = + (long) chars.charAt(i0) + | (long) chars.charAt(i0 + 1) << 16 + | (long) chars.charAt(i0 + 2) << 32 + | (long) chars.charAt(i0 + 3) << 48; + long k2 = + (long) chars.charAt(i1) + | (long) chars.charAt(i1 + 1) << 16 + | (long) chars.charAt(i1 + 2) << 32 + | (long) chars.charAt(i1 + 3) << 48; + + k1 *= c1; + k1 = k1 << 31 | k1 >>> (64 - 31); + k1 *= c2; + h1 ^= k1; + + h1 = h1 << 27 | h1 >>> (64 - 27); + h1 += h2; + h1 = h1 * 5 + 0x52dce729; + + k2 *= c2; + k2 = k2 << 33 | k2 >>> (64 - 33); + k2 *= c1; + h2 ^= k2; + + h2 = h2 << 31 | h1 >>> (64 - 31); + h2 += h1; + h2 = h2 * 5 + 0x38495ab5; + } + + long k1 = 0; + long k2 = 0; + + switch (len & 7) { + case 7: + k2 ^= (long) chars.charAt(6) << 32; + case 6: + k2 ^= (long) chars.charAt(5) << 16; + case 5: + k2 ^= (long) chars.charAt(4) << 0; + k2 *= c2; + k2 = k2 << 33 | k2 >>> (64 - 33); + k2 *= c1; + h2 ^= k2; + + case 4: + k1 ^= (long) chars.charAt(3) << 48; + case 3: + k1 ^= (long) chars.charAt(2) << 32; + case 2: + k1 ^= (long) chars.charAt(1) << 16; + case 1: + k1 ^= (long) chars.charAt(0) << 0; + k1 *= c1; + k1 = k1 << 31 | k1 >>> (64 - 31); + k1 *= c2; + h1 ^= k1; + case 0: + break; + } + + h1 ^= len; + h2 ^= len; + + h1 += h2; + h2 += h1; + + h1 = fmix(h1); + h2 = fmix(h2); + + h1 += h2; + // h2 += h1; + + return h1; + } + + private static long fmix(long k) { + k ^= k >>> 33; + k *= 0xff51afd7ed558ccdL; + k ^= k >>> 33; + k *= 0xc4ceb9fe1a85ec53L; + k ^= k >>> 33; + return k; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/NIOFileUtil.java b/src/main/java/org/seqdoop/hadoop_bam/util/NIOFileUtil.java index 065eef4..407ffb9 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/NIOFileUtil.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/NIOFileUtil.java @@ -18,13 +18,14 @@ import java.util.stream.Collectors; public class NIOFileUtil { - private NIOFileUtil() { - } static final String PARTS_GLOB = "glob:**/part-[mr]-[0-9][0-9][0-9][0-9][0-9]*"; + private NIOFileUtil() {} + /** * Convert the given path {@link URI} to a {@link Path} object. + * * @param uri the path to convert * @return a {@link Path} object */ @@ -46,6 +47,7 @@ public static Path asPath(URI uri) { /** * Convert the given path string to a {@link Path} object. + * * @param path the path to convert * @return a {@link Path} object */ @@ -56,54 +58,57 @@ public static Path asPath(String path) { /** * Delete the given directory and all of its contents if non-empty. 
+ * * @param directory the directory to delete - * @throws IOException */ static void deleteRecursive(Path directory) throws IOException { - Files.walkFileTree(directory, new SimpleFileVisitor() { - @Override - public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { - Files.delete(file); - return FileVisitResult.CONTINUE; - } - @Override - public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { - Files.deleteIfExists(dir); - return FileVisitResult.CONTINUE; - } - }); + Files.walkFileTree( + directory, + new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) + throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException exc) throws IOException { + Files.deleteIfExists(dir); + return FileVisitResult.CONTINUE; + } + }); } /** - * Returns all the files in a directory that match the given pattern, and that don't - * have the given extension. - * @param directory the directory to look for files in, subdirectories are not - * considered - * @param syntaxAndPattern the syntax and pattern to use for matching (see - * {@link java.nio.file.FileSystem#getPathMatcher} + * Returns all the files in a directory that match the given pattern, and that don't have the + * given extension. + * + * @param directory the directory to look for files in, subdirectories are not considered + * @param syntaxAndPattern the syntax and pattern to use for matching (see {@link + * java.nio.file.FileSystem#getPathMatcher} * @param excludesExt the extension to exclude, or null to exclude nothing * @return a list of files, sorted by name - * @throws IOException */ - static List getFilesMatching(Path directory, - String syntaxAndPattern, String excludesExt) throws IOException { + static List getFilesMatching(Path directory, String syntaxAndPattern, String excludesExt) + throws IOException { PathMatcher matcher = directory.getFileSystem().getPathMatcher(syntaxAndPattern); - List parts = Files.walk(directory) - .filter(matcher::matches) - .filter(path -> excludesExt == null || !path.toString().endsWith(excludesExt)) - .collect(Collectors.toList()); + List parts = + Files.walk(directory) + .filter(matcher::matches) + .filter(path -> excludesExt == null || !path.toString().endsWith(excludesExt)) + .collect(Collectors.toList()); Collections.sort(parts); return parts; } /** * Merge the given part files in order into an output stream. 
+ * * @param parts the part files to merge * @param out the stream to write each file into, in order - * @throws IOException */ - static void mergeInto(List parts, OutputStream out) - throws IOException { + static void mergeInto(List parts, OutputStream out) throws IOException { for (final Path part : parts) { Files.copy(part, out); } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/SAMFileMerger.java b/src/main/java/org/seqdoop/hadoop_bam/util/SAMFileMerger.java index 761d578..97a6b27 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/SAMFileMerger.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/SAMFileMerger.java @@ -1,5 +1,10 @@ package org.seqdoop.hadoop_bam.util; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.asPath; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.deleteRecursive; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.getFilesMatching; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.mergeInto; + import com.google.common.io.CountingOutputStream; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.cram.build.CramIO; @@ -20,31 +25,29 @@ import org.seqdoop.hadoop_bam.SplittingBAMIndex; import org.seqdoop.hadoop_bam.SplittingBAMIndexer; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.asPath; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.deleteRecursive; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.getFilesMatching; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.mergeInto; - /** - * Merges headerless BAM or CRAM files produced by {@link KeyIgnoringAnySAMOutputFormat} - * into a single file. + * Merges headerless BAM or CRAM files produced by {@link KeyIgnoringAnySAMOutputFormat} into a + * single file. */ public class SAMFileMerger { - private SAMFileMerger() { - } + private SAMFileMerger() {} /** - * Merge part file shards produced by {@link KeyIgnoringAnySAMOutputFormat} into a - * single file with the given header. + * Merge part file shards produced by {@link KeyIgnoringAnySAMOutputFormat} into a single file + * with the given header. + * * @param partDirectory the directory containing part files * @param outputFile the file to write the merged file to * @param samOutputFormat the format (must be BAM or CRAM; SAM is not supported) * @param header the header for the merged file - * @throws IOException */ - public static void mergeParts(final String partDirectory, final String outputFile, - final SAMFormat samOutputFormat, final SAMFileHeader header) throws IOException { + public static void mergeParts( + final String partDirectory, + final String outputFile, + final SAMFormat samOutputFormat, + final SAMFileHeader header) + throws IOException { // First, check for the _SUCCESS file. 
final Path partPath = asPath(partDirectory); @@ -54,22 +57,23 @@ public static void mergeParts(final String partDirectory, final String outputFil } final Path outputPath = asPath(outputFile); if (partPath.equals(outputPath)) { - throw new IllegalArgumentException("Cannot merge parts into output with same " + - "path: " + partPath); + throw new IllegalArgumentException( + "Cannot merge parts into output with same " + "path: " + partPath); } - List parts = getFilesMatching(partPath, NIOFileUtil.PARTS_GLOB, - SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); + List parts = + getFilesMatching( + partPath, NIOFileUtil.PARTS_GLOB, SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); if (parts.isEmpty()) { - throw new IllegalArgumentException("Could not write bam file because no part " + - "files were found in " + partPath); + throw new IllegalArgumentException( + "Could not write bam file because no part " + "files were found in " + partPath); } Files.deleteIfExists(outputPath); long headerLength; try (final CountingOutputStream out = - new CountingOutputStream(Files.newOutputStream(outputPath))) { + new CountingOutputStream(Files.newOutputStream(outputPath))) { if (header != null) { new SAMOutputPreparer().prepareForRecords(out, samOutputFormat, header); // write the header } @@ -79,8 +83,9 @@ public static void mergeParts(final String partDirectory, final String outputFil } long fileLength = Files.size(outputPath); - final Path outputSplittingBaiPath = outputPath.resolveSibling( - outputPath.getFileName() + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); + final Path outputSplittingBaiPath = + outputPath.resolveSibling( + outputPath.getFileName() + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); Files.deleteIfExists(outputSplittingBaiPath); try (final OutputStream out = Files.newOutputStream(outputSplittingBaiPath)) { mergeSplittingBaiFiles(out, partPath, headerLength, fileLength); @@ -92,8 +97,10 @@ public static void mergeParts(final String partDirectory, final String outputFil deleteRecursive(partPath); } - //Terminate the aggregated output stream with an appropriate SAMOutputFormat-dependent terminator block - private static void writeTerminatorBlock(final OutputStream out, final SAMFormat samOutputFormat) throws IOException { + // Terminate the aggregated output stream with an appropriate SAMOutputFormat-dependent terminator + // block + private static void writeTerminatorBlock(final OutputStream out, final SAMFormat samOutputFormat) + throws IOException { if (SAMFormat.CRAM == samOutputFormat) { CramIO.issueEOF(CramVersions.DEFAULT_CRAM_VERSION, out); // terminate with CRAM EOF container } else { @@ -101,10 +108,11 @@ private static void writeTerminatorBlock(final OutputStream out, final SAMFormat } } - static void mergeSplittingBaiFiles(OutputStream out, Path directory, long headerLength, - long fileLength) throws IOException { - final List parts = getFilesMatching(directory, - NIOFileUtil.PARTS_GLOB + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION, null); + static void mergeSplittingBaiFiles( + OutputStream out, Path directory, long headerLength, long fileLength) throws IOException { + final List parts = + getFilesMatching( + directory, NIOFileUtil.PARTS_GLOB + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION, null); if (parts.isEmpty()) { return; // nothing to merge } @@ -124,7 +132,6 @@ static void mergeSplittingBaiFiles(OutputStream out, Path directory, long header } } - SplittingBAMIndexer splittingBAMIndexer = new SplittingBAMIndexer(out); for (Long offset : mergedVirtualOffsets) { 
splittingBAMIndexer.writeVirtualOffset(offset); @@ -132,8 +139,11 @@ static void mergeSplittingBaiFiles(OutputStream out, Path directory, long header splittingBAMIndexer.finish(partFileOffset); int terminatingBlockLength = BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length; if (partFileOffset + terminatingBlockLength != fileLength) { - throw new IOException("Part file length mismatch. Last part file offset is " + - partFileOffset + ", expected: " + (fileLength - terminatingBlockLength)); + throw new IOException( + "Part file length mismatch. Last part file offset is " + + partFileOffset + + ", expected: " + + (fileLength - terminatingBlockLength)); } for (final Path part : parts) { diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/SAMHeaderReader.java b/src/main/java/org/seqdoop/hadoop_bam/util/SAMHeaderReader.java index b4d062d..9f92c90 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/SAMHeaderReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/SAMHeaderReader.java @@ -22,75 +22,65 @@ package org.seqdoop.hadoop_bam.util; -import java.io.IOException; -import java.io.InputStream; -import java.net.URI; -import java.nio.file.Paths; - -import htsjdk.samtools.cram.ref.ReferenceSource; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; - import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SamInputResource; import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.ValidationStringency; +import htsjdk.samtools.cram.ref.ReferenceSource; +import java.io.IOException; +import java.io.InputStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; import org.seqdoop.hadoop_bam.CRAMInputFormat; public final class SAMHeaderReader { - /** A String property corresponding to a ValidationStringency - * value. If set, the given stringency is used when any part of the - * Hadoop-BAM library reads SAM or BAM. - */ - public static final String VALIDATION_STRINGENCY_PROPERTY = - "hadoopbam.samheaderreader.validation-stringency"; - public static SAMFileHeader readSAMHeaderFrom(Path path, Configuration conf) - throws IOException - { - InputStream i = path.getFileSystem(conf).open(path); - final SAMFileHeader h = readSAMHeaderFrom(i, conf); - i.close(); - return h; - } + /** + * A String property corresponding to a ValidationStringency value. If set, the given stringency + * is used when any part of the Hadoop-BAM library reads SAM or BAM. + */ + public static final String VALIDATION_STRINGENCY_PROPERTY = + "hadoopbam.samheaderreader.validation-stringency"; + + public static SAMFileHeader readSAMHeaderFrom(Path path, Configuration conf) throws IOException { + InputStream i = path.getFileSystem(conf).open(path); + final SAMFileHeader h = readSAMHeaderFrom(i, conf); + i.close(); + return h; + } - /** Does not close the stream. */ - public static SAMFileHeader readSAMHeaderFrom( - final InputStream in, final Configuration conf) - { - final ValidationStringency - stringency = getValidationStringency(conf); - SamReaderFactory readerFactory = SamReaderFactory.makeDefault() - .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) - .setUseAsyncIo(false); - if (stringency != null) { - readerFactory.validationStringency(stringency); - } + /** Does not close the stream. 
*/ + public static SAMFileHeader readSAMHeaderFrom(final InputStream in, final Configuration conf) { + final ValidationStringency stringency = getValidationStringency(conf); + SamReaderFactory readerFactory = + SamReaderFactory.makeDefault() + .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false) + .setUseAsyncIo(false); + if (stringency != null) { + readerFactory.validationStringency(stringency); + } - final ReferenceSource refSource = getReferenceSource(conf); - if (null != refSource) { - readerFactory.referenceSource(refSource); - } - return readerFactory.open(SamInputResource.of(in)).getFileHeader(); - } + final ReferenceSource refSource = getReferenceSource(conf); + if (null != refSource) { + readerFactory.referenceSource(refSource); + } + return readerFactory.open(SamInputResource.of(in)).getFileHeader(); + } - public static ValidationStringency getValidationStringency( - final Configuration conf) - { - final String p = conf.get(VALIDATION_STRINGENCY_PROPERTY); - return p == null ? null : ValidationStringency.valueOf(p); - } + public static ValidationStringency getValidationStringency(final Configuration conf) { + final String p = conf.get(VALIDATION_STRINGENCY_PROPERTY); + return p == null ? null : ValidationStringency.valueOf(p); + } - public static ReferenceSource getReferenceSource( - final Configuration conf) - { - //TODO: There isn't anything particularly CRAM-specific about reference source or validation - // stringency other than that a reference source is required for CRAM files. We should move - // the reference source and validation stringency property names and utility methods out of - // CRAMInputFormat and SAMHeaderReader and combine them together into a single class for extracting - // configuration params, but it would break backward compatibility with existing code that - // is dependent on the CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY. - final String refSourcePath = conf.get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY); - return refSourcePath == null ? null : new ReferenceSource(NIOFileUtil.asPath(refSourcePath)); - } + public static ReferenceSource getReferenceSource(final Configuration conf) { + // TODO: There isn't anything particularly CRAM-specific about reference source or validation + // stringency other than that a reference source is required for CRAM files. We should move + // the reference source and validation stringency property names and utility methods out of + // CRAMInputFormat and SAMHeaderReader and combine them together into a single class for + // extracting + // configuration params, but it would break backward compatibility with existing code that + // is dependent on the CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY. + final String refSourcePath = conf.get(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY); + return refSourcePath == null ? 
null : new ReferenceSource(NIOFileUtil.asPath(refSourcePath)); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/SAMOutputPreparer.java b/src/main/java/org/seqdoop/hadoop_bam/util/SAMOutputPreparer.java index 688b467..81eb8a8 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/SAMOutputPreparer.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/SAMOutputPreparer.java @@ -22,6 +22,12 @@ package org.seqdoop.hadoop_bam.util; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMSequenceRecord; +import htsjdk.samtools.SAMTextHeaderCodec; +import htsjdk.samtools.cram.build.CramIO; +import htsjdk.samtools.cram.common.CramVersions; +import htsjdk.samtools.util.BlockCompressedOutputStream; import java.io.FilterOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -30,115 +36,102 @@ import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.List; - -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMSequenceRecord; -import htsjdk.samtools.SAMTextHeaderCodec; -import htsjdk.samtools.cram.build.CramIO; -import htsjdk.samtools.cram.common.CramVersions; -import htsjdk.samtools.util.BlockCompressedOutputStream; - import org.seqdoop.hadoop_bam.SAMFormat; public class SAMOutputPreparer { - private ByteBuffer buf; - - public SAMOutputPreparer() { - // Enough room for a 32-bit integer. - buf = ByteBuffer.wrap(new byte[4]); - buf.order(ByteOrder.LITTLE_ENDIAN); - } - - public static final byte[] BAM_MAGIC = {'B','A','M', 1}; - - /** Prepares the given output stream for writing of SAMRecords in the given - * format. This includes writing the given SAM header and, in the case of - * BAM or CRAM, writing some further metadata as well as compressing everything - * written. Returns a new stream to replace the original: it will do the - * appropriate compression for BAM/CRAM files. - */ - public OutputStream prepareForRecords( - OutputStream out, final SAMFormat format, - final SAMFileHeader header) - throws IOException { - - switch (format) { - case SAM: - out = prepareSAMOrBAMStream(out, format, header); - break; - case BAM: - out = prepareSAMOrBAMStream(out, format, header); - break; - case CRAM: - out = prepareCRAMStream(out, format, header); - break; - default: - throw new IllegalArgumentException - ("Unsupported SAM file format, must be one of SAM, BAM or CRAM"); - } - - // Important for BAM: if the caller doesn't want to use the new stream - // for some reason, the BlockCompressedOutputStream's buffer would never - // be flushed. 
- out.flush(); - return out; - } - - private OutputStream prepareCRAMStream( - OutputStream out, final SAMFormat format, - final SAMFileHeader header) throws IOException - { - CramIO.writeHeader(CramVersions.DEFAULT_CRAM_VERSION, out, header, null); - return out; - } - - private OutputStream prepareSAMOrBAMStream( - OutputStream out, final SAMFormat format, - final SAMFileHeader header) throws IOException - { - final StringWriter sw = new StringWriter(); - new SAMTextHeaderCodec().encode(sw, header); - final String text = sw.toString(); - - if (format == SAMFormat.BAM) { - out = new BlockCompressedOutputStream(out, null); - out.write(BAM_MAGIC); - writeInt32(out, text.length()); - } - - writeString(out, text); - - if (format == SAMFormat.BAM) { - final List refs = - header.getSequenceDictionary().getSequences(); - - writeInt32(out, refs.size()); - - for (final SAMSequenceRecord ref : refs) { - final String name = ref.getSequenceName(); - writeInt32(out, name.length() + 1); - writeString(out, name); - out.write(0); - writeInt32(out, ref.getSequenceLength()); - } - } - - return out; - } - - private static void writeString(final OutputStream out, final String s) - throws IOException - { - // Don't flush the underlying stream yet, only the writer: in the case of - // BAM, we might be able to cram more things into the gzip block still. - final OutputStreamWriter w = new OutputStreamWriter( - new FilterOutputStream(out) { @Override public void flush() {} } ); - w.write(s); - w.flush(); - } - - private void writeInt32(final OutputStream out, int n) throws IOException { - buf.putInt(0, n); - out.write(buf.array()); - } + + public static final byte[] BAM_MAGIC = {'B', 'A', 'M', 1}; + private ByteBuffer buf; + + public SAMOutputPreparer() { + // Enough room for a 32-bit integer. + buf = ByteBuffer.wrap(new byte[4]); + buf.order(ByteOrder.LITTLE_ENDIAN); + } + + private static void writeString(final OutputStream out, final String s) throws IOException { + // Don't flush the underlying stream yet, only the writer: in the case of + // BAM, we might be able to cram more things into the gzip block still. + final OutputStreamWriter w = + new OutputStreamWriter( + new FilterOutputStream(out) { + @Override + public void flush() {} + }); + w.write(s); + w.flush(); + } + + /** + * Prepares the given output stream for writing of SAMRecords in the given format. This includes + * writing the given SAM header and, in the case of BAM or CRAM, writing some further metadata as + * well as compressing everything written. Returns a new stream to replace the original: it will + * do the appropriate compression for BAM/CRAM files. + */ + public OutputStream prepareForRecords( + OutputStream out, final SAMFormat format, final SAMFileHeader header) throws IOException { + + switch (format) { + case SAM: + out = prepareSAMOrBAMStream(out, format, header); + break; + case BAM: + out = prepareSAMOrBAMStream(out, format, header); + break; + case CRAM: + out = prepareCRAMStream(out, format, header); + break; + default: + throw new IllegalArgumentException( + "Unsupported SAM file format, must be one of SAM, BAM or CRAM"); + } + + // Important for BAM: if the caller doesn't want to use the new stream + // for some reason, the BlockCompressedOutputStream's buffer would never + // be flushed. 
+ out.flush(); + return out; + } + + private OutputStream prepareCRAMStream( + OutputStream out, final SAMFormat format, final SAMFileHeader header) throws IOException { + CramIO.writeHeader(CramVersions.DEFAULT_CRAM_VERSION, out, header, null); + return out; + } + + private OutputStream prepareSAMOrBAMStream( + OutputStream out, final SAMFormat format, final SAMFileHeader header) throws IOException { + final StringWriter sw = new StringWriter(); + new SAMTextHeaderCodec().encode(sw, header); + final String text = sw.toString(); + + if (format == SAMFormat.BAM) { + out = new BlockCompressedOutputStream(out, null); + out.write(BAM_MAGIC); + writeInt32(out, text.length()); + } + + writeString(out, text); + + if (format == SAMFormat.BAM) { + final List refs = header.getSequenceDictionary().getSequences(); + + writeInt32(out, refs.size()); + + for (final SAMSequenceRecord ref : refs) { + final String name = ref.getSequenceName(); + writeInt32(out, name.length() + 1); + writeString(out, name); + out.write(0); + writeInt32(out, ref.getSequenceLength()); + } + } + + return out; + } + + private void writeInt32(final OutputStream out, int n) throws IOException { + buf.putInt(0, n); + out.write(buf.array()); + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/VCFFileMerger.java b/src/main/java/org/seqdoop/hadoop_bam/util/VCFFileMerger.java index 81c8b9a..f6fd498 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/VCFFileMerger.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/VCFFileMerger.java @@ -1,5 +1,10 @@ package org.seqdoop.hadoop_bam.util; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.asPath; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.deleteRecursive; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.getFilesMatching; +import static org.seqdoop.hadoop_bam.util.NIOFileUtil.mergeInto; + import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedOutputStream; import htsjdk.samtools.util.BlockCompressedStreamConstants; @@ -21,28 +26,25 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.asPath; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.deleteRecursive; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.getFilesMatching; -import static org.seqdoop.hadoop_bam.util.NIOFileUtil.mergeInto; - /** - * Merges headerless VCF files produced by {@link KeyIgnoringVCFOutputFormat} - * into a single file. BCF files are not supported. + * Merges headerless VCF files produced by {@link KeyIgnoringVCFOutputFormat} into a single file. + * BCF files are not supported. */ public class VCFFileMerger { + private static final Logger logger = LoggerFactory.getLogger(VCFFileMerger.class); /** - * Merge part file shards produced by {@link KeyIgnoringVCFOutputFormat} into a - * single file with the given header. + * Merge part file shards produced by {@link KeyIgnoringVCFOutputFormat} into a single file with + * the given header. + * * @param partDirectory the directory containing part files * @param outputFile the file to write the merged file to * @param header the header for the merged file - * @throws IOException */ - public static void mergeParts(final String partDirectory, final String outputFile, - final VCFHeader header) throws IOException { + public static void mergeParts( + final String partDirectory, final String outputFile, final VCFHeader header) + throws IOException { // First, check for the _SUCCESS file. 
final Path partPath = asPath(partDirectory); final Path successPath = partPath.resolve("_SUCCESS"); @@ -51,15 +53,15 @@ public static void mergeParts(final String partDirectory, final String outputFil } final Path outputPath = asPath(outputFile); if (partPath.equals(outputPath)) { - throw new IllegalArgumentException("Cannot merge parts into output with same " + - "path: " + partPath); + throw new IllegalArgumentException( + "Cannot merge parts into output with same " + "path: " + partPath); } - List parts = getFilesMatching(partPath, NIOFileUtil.PARTS_GLOB, - TabixUtils.STANDARD_INDEX_EXTENSION); + List parts = + getFilesMatching(partPath, NIOFileUtil.PARTS_GLOB, TabixUtils.STANDARD_INDEX_EXTENSION); if (parts.isEmpty()) { - throw new IllegalArgumentException("Could not write bam file because no part " + - "files were found in " + partPath); + throw new IllegalArgumentException( + "Could not write bam file because no part " + "files were found in " + partPath); } else if (isBCF(parts)) { throw new IllegalArgumentException("BCF files are not supported."); } @@ -77,20 +79,20 @@ public static void mergeParts(final String partDirectory, final String outputFil deleteRecursive(partPath); } - /** - * @return whether the output is block compressed - */ - private static boolean writeHeader(OutputStream out, Path outputPath, List parts, - VCFHeader header) throws IOException { + /** @return whether the output is block compressed */ + private static boolean writeHeader( + OutputStream out, Path outputPath, List parts, VCFHeader header) throws IOException { if (header == null) { return false; } boolean blockCompressed = isBlockCompressed(parts); boolean bgzExtension = outputPath.toString().endsWith(BGZFCodec.DEFAULT_EXTENSION); if (blockCompressed && !bgzExtension) { - logger.warn("Parts are block compressed, but output does not have .bgz extension: {}", outputPath); + logger.warn( + "Parts are block compressed, but output does not have .bgz extension: {}", outputPath); } else if (!blockCompressed && bgzExtension) { - logger.warn("Output has a .bgz extension, but parts are not block compressed: {}", outputPath); + logger.warn( + "Output has a .bgz extension, but parts are not block compressed: {}", outputPath); } boolean gzipCompressed = isGzipCompressed(parts); OutputStream headerOut; @@ -101,8 +103,8 @@ private static boolean writeHeader(OutputStream out, Path outputPath, List } else { headerOut = out; } - VariantContextWriter writer = new VariantContextWriterBuilder().clearOptions() - .setOutputVCFStream(headerOut).build(); + VariantContextWriter writer = + new VariantContextWriterBuilder().clearOptions().setOutputVCFStream(headerOut).build(); writer.writeHeader(header); headerOut.flush(); if (headerOut instanceof GZIPOutputStream) { diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/VCFHeaderReader.java b/src/main/java/org/seqdoop/hadoop_bam/util/VCFHeaderReader.java index b59619a..9089146 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/VCFHeaderReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/VCFHeaderReader.java @@ -22,13 +22,8 @@ package org.seqdoop.hadoop_bam.util; -import java.io.BufferedInputStream; -import java.io.InputStream; -import java.io.IOException; - import htsjdk.samtools.seekablestream.SeekableStream; import htsjdk.samtools.util.BlockCompressedInputStream; - import htsjdk.tribble.FeatureCodecHeader; import htsjdk.tribble.TribbleException; import htsjdk.tribble.readers.AsciiLineReader; @@ -37,43 +32,43 @@ import htsjdk.variant.bcf2.BCF2Codec; import 
htsjdk.variant.vcf.VCFCodec; import htsjdk.variant.vcf.VCFHeader; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.io.InputStream; import java.util.zip.GZIPInputStream; import org.seqdoop.hadoop_bam.VCFFormat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -/** Can read a VCF header without being told beforehand whether the input is - * VCF or BCF. - */ +/** Can read a VCF header without being told beforehand whether the input is VCF or BCF. */ public final class VCFHeaderReader { - private static final Logger logger = LoggerFactory.getLogger(VCFHeaderReader.class); - public static VCFHeader readHeaderFrom(final SeekableStream in) - throws IOException - { - Object headerCodec = null; - Object header = null; - final long initialPos = in.position(); - try { - BufferedInputStream bis = new BufferedInputStream(in); - InputStream is = VCFFormat.isGzip(bis) ? new GZIPInputStream(bis) : bis; - headerCodec = new VCFCodec().readHeader(new AsciiLineReaderIterator(new AsciiLineReader(is))); - } catch (TribbleException e) { - logger.warn("Exception while trying to read VCF header from file:", e); + private static final Logger logger = LoggerFactory.getLogger(VCFHeaderReader.class); + + public static VCFHeader readHeaderFrom(final SeekableStream in) throws IOException { + Object headerCodec = null; + Object header = null; + final long initialPos = in.position(); + try { + BufferedInputStream bis = new BufferedInputStream(in); + InputStream is = VCFFormat.isGzip(bis) ? new GZIPInputStream(bis) : bis; + headerCodec = new VCFCodec().readHeader(new AsciiLineReaderIterator(new AsciiLineReader(is))); + } catch (TribbleException e) { + logger.warn("Exception while trying to read VCF header from file:", e); - in.seek(initialPos); + in.seek(initialPos); - InputStream bin = new BufferedInputStream(in); - if (BlockCompressedInputStream.isValidFile(bin)) - bin = new BlockCompressedInputStream(bin); + InputStream bin = new BufferedInputStream(in); + if (BlockCompressedInputStream.isValidFile(bin)) { + bin = new BlockCompressedInputStream(bin); + } - headerCodec = - new BCF2Codec().readHeader( - new PositionalBufferedStream(bin)); - } - if (!(headerCodec instanceof FeatureCodecHeader)) - throw new IOException("No VCF header found"); - header = ((FeatureCodecHeader)headerCodec).getHeaderValue(); - return (VCFHeader)header; - } + headerCodec = new BCF2Codec().readHeader(new PositionalBufferedStream(bin)); + } + if (!(headerCodec instanceof FeatureCodecHeader)) { + throw new IOException("No VCF header found"); + } + header = ((FeatureCodecHeader) headerCodec).getHeaderValue(); + return (VCFHeader) header; + } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/util/WrapSeekable.java b/src/main/java/org/seqdoop/hadoop_bam/util/WrapSeekable.java index b61827a..c5dcb4f 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/util/WrapSeekable.java +++ b/src/main/java/org/seqdoop/hadoop_bam/util/WrapSeekable.java @@ -22,66 +22,81 @@ package org.seqdoop.hadoop_bam.util; -import java.io.InputStream; +import htsjdk.samtools.seekablestream.SeekableStream; import java.io.IOException; - +import java.io.InputStream; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Seekable; -import htsjdk.samtools.seekablestream.SeekableStream; - -/** Wraps Hadoop's "seekable stream" abstraction so that we can give such a one - * 
to BlockCompressedInputStream and retain seekability. +/** + * Wraps Hadoop's "seekable stream" abstraction so that we can give such a one to + * BlockCompressedInputStream and retain seekability. * - *

<p>This is necessary because Hadoop and the SAM tools each have their own - * "seekable stream" abstraction.</p> + * <p>
This is necessary because Hadoop and the SAM tools each have their own "seekable stream" + * abstraction. */ -public class WrapSeekable - extends SeekableStream -{ - private final S stm; - private final long len; - private final Path path; - - public WrapSeekable(final S s, long length, Path p) { - stm = s; - len = length; - path = p; - } - - /** A helper for the common use case. */ - public static WrapSeekable openPath( - FileSystem fs, Path p) throws IOException - { - return new WrapSeekable( - fs.open(p), fs.getFileStatus(p).getLen(), p); - } - public static WrapSeekable openPath( - Configuration conf, Path path) throws IOException - { - return openPath(path.getFileSystem(conf), path); - } - - @Override public String getSource() { return path.toString(); } - @Override public long length () { return len; } - - @Override public long position() throws IOException { return stm.getPos(); } - @Override public void close() throws IOException { stm.close(); } - @Override public boolean eof () throws IOException { - return stm.getPos() == length(); - } - @Override public void seek(long pos) throws IOException { - stm.seek(pos); - } - @Override public int read() throws IOException { - return stm.read(); - } - @Override public int read(byte[] buf, int offset, int len) - throws IOException - { - return stm.read(buf, offset, len); - } +public class WrapSeekable extends SeekableStream { + + private final S stm; + private final long len; + private final Path path; + + public WrapSeekable(final S s, long length, Path p) { + stm = s; + len = length; + path = p; + } + + /** A helper for the common use case. */ + public static WrapSeekable openPath(FileSystem fs, Path p) throws IOException { + return new WrapSeekable(fs.open(p), fs.getFileStatus(p).getLen(), p); + } + + public static WrapSeekable openPath(Configuration conf, Path path) + throws IOException { + return openPath(path.getFileSystem(conf), path); + } + + @Override + public String getSource() { + return path.toString(); + } + + @Override + public long length() { + return len; + } + + @Override + public long position() throws IOException { + return stm.getPos(); + } + + @Override + public void close() throws IOException { + stm.close(); + } + + @Override + public boolean eof() throws IOException { + return stm.getPos() == length(); + } + + @Override + public void seek(long pos) throws IOException { + stm.seek(pos); + } + + @Override + public int read() throws IOException { + return stm.read(); + } + + @Override + public int read(byte[] buf, int offset, int len) throws IOException { + return stm.read(buf, offset, len); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/BAMTestUtil.java b/src/test/java/org/seqdoop/hadoop_bam/BAMTestUtil.java index dda53a1..a179325 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/BAMTestUtil.java +++ b/src/test/java/org/seqdoop/hadoop_bam/BAMTestUtil.java @@ -13,6 +13,7 @@ import java.io.IOException; class BAMTestUtil { + public static File writeBamFile(int numPairs, SAMFileHeader.SortOrder sortOrder) throws IOException { // file will be both queryname and coordinate sorted, so use one or the other @@ -22,31 +23,26 @@ public static File writeBamFile(int numPairs, SAMFileHeader.SortOrder sortOrder) int start1 = (i + 1) * 1000; int start2 = start1 + 100; if (i == 5) { // add two unmapped fragments instead of a mapped pair - samRecordSetBuilder.addFrag(String.format("test-read-%03d-1", i), chr, start1, - false, true, null, - null, - -1, false); - samRecordSetBuilder.addFrag(String.format("test-read-%03d-2", i), 
chr, start2, - false, true, null, - null, - -1, false); + samRecordSetBuilder.addFrag( + String.format("test-read-%03d-1", i), chr, start1, false, true, null, null, -1, false); + samRecordSetBuilder.addFrag( + String.format("test-read-%03d-2", i), chr, start2, false, true, null, null, -1, false); } else { - samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1, - start2); + samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1, start2); } } if (numPairs > 0) { // add two unplaced unmapped fragments if non-empty - samRecordSetBuilder.addUnmappedFragment(String.format - ("test-read-%03d-unplaced-unmapped", numPairs++)); - samRecordSetBuilder.addUnmappedFragment(String.format - ("test-read-%03d-unplaced-unmapped", numPairs++)); + samRecordSetBuilder.addUnmappedFragment( + String.format("test-read-%03d-unplaced-unmapped", numPairs++)); + samRecordSetBuilder.addUnmappedFragment( + String.format("test-read-%03d-unplaced-unmapped", numPairs++)); } final File bamFile = File.createTempFile("test", ".bam"); bamFile.deleteOnExit(); SAMFileHeader samHeader = samRecordSetBuilder.getHeader(); - final SAMFileWriter bamWriter = new SAMFileWriterFactory() - .makeSAMOrBAMWriter(samHeader, true, bamFile); + final SAMFileWriter bamWriter = + new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, bamFile); for (final SAMRecord rec : samRecordSetBuilder.getRecords()) { bamWriter.addAlignment(rec); } @@ -54,11 +50,13 @@ public static File writeBamFile(int numPairs, SAMFileHeader.SortOrder sortOrder) // create BAM index if (sortOrder.equals(SAMFileHeader.SortOrder.coordinate)) { - SamReader samReader = SamReaderFactory.makeDefault() - .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) - .open(bamFile); - BAMIndexer.createIndex(samReader, new File(bamFile.getAbsolutePath() - .replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix))); + SamReader samReader = + SamReaderFactory.makeDefault() + .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) + .open(bamFile); + BAMIndexer.createIndex( + samReader, + new File(bamFile.getAbsolutePath().replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix))); } return bamFile; @@ -71,8 +69,7 @@ public static File writeBamFileWithLargeHeader() throws IOException { int chr = 20; int start1 = (i + 1) * 1000; int start2 = start1 + 100; - samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1, - start2); + samRecordSetBuilder.addPair(String.format("test-read-%03d", i), chr, start1, start2); } final File bamFile = File.createTempFile("test", ".bam"); @@ -80,11 +77,12 @@ public static File writeBamFileWithLargeHeader() throws IOException { SAMFileHeader samHeader = samRecordSetBuilder.getHeader(); StringBuffer sb = new StringBuffer(); for (int i = 0; i < 1000000; i++) { - sb.append("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); + sb.append( + "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789"); } samHeader.addComment(sb.toString()); - final SAMFileWriter bamWriter = new SAMFileWriterFactory() - .makeSAMOrBAMWriter(samHeader, true, bamFile); + final SAMFileWriter bamWriter = + new SAMFileWriterFactory().makeSAMOrBAMWriter(samHeader, true, bamFile); for (final SAMRecord rec : samRecordSetBuilder.getRecords()) { bamWriter.addAlignment(rec); } diff --git a/src/test/java/org/seqdoop/hadoop_bam/IntervalUtilTest.java b/src/test/java/org/seqdoop/hadoop_bam/IntervalUtilTest.java index 52da0c6..59baa37 100644 --- 
a/src/test/java/org/seqdoop/hadoop_bam/IntervalUtilTest.java +++ b/src/test/java/org/seqdoop/hadoop_bam/IntervalUtilTest.java @@ -1,75 +1,74 @@ package org.seqdoop.hadoop_bam; import htsjdk.samtools.util.Interval; +import java.util.List; +import java.util.stream.Collectors; +import java.util.stream.Stream; import org.apache.hadoop.conf.Configuration; import org.junit.Assert; import org.junit.Test; import org.seqdoop.hadoop_bam.util.IntervalUtil; -import java.util.List; -import java.util.stream.Collectors; -import java.util.stream.Stream; - -/** - * Unit tests for {@link IntervalUtil}. - */ +/** Unit tests for {@link IntervalUtil}. */ public class IntervalUtilTest { - @Test - public void testInvalidIntervals() { - final String[] invalidIntervals = { - "chr1", // full sequence interval are not allowed. - "chr1:12", // single position omitting stop is not allowed. - "chr1,chr2:121-123", // , are not allowed anywhere - "chr20:1,100-3,400", // , " " - "MT:35+", // , until end of contig + is not allowed. - "MT:13-31-1112", // too many positions. - "MT:-2112", // forgot the start position! - " MT : 113 - 1245" // blanks are not allowed either. - }; - for (final String interval : invalidIntervals) { - final Configuration conf = new Configuration(); - conf.set("prop-name", interval); - try { - IntervalUtil.getIntervals(conf, "prop-name"); - Assert.fail("expected an exception when dealing with '" + interval + "'"); - } catch (final FormatException ex) { - // fine. - } - } + @Test + public void testInvalidIntervals() { + final String[] invalidIntervals = { + "chr1", // full sequence interval are not allowed. + "chr1:12", // single position omitting stop is not allowed. + "chr1,chr2:121-123", // , are not allowed anywhere + "chr20:1,100-3,400", // , " " + "MT:35+", // , until end of contig + is not allowed. + "MT:13-31-1112", // too many positions. + "MT:-2112", // forgot the start position! + " MT : 113 - 1245" // blanks are not allowed either. + }; + for (final String interval : invalidIntervals) { + final Configuration conf = new Configuration(); + conf.set("prop-name", interval); + try { + IntervalUtil.getIntervals(conf, "prop-name"); + Assert.fail("expected an exception when dealing with '" + interval + "'"); + } catch (final FormatException ex) { + // fine. + } } + } - @Test - public void testValidIntervals() { - final Object[][] validIntervals = { - {"chr1:1-343", "chr1", 1, 343}, // standard 'chr' starting contig interval. - {"chr20_Un:31-145", "chr20_Un", 31, 145}, // standard chromosome name containing underscore. - {"X:31-145", "X", 31, 145}, // standard 'X' chromosome interval. - {"10:45000012-678901123", "10", 45000012, 678901123}, // standard number starting chromosome name interval. - {"HLA-DQA1*01:01:02:134-14151", "HLA-DQA1*01:01:02", 134, 14151}}; // example of a Hg38 assembly - // HLA contigs including - and : in their names. + @Test + public void testValidIntervals() { + final Object[][] validIntervals = { + {"chr1:1-343", "chr1", 1, 343}, // standard 'chr' starting contig interval. + {"chr20_Un:31-145", "chr20_Un", 31, 145}, // standard chromosome name containing underscore. + {"X:31-145", "X", 31, 145}, // standard 'X' chromosome interval. + {"10:45000012-678901123", "10", 45000012, 678901123}, + // standard number starting chromosome name interval. + {"HLA-DQA1*01:01:02:134-14151", "HLA-DQA1*01:01:02", 134, 14151} + }; // example of a Hg38 assembly + // HLA contigs including - and : in their names. 
- final Configuration conf = new Configuration(); + final Configuration conf = new Configuration(); - Assert.assertNull(IntervalUtil.getIntervals(conf, "prop-name")); + Assert.assertNull(IntervalUtil.getIntervals(conf, "prop-name")); - conf.set("prop-name", ""); + conf.set("prop-name", ""); - Assert.assertNotNull(IntervalUtil.getIntervals(conf, "prop-name")); - Assert.assertTrue(IntervalUtil.getIntervals(conf, "prop-name").isEmpty()); + Assert.assertNotNull(IntervalUtil.getIntervals(conf, "prop-name")); + Assert.assertTrue(IntervalUtil.getIntervals(conf, "prop-name").isEmpty()); - conf.set("prop-name", Stream.of(validIntervals) - .map(o -> (String) o[0]).collect(Collectors.joining(","))); + conf.set( + "prop-name", + Stream.of(validIntervals).map(o -> (String) o[0]).collect(Collectors.joining(","))); - final List allIntervals = IntervalUtil.getIntervals(conf, "prop-name"); - Assert.assertNotNull(allIntervals); - Assert.assertEquals(allIntervals.size(), validIntervals.length); - for (int i = 0; i < validIntervals.length; i++) { - Assert.assertNotNull(allIntervals.get(i)); - Assert.assertEquals(allIntervals.get(i).getContig(), validIntervals[i][1]); - Assert.assertEquals(allIntervals.get(i).getStart(), validIntervals[i][2]); - Assert.assertEquals(allIntervals.get(i).getEnd(), validIntervals[i][3]); - } + final List allIntervals = IntervalUtil.getIntervals(conf, "prop-name"); + Assert.assertNotNull(allIntervals); + Assert.assertEquals(allIntervals.size(), validIntervals.length); + for (int i = 0; i < validIntervals.length; i++) { + Assert.assertNotNull(allIntervals.get(i)); + Assert.assertEquals(allIntervals.get(i).getContig(), validIntervals[i][1]); + Assert.assertEquals(allIntervals.get(i).getStart(), validIntervals[i][2]); + Assert.assertEquals(allIntervals.get(i).getEnd(), validIntervals[i][3]); } - + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestAnySAMInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestAnySAMInputFormat.java index b3b220a..7fb4221 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestAnySAMInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestAnySAMInputFormat.java @@ -1,33 +1,34 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathNotFoundException; import org.junit.Test; -import static org.junit.Assert.assertEquals; - public class TestAnySAMInputFormat { - @Test - public void testHeaderlessSamFormat() throws PathNotFoundException { - final SAMFormat result = getSamFormat(new Configuration(), "test_headerless.sam"); - assertEquals(SAMFormat.SAM, result); - } + @Test + public void testHeaderlessSamFormat() throws PathNotFoundException { + final SAMFormat result = getSamFormat(new Configuration(), "test_headerless.sam"); + assertEquals(SAMFormat.SAM, result); + } - @Test - public void testTrustExtensionsIsHonored() throws PathNotFoundException { - final Configuration conf = new Configuration(); - //default to trusting exceptions - assertEquals(SAMFormat.SAM, getSamFormat(conf, "misnamedBam.sam")); + @Test + public void testTrustExtensionsIsHonored() throws PathNotFoundException { + final Configuration conf = new Configuration(); + // default to trusting exceptions + assertEquals(SAMFormat.SAM, getSamFormat(conf, "misnamedBam.sam")); - conf.set(AnySAMInputFormat.TRUST_EXTS_PROPERTY, "false"); - final SAMFormat result = getSamFormat(conf, "misnamedBam.sam"); - assertEquals(SAMFormat.BAM, result); - } + 
conf.set(AnySAMInputFormat.TRUST_EXTS_PROPERTY, "false"); + final SAMFormat result = getSamFormat(conf, "misnamedBam.sam"); + assertEquals(SAMFormat.BAM, result); + } - private SAMFormat getSamFormat(final Configuration conf, final String file) throws PathNotFoundException { - final String filePath = getClass().getClassLoader().getResource(file).getFile(); - return new AnySAMInputFormat(conf).getFormat(new Path(filePath)); - } + private SAMFormat getSamFormat(final Configuration conf, final String file) + throws PathNotFoundException { + final String filePath = getClass().getClassLoader().getResource(file).getFile(); + return new AnySAMInputFormat(conf).getFormat(new Path(filePath)); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestBAMInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestBAMInputFormat.java index ed7f570..f79cceb 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestBAMInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestBAMInputFormat.java @@ -1,5 +1,9 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.BAMIndex; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMRecord; @@ -19,11 +23,8 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestBAMInputFormat { + private String input; private TaskAttemptContext taskAttemptContext; private JobContext jobContext; @@ -36,13 +37,13 @@ private void completeSetupWithIntervals(List intervals) { completeSetupWithBoundedTraversal(intervals, false); } - private void completeSetupWithBoundedTraversal(List intervals, boolean - traverseUnplacedUnmapped) { + private void completeSetupWithBoundedTraversal( + List intervals, boolean traverseUnplacedUnmapped) { completeSetup(true, intervals, traverseUnplacedUnmapped); } - private void completeSetup(boolean boundedTraversal, List intervals, boolean - traverseUnplacedUnmapped) { + private void completeSetup( + boolean boundedTraversal, List intervals, boolean traverseUnplacedUnmapped) { Configuration conf = new Configuration(); conf.set("mapred.input.dir", "file://" + input); if (boundedTraversal) { @@ -63,8 +64,7 @@ public void testNoReadsInFirstSplitBug() throws Exception { @Test public void testMultipleSplits() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.queryname) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.queryname).getAbsolutePath(); completeSetup(); jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000); BAMInputFormat inputFormat = new BAMInputFormat(); @@ -83,8 +83,7 @@ public void testMultipleSplits() throws Exception { @Test public void testMultipleSplitsBaiEnabled() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); completeSetup(); BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true); jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000); @@ -101,8 +100,7 @@ public void testMultipleSplitsBaiEnabled() throws Exception { @Test public void testMultipleSplitsBaiEnabledSuffixPath() throws Exception { - input = 
BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); File index = new File(input.replaceFirst("\\.bam$", BAMIndex.BAMIndexSuffix)); index.renameTo(new File(input + BAMIndex.BAMIndexSuffix)); completeSetup(); @@ -121,8 +119,7 @@ public void testMultipleSplitsBaiEnabledSuffixPath() throws Exception { @Test public void testMultipleSplitsBaiEnabledNoIndex() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.queryname) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.queryname).getAbsolutePath(); completeSetup(); BAMInputFormat.setEnableBAISplitCalculator(jobContext.getConfiguration(), true); jobContext.getConfiguration().setInt(FileInputFormat.SPLIT_MAXSIZE, 40000); @@ -142,8 +139,7 @@ public void testMultipleSplitsBaiEnabledNoIndex() throws Exception { @Test public void testIntervals() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); List intervals = new ArrayList(); intervals.add(new Interval("chr21", 5000, 9999)); // includes two unpaired fragments intervals.add(new Interval("chr21", 20000, 22999)); @@ -160,8 +156,7 @@ public void testIntervals() throws Exception { @Test public void testIntervalCoveringWholeChromosome() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); List intervals = new ArrayList(); intervals.add(new Interval("chr21", 1, 1000135)); @@ -179,8 +174,7 @@ public void testIntervalCoveringWholeChromosome() throws Exception { @Test public void testIntervalsAndUnmapped() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); List intervals = new ArrayList(); intervals.add(new Interval("chr21", 5000, 9999)); // includes two unpaired fragments intervals.add(new Interval("chr21", 20000, 22999)); @@ -199,8 +193,7 @@ public void testIntervalsAndUnmapped() throws Exception { @Test public void testUnmapped() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); completeSetupWithBoundedTraversal(null, true); @@ -214,17 +207,16 @@ public void testUnmapped() throws Exception { @Test(expected = IllegalArgumentException.class) public void testMappedOnly() throws Exception { - input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate) - .getAbsolutePath(); + input = BAMTestUtil.writeBamFile(1000, SAMFileHeader.SortOrder.coordinate).getAbsolutePath(); // Mapped only (-XL unmapped) is currently unsupported and throws an exception. 
completeSetupWithBoundedTraversal(null, false); } - private List getSAMRecordsFromSplit(BAMInputFormat inputFormat, - InputSplit split) throws Exception { - RecordReader reader = inputFormat - .createRecordReader(split, taskAttemptContext); + private List getSAMRecordsFromSplit(BAMInputFormat inputFormat, InputSplit split) + throws Exception { + RecordReader reader = + inputFormat.createRecordReader(split, taskAttemptContext); reader.initialize(split, taskAttemptContext); List records = new ArrayList(); diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestBAMOutputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestBAMOutputFormat.java index 357cec2..8251660 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestBAMOutputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestBAMOutputFormat.java @@ -1,11 +1,17 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.*; import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedStreamConstants; +import java.io.*; import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; +import java.util.Iterator; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; @@ -23,317 +29,303 @@ import org.junit.Test; import org.seqdoop.hadoop_bam.util.SAMFileMerger; import org.seqdoop.hadoop_bam.util.SAMHeaderReader; - -import java.io.*; -import java.util.Iterator; import org.seqdoop.hadoop_bam.util.SAMOutputPreparer; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestBAMOutputFormat { - private String testBAMFileName; - - private int expectedRecordCount; - private SAMFileHeader samFileHeader; - - private TaskAttemptContext taskAttemptContext; - static private Configuration conf; - - // BAM output class that writes a header before records - static class BAMTestWithHeaderOutputFormat - extends KeyIgnoringBAMOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestBAM.header"; - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, - Path outputPath) throws IOException { - readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); - setWriteHeader(true); - return super.getRecordWriter(ctx, outputPath); - } - } - // BAM output class that doesn't write a header before records - static class BAMTestNoHeaderOutputFormat - extends KeyIgnoringBAMOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestBAM.header"; - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, - Path outputPath) throws IOException { - // the writers require a header in order to create a codec, even if - // the header isn't being written out - readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); - setWriteHeader(false); - return super.getRecordWriter(ctx, outputPath); - } - } + private static Configuration conf; + private String testBAMFileName; + private int expectedRecordCount; + private SAMFileHeader samFileHeader; + private TaskAttemptContext taskAttemptContext; - @Before - public void setup() throws Exception { - conf = new Configuration(); + @Before + public void setup() throws Exception { + conf = new Configuration(); - testBAMFileName = ClassLoader.getSystemClassLoader() - .getResource("test.bam").getFile(); + testBAMFileName = 
ClassLoader.getSystemClassLoader().getResource("test.bam").getFile(); - conf.set("mapred.input.dir", "file://" + testBAMFileName); + conf.set("mapred.input.dir", "file://" + testBAMFileName); - // fetch the SAMFile header from the original input to get the expected count - expectedRecordCount = getBAMRecordCount(new File(testBAMFileName)); - samFileHeader = SAMHeaderReader.readSAMHeaderFrom(new Path(testBAMFileName), conf); + // fetch the SAMFile header from the original input to get the expected count + expectedRecordCount = getBAMRecordCount(new File(testBAMFileName)); + samFileHeader = SAMHeaderReader.readSAMHeaderFrom(new Path(testBAMFileName), conf); - taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); - } + taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); + } - @Test - public void testBAMRecordWriterNoHeader() throws Exception { - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - outFile.deleteOnExit(); - final Path outPath = new Path(outFile.toURI()); - - final BAMTestNoHeaderOutputFormat bamOut = new BAMTestNoHeaderOutputFormat(); - conf.set(BAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testBAMFileName); - bamOut.setWriteHeader(false); - - RecordWriter rw = - bamOut.getRecordWriter(taskAttemptContext, outPath); - - final SamReader samReader = SamReaderFactory.makeDefault() - .open(new File(testBAMFileName)); - - for (final SAMRecord r : samReader) { - final SAMRecordWritable samRW = new SAMRecordWritable(); - samRW.set(r); - rw.write(null, samRW); - } - samReader.close(); - rw.close(taskAttemptContext); - - // now verify the output - final int actualCount = getBAMRecordCount(outFile, samFileHeader); - assertEquals(expectedRecordCount, actualCount); - } + @Test + public void testBAMRecordWriterNoHeader() throws Exception { + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + outFile.deleteOnExit(); + final Path outPath = new Path(outFile.toURI()); - @Test - public void testBAMRecordWriterWithHeader() throws Exception { - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - outFile.deleteOnExit(); - final Path outPath = new Path(outFile.toURI()); - - final BAMTestWithHeaderOutputFormat bamOut = new BAMTestWithHeaderOutputFormat(); - conf.set(BAMTestWithHeaderOutputFormat.READ_HEADER_FROM_FILE, testBAMFileName); - bamOut.setWriteHeader(false); - - RecordWriter rw = - bamOut.getRecordWriter(taskAttemptContext, outPath); - - final SamReader samReader = SamReaderFactory.makeDefault() - .open(new File(testBAMFileName)); - - for (final SAMRecord r : samReader) { - final SAMRecordWritable samRW = new SAMRecordWritable(); - samRW.set(r); - rw.write(null, samRW); - } - samReader.close(); - rw.close(taskAttemptContext); - - // now verify the output - final int actualCount = getBAMRecordCount(outFile); - assertEquals(expectedRecordCount, actualCount); - } + final BAMTestNoHeaderOutputFormat bamOut = new BAMTestNoHeaderOutputFormat(); + conf.set(BAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testBAMFileName); + bamOut.setWriteHeader(false); - @Test - public void testBAMOutput() throws Exception { - final Path outputPath = doMapReduce(testBAMFileName); - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.BAM, samFileHeader); - final int actualCount = getBAMRecordCount(outFile); - assertEquals(expectedRecordCount, actualCount); - } + 
RecordWriter rw = + bamOut.getRecordWriter(taskAttemptContext, outPath); - @Test - public void testEmptyBAM() throws Exception { - String bam = BAMTestUtil.writeBamFile(0, - SAMFileHeader.SortOrder.coordinate).toURI().toString(); - conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true); - final Path outputPath = doMapReduce(bam); - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.BAM, new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate).getHeader()); - final int actualCount = getBAMRecordCount(outFile); - assertEquals(0, actualCount); - } + final SamReader samReader = SamReaderFactory.makeDefault().open(new File(testBAMFileName)); - @Test - public void testBAMWithSplittingBai() throws Exception { - int numPairs = 20000; - // create a large BAM with lots of index points - String bam = BAMTestUtil.writeBamFile(20000, - SAMFileHeader.SortOrder.coordinate).toURI().toString(); - conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 800000); // force multiple parts - conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true); - final Path outputPath = doMapReduce(bam); - - List recordsAtSplits = new ArrayList<>(); - File[] splittingIndexes = new File(outputPath.toUri()).listFiles(pathname -> { - return pathname.getName().endsWith(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); - }); - Arrays.sort(splittingIndexes); // ensure files are sorted by name - for (File file : splittingIndexes) { - File bamFile = new File(file.getParentFile(), - file.getName().replace(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION, "")); - SplittingBAMIndex index = new SplittingBAMIndex(file); - recordsAtSplits.addAll(getRecordsAtSplits(bamFile, index)); - } - - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - //outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.BAM, - new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate).getHeader()); - - final int actualCount = getBAMRecordCount(outFile); - assertEquals(numPairs * 2 + 2, actualCount); // 2 unmapped reads - - File splittingBai = new File(outFile.getParentFile(), outFile.getName() + - SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); - SplittingBAMIndex splittingBAMIndex = new SplittingBAMIndex(splittingBai); - - assertEquals(recordsAtSplits, getRecordsAtSplits(outFile, splittingBAMIndex)); + for (final SAMRecord r : samReader) { + final SAMRecordWritable samRW = new SAMRecordWritable(); + samRW.set(r); + rw.write(null, samRW); } + samReader.close(); + rw.close(taskAttemptContext); - private List getRecordsAtSplits(File bam, SplittingBAMIndex index) throws IOException { - List records = new ArrayList<>(); - BAMRecordCodec codec = new BAMRecordCodec(samFileHeader); - BlockCompressedInputStream bci = new BlockCompressedInputStream(bam); - codec.setInputStream(bci); - for (Long offset : index.getVirtualOffsets()) { - bci.seek(offset); - SAMRecord record = codec.decode(); - if (record != null) { - records.add(record); - } - } - return records; - } + // now verify the output + final int actualCount = getBAMRecordCount(outFile, samFileHeader); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testBAMRecordWriterWithHeader() throws Exception { + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + outFile.deleteOnExit(); + final Path outPath = new Path(outFile.toURI()); - @Test - public void 
testBAMRoundTrip() throws Exception { - // run a m/r job to write out a bam file - Path outputPath = doMapReduce(testBAMFileName); + final BAMTestWithHeaderOutputFormat bamOut = new BAMTestWithHeaderOutputFormat(); + conf.set(BAMTestWithHeaderOutputFormat.READ_HEADER_FROM_FILE, testBAMFileName); + bamOut.setWriteHeader(false); - // merge the parts, and write to a temp file - final File outFile = File.createTempFile("testBAMWriter", ".bam"); - outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.BAM, samFileHeader); + RecordWriter rw = + bamOut.getRecordWriter(taskAttemptContext, outPath); - // now use the assembled output as m/r input - outputPath = doMapReduce(outFile.getAbsolutePath()); + final SamReader samReader = SamReaderFactory.makeDefault().open(new File(testBAMFileName)); - // merge the parts again - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.BAM, samFileHeader); + for (final SAMRecord r : samReader) { + final SAMRecordWritable samRW = new SAMRecordWritable(); + samRW.set(r); + rw.write(null, samRW); + } + samReader.close(); + rw.close(taskAttemptContext); + + // now verify the output + final int actualCount = getBAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testBAMOutput() throws Exception { + final Path outputPath = doMapReduce(testBAMFileName); + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.BAM, samFileHeader); + final int actualCount = getBAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testEmptyBAM() throws Exception { + String bam = BAMTestUtil.writeBamFile(0, SAMFileHeader.SortOrder.coordinate).toURI().toString(); + conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true); + final Path outputPath = doMapReduce(bam); + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), + outFile.toURI().toString(), + SAMFormat.BAM, + new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate).getHeader()); + final int actualCount = getBAMRecordCount(outFile); + assertEquals(0, actualCount); + } + + @Test + public void testBAMWithSplittingBai() throws Exception { + int numPairs = 20000; + // create a large BAM with lots of index points + String bam = + BAMTestUtil.writeBamFile(20000, SAMFileHeader.SortOrder.coordinate).toURI().toString(); + conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 800000); // force multiple parts + conf.setBoolean(BAMOutputFormat.WRITE_SPLITTING_BAI, true); + final Path outputPath = doMapReduce(bam); + + List recordsAtSplits = new ArrayList<>(); + File[] splittingIndexes = + new File(outputPath.toUri()) + .listFiles( + pathname -> { + return pathname.getName().endsWith(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); + }); + Arrays.sort(splittingIndexes); // ensure files are sorted by name + for (File file : splittingIndexes) { + File bamFile = + new File( + file.getParentFile(), + file.getName().replace(SplittingBAMIndexer.OUTPUT_FILE_EXTENSION, "")); + SplittingBAMIndex index = new SplittingBAMIndex(file); + recordsAtSplits.addAll(getRecordsAtSplits(bamFile, index)); + } - // verify the final output - final int actualCount = getBAMRecordCount(outFile); - assertEquals(expectedRecordCount, 
actualCount); + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + // outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), + outFile.toURI().toString(), + SAMFormat.BAM, + new SAMRecordSetBuilder(true, SAMFileHeader.SortOrder.coordinate).getHeader()); + + final int actualCount = getBAMRecordCount(outFile); + assertEquals(numPairs * 2 + 2, actualCount); // 2 unmapped reads + + File splittingBai = + new File( + outFile.getParentFile(), outFile.getName() + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); + SplittingBAMIndex splittingBAMIndex = new SplittingBAMIndex(splittingBai); + + assertEquals(recordsAtSplits, getRecordsAtSplits(outFile, splittingBAMIndex)); + } + + private List getRecordsAtSplits(File bam, SplittingBAMIndex index) throws IOException { + List records = new ArrayList<>(); + BAMRecordCodec codec = new BAMRecordCodec(samFileHeader); + BlockCompressedInputStream bci = new BlockCompressedInputStream(bam); + codec.setInputStream(bci); + for (Long offset : index.getVirtualOffsets()) { + bci.seek(offset); + SAMRecord record = codec.decode(); + if (record != null) { + records.add(record); + } + } + return records; + } + + @Test + public void testBAMRoundTrip() throws Exception { + // run a m/r job to write out a bam file + Path outputPath = doMapReduce(testBAMFileName); + + // merge the parts, and write to a temp file + final File outFile = File.createTempFile("testBAMWriter", ".bam"); + outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.BAM, samFileHeader); + + // now use the assembled output as m/r input + outputPath = doMapReduce(outFile.getAbsolutePath()); + + // merge the parts again + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.BAM, samFileHeader); + + // verify the final output + final int actualCount = getBAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + private Path doMapReduce(final String inputFile) throws Exception { + final FileSystem fileSystem = FileSystem.get(conf); + final Path inputPath = new Path(inputFile); + final Path outputPath = fileSystem.makeQualified(new Path("target/out")); + fileSystem.delete(outputPath, true); + + final Job job = Job.getInstance(conf); + FileInputFormat.setInputPaths(job, inputPath); + + conf.set(BAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile); + job.setInputFormatClass(BAMInputFormat.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(SAMRecordWritable.class); + + job.setOutputFormatClass(BAMTestNoHeaderOutputFormat.class); + job.setOutputKeyClass(LongWritable.class); + job.setOutputValueClass(SAMRecordWritable.class); + + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, outputPath); + + final boolean success = job.waitForCompletion(true); + assertTrue(success); + + return outputPath; + } + + private int getBAMRecordCount(final File bamFile) throws IOException { + final SamReader bamReader = SamReaderFactory.makeDefault().open(SamInputResource.of(bamFile)); + final Iterator it = bamReader.iterator(); + int recCount = 0; + while (it.hasNext()) { + it.next(); + recCount++; + } + bamReader.close(); + return recCount; + } + + private int getBAMRecordCount(final File blockStreamFile, final SAMFileHeader header) + throws IOException { + // assemble a proper BAM file from the block stream shard(s) in + // order to verify the contents + final ByteArrayInputStream mergedStream = 
mergeBAMBlockStream(blockStreamFile, header); + + // now we can verify that we can read everything back in + final SamReader resultBAMReader = + SamReaderFactory.makeDefault().open(SamInputResource.of(mergedStream)); + final Iterator it = resultBAMReader.iterator(); + int actualCount = 0; + while (it.hasNext()) { + it.next(); + actualCount++; } + return actualCount; + } - private Path doMapReduce(final String inputFile) throws Exception { - final FileSystem fileSystem = FileSystem.get(conf); - final Path inputPath = new Path(inputFile); - final Path outputPath = fileSystem.makeQualified(new Path("target/out")); - fileSystem.delete(outputPath, true); + private ByteArrayInputStream mergeBAMBlockStream( + final File blockStreamFile, final SAMFileHeader header) throws IOException { + // assemble a proper BAM file from the block stream shard(s) in + // order to verify the contents + final ByteArrayOutputStream bamOutputStream = new ByteArrayOutputStream(); - final Job job = Job.getInstance(conf); - FileInputFormat.setInputPaths(job, inputPath); + // write out the bam file header + new SAMOutputPreparer().prepareForRecords(bamOutputStream, SAMFormat.BAM, header); - conf.set(BAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile); - job.setInputFormatClass(BAMInputFormat.class); - job.setMapOutputKeyClass(LongWritable.class); - job.setMapOutputValueClass(SAMRecordWritable.class); + // copy the contents of the block shard(s) written out by the M/R job + final ByteArrayOutputStream blockOutputStream = new ByteArrayOutputStream(); + Files.copy(blockStreamFile.toPath(), blockOutputStream); + blockOutputStream.writeTo(bamOutputStream); - job.setOutputFormatClass(BAMTestNoHeaderOutputFormat.class); - job.setOutputKeyClass(LongWritable.class); - job.setOutputValueClass(SAMRecordWritable.class); + // add the BGZF terminator + bamOutputStream.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); + bamOutputStream.close(); - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, outputPath); + return new ByteArrayInputStream(bamOutputStream.toByteArray()); + } - final boolean success = job.waitForCompletion(true); - assertTrue(success); + // BAM output class that writes a header before records + static class BAMTestWithHeaderOutputFormat extends KeyIgnoringBAMOutputFormat { - return outputPath; - } + public static final String READ_HEADER_FROM_FILE = "TestBAM.header"; - private int getBAMRecordCount(final File bamFile) throws IOException { - final SamReader bamReader = SamReaderFactory.makeDefault() - .open(SamInputResource.of(bamFile)); - final Iterator it = bamReader.iterator(); - int recCount = 0; - while (it.hasNext()) { - it.next(); - recCount++; - } - bamReader.close(); - return recCount; + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx, Path outputPath) throws IOException { + readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); + setWriteHeader(true); + return super.getRecordWriter(ctx, outputPath); } + } - private int getBAMRecordCount( - final File blockStreamFile, - final SAMFileHeader header) throws IOException - { - // assemble a proper BAM file from the block stream shard(s) in - // order to verify the contents - final ByteArrayInputStream mergedStream = mergeBAMBlockStream ( - blockStreamFile, - header - ); - - // now we can verify that we can read everything back in - final SamReader resultBAMReader = SamReaderFactory.makeDefault() - .open(SamInputResource.of(mergedStream)); - final Iterator it = resultBAMReader.iterator(); - int 
actualCount = 0; - while (it.hasNext()) { - it.next(); - actualCount++; - } - return actualCount; - } + // BAM output class that doesn't write a header before records + static class BAMTestNoHeaderOutputFormat extends KeyIgnoringBAMOutputFormat { + + public static final String READ_HEADER_FROM_FILE = "TestBAM.header"; - private ByteArrayInputStream mergeBAMBlockStream( - final File blockStreamFile, - final SAMFileHeader header) throws IOException - { - // assemble a proper BAM file from the block stream shard(s) in - // order to verify the contents - final ByteArrayOutputStream bamOutputStream = new ByteArrayOutputStream(); - - // write out the bam file header - new SAMOutputPreparer().prepareForRecords( - bamOutputStream, - SAMFormat.BAM, - header); - - // copy the contents of the block shard(s) written out by the M/R job - final ByteArrayOutputStream blockOutputStream = new ByteArrayOutputStream(); - Files.copy(blockStreamFile.toPath(), blockOutputStream); - blockOutputStream.writeTo(bamOutputStream); - - // add the BGZF terminator - bamOutputStream.write(BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK); - bamOutputStream.close(); - - return new ByteArrayInputStream(bamOutputStream.toByteArray()); + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx, Path outputPath) throws IOException { + // the writers require a header in order to create a codec, even if + // the header isn't being written out + readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); + setWriteHeader(false); + return super.getRecordWriter(ctx, outputPath); } + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestBAMSplitGuesser.java b/src/test/java/org/seqdoop/hadoop_bam/TestBAMSplitGuesser.java index 6becbad..e077542 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestBAMSplitGuesser.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestBAMSplitGuesser.java @@ -1,5 +1,7 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; + import htsjdk.samtools.SAMUtils; import htsjdk.samtools.seekablestream.SeekableStream; import java.io.File; @@ -8,8 +10,6 @@ import org.junit.Test; import org.seqdoop.hadoop_bam.util.WrapSeekable; -import static org.junit.Assert.assertEquals; - public class TestBAMSplitGuesser { @Test diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestBGZFSplitGuesser.java b/src/test/java/org/seqdoop/hadoop_bam/TestBGZFSplitGuesser.java index d500cdd..f168d05 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestBGZFSplitGuesser.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestBGZFSplitGuesser.java @@ -1,5 +1,7 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; + import htsjdk.samtools.util.BlockCompressedInputStream; import htsjdk.samtools.util.BlockCompressedStreamConstants; import java.io.File; @@ -15,8 +17,6 @@ import org.junit.runners.Parameterized; import org.seqdoop.hadoop_bam.util.BGZFSplitGuesser; -import static org.junit.Assert.assertEquals; - @RunWith(Parameterized.class) public class TestBGZFSplitGuesser { @@ -32,9 +32,10 @@ public TestBGZFSplitGuesser(String filename, long firstSplit, long lastSplit) { @Parameterized.Parameters public static Collection data() { - return Arrays.asList(new Object[][] { - {"test.vcf.bgzf.gz", 821, 821}, {"HiSeq.10000.vcf.bgzf.gz", 16688, 509222} - }); + return Arrays.asList( + new Object[][] { + {"test.vcf.bgzf.gz", 821, 821}, {"HiSeq.10000.vcf.bgzf.gz", 16688, 509222} + }); } @Test @@ -58,14 +59,14 @@ public void test() throws IOException { assertEquals(firstSplit, 
(long) boundaries.getFirst()); assertEquals(lastSplit, (long) boundaries.getLast()); - assertEquals("Last block start is terminator gzip block", + assertEquals( + "Last block start is terminator gzip block", file.length() - BlockCompressedStreamConstants.EMPTY_GZIP_BLOCK.length, (long) boundaries.get(boundaries.size() - 1)); } private void canReadFromBlockStart(long blockStart) throws IOException { - BlockCompressedInputStream blockCompressedInputStream = new - BlockCompressedInputStream(file); + BlockCompressedInputStream blockCompressedInputStream = new BlockCompressedInputStream(file); blockCompressedInputStream.setCheckCrcs(true); blockCompressedInputStream.seek(blockStart << 16); byte[] b = new byte[100]; diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormat.java index d0ec14e..0564575 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormat.java @@ -1,5 +1,9 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; @@ -28,11 +32,8 @@ import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestCRAMInputFormat { + private String input; private String reference; private TaskAttemptContext taskAttemptContext; @@ -53,8 +54,10 @@ public void setup() throws Exception { @Test public void testReader() throws Exception { int expectedCount = 0; - SamReader samReader = SamReaderFactory.makeDefault() - .referenceSequence(new File(URI.create(reference))).open(new File(input)); + SamReader samReader = + SamReaderFactory.makeDefault() + .referenceSequence(new File(URI.create(reference))) + .open(new File(input)); for (SAMRecord r : samReader) { expectedCount++; } @@ -62,8 +65,8 @@ public void testReader() throws Exception { AnySAMInputFormat inputFormat = new AnySAMInputFormat(); List splits = inputFormat.getSplits(jobContext); assertEquals(1, splits.size()); - RecordReader reader = inputFormat - .createRecordReader(splits.get(0), taskAttemptContext); + RecordReader reader = + inputFormat.createRecordReader(splits.get(0), taskAttemptContext); reader.initialize(splits.get(0), taskAttemptContext); int actualCount = 0; @@ -135,8 +138,10 @@ public void testMapReduceJob() throws Exception { assertTrue(success); List samStrings = new ArrayList(); - SamReader samReader = SamReaderFactory.makeDefault() - .referenceSequence(new File(URI.create(reference))).open(new File(input)); + SamReader samReader = + SamReaderFactory.makeDefault() + .referenceSequence(new File(URI.create(reference))) + .open(new File(input)); for (SAMRecord r : samReader) { samStrings.add(r.getSAMString().trim()); } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormatOnHDFS.java b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormatOnHDFS.java index 034c078..1efce70 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormatOnHDFS.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMInputFormatOnHDFS.java @@ -1,5 +1,8 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.SAMRecord; import 
htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; @@ -26,20 +29,15 @@ import org.junit.BeforeClass; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestCRAMInputFormatOnHDFS { + + private static MiniDFSCluster cluster; + private static URI clusterUri; private String input; private String reference; private TaskAttemptContext taskAttemptContext; private JobContext jobContext; - - private static MiniDFSCluster cluster; - private static URI clusterUri; - @BeforeClass public static void setUpBeforeClass() throws Exception { cluster = startMini(TestCRAMInputFormatOnHDFS.class.getName()); @@ -48,20 +46,52 @@ public static void setUpBeforeClass() throws Exception { @AfterClass public static void teardownClass() throws Exception { - if (cluster != null) - { + if (cluster != null) { cluster.shutdown(); } } + private static MiniDFSCluster startMini(String testName) throws IOException { + File baseDir = new File("./target/hdfs/" + testName).getAbsoluteFile(); + FileUtil.fullyDelete(baseDir); + Configuration conf = new Configuration(); + conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath()); + MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); + MiniDFSCluster hdfsCluster = builder.clusterId(testName).build(); + hdfsCluster.waitActive(); + return hdfsCluster; + } + + protected static URI formalizeClusterURI(URI clusterUri) throws URISyntaxException { + if (clusterUri.getPath() == null) { + return new URI( + clusterUri.getScheme(), + null, + clusterUri.getHost(), + clusterUri.getPort(), + "/", + null, + null); + } else if (clusterUri.getPath().trim() == "") { + return new URI( + clusterUri.getScheme(), + null, + clusterUri.getHost(), + clusterUri.getPort(), + "/", + null, + null); + } + return clusterUri; + } @Before public void setup() throws Exception { Configuration conf = new Configuration(); input = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile(); reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI().toString(); - String referenceIndex = ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai") - .toURI().toString(); + String referenceIndex = + ClassLoader.getSystemClassLoader().getResource("auxf.fa.fai").toURI().toString(); conf.set("mapred.input.dir", "file://" + input); URI hdfsRef = clusterUri.resolve("/tmp/auxf.fa"); @@ -71,41 +101,17 @@ public void setup() throws Exception { conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, hdfsRef.toString()); - taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); jobContext = new JobContextImpl(conf, taskAttemptContext.getJobID()); - - } - - private static MiniDFSCluster startMini(String testName) throws IOException { - File baseDir = new File("./target/hdfs/" + testName).getAbsoluteFile(); - FileUtil.fullyDelete(baseDir); - Configuration conf = new Configuration(); - conf.set(MiniDFSCluster.HDFS_MINIDFS_BASEDIR, baseDir.getAbsolutePath()); - MiniDFSCluster.Builder builder = new MiniDFSCluster.Builder(conf); - MiniDFSCluster hdfsCluster = builder.clusterId(testName).build(); - hdfsCluster.waitActive(); - return hdfsCluster; - } - - protected static URI formalizeClusterURI(URI clusterUri) throws URISyntaxException { - if (clusterUri.getPath()==null) { - return new URI(clusterUri.getScheme(), null, - clusterUri.getHost(), clusterUri.getPort(), - "/", null, null); - } else if 
(clusterUri.getPath().trim()=="") { - return new URI(clusterUri.getScheme(), null, - clusterUri.getHost(), clusterUri.getPort(), - "/", null, null); - } - return clusterUri; } @Test public void testReader() throws Exception { int expectedCount = 0; - SamReader samReader = SamReaderFactory.makeDefault() - .referenceSequence(new File(URI.create(reference))).open(new File(input)); + SamReader samReader = + SamReaderFactory.makeDefault() + .referenceSequence(new File(URI.create(reference))) + .open(new File(input)); for (SAMRecord r : samReader) { expectedCount++; } @@ -113,8 +119,8 @@ public void testReader() throws Exception { CRAMInputFormat inputFormat = new CRAMInputFormat(); List splits = inputFormat.getSplits(jobContext); assertEquals(1, splits.size()); - RecordReader reader = inputFormat - .createRecordReader(splits.get(0), taskAttemptContext); + RecordReader reader = + inputFormat.createRecordReader(splits.get(0), taskAttemptContext); reader.initialize(splits.get(0), taskAttemptContext); int actualCount = 0; @@ -124,5 +130,4 @@ public void testReader() throws Exception { assertEquals(expectedCount, actualCount); } - } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMOutputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMOutputFormat.java index 171013b..6d12a36 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestCRAMOutputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestCRAMOutputFormat.java @@ -1,9 +1,16 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.*; import htsjdk.samtools.cram.ref.ReferenceSource; import htsjdk.samtools.seekablestream.SeekableStream; +import java.io.*; import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Iterator; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -17,285 +24,254 @@ import org.junit.Test; import org.seqdoop.hadoop_bam.util.SAMFileMerger; import org.seqdoop.hadoop_bam.util.SAMHeaderReader; - -import java.io.*; -import java.nio.file.Paths; -import java.util.Iterator; import org.seqdoop.hadoop_bam.util.SAMOutputPreparer; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestCRAMOutputFormat { - private String testCRAMFileName; - private String testReferenceFileName; - private ReferenceSource testReferenceSource; - - private int expectedRecordCount; - private SAMFileHeader samFileHeader; - - private TaskAttemptContext taskAttemptContext; - private static Configuration conf; - - // CRAM output class that writes a header before records - static class CRAMTestWithHeaderOutputFormat - extends KeyIgnoringCRAMOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestBAM.header"; - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, - Path outputPath) throws IOException { - readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); - setWriteHeader(true); - return super.getRecordWriter(ctx, outputPath); - } - } - - // CRAM Output class that doesn't write a header out before records - static class CRAMTestNoHeaderOutputFormat - extends KeyIgnoringCRAMOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestBAM.header"; - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx, - Path outputPath) throws IOException { - 
// the writers require a header in order to create a codec, even if - // the header isn't being written out - readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); - setWriteHeader(false); - return super.getRecordWriter(ctx, outputPath); - } - } - - @Before - public void setup() throws Exception { - conf = new Configuration(); - - testCRAMFileName = ClassLoader.getSystemClassLoader() - .getResource("test.cram").getFile(); - testReferenceFileName = ClassLoader.getSystemClassLoader() - .getResource("auxf.fa").getFile(); - testReferenceSource = new ReferenceSource(Paths.get(testReferenceFileName)); - - conf.set("mapred.input.dir", "file://" + testCRAMFileName); - conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, - "file://" + testReferenceFileName); - - // fetch the SAMFile header from the original input to get the - // expected count - expectedRecordCount = getCRAMRecordCount(new File(testCRAMFileName)); - samFileHeader = SAMHeaderReader.readSAMHeaderFrom( - new Path(testCRAMFileName), conf); - - taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); - } - - @Test - public void testCRAMRecordWriterNoHeader() throws Exception { - final File outFile = File.createTempFile("testCRAMWriter", ".cram"); - outFile.deleteOnExit(); - final Path outPath = new Path(outFile.toURI()); - - final CRAMTestNoHeaderOutputFormat cramOut = new CRAMTestNoHeaderOutputFormat(); - conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testCRAMFileName); - - RecordWriter rw = - cramOut.getRecordWriter(taskAttemptContext, outPath); - - final SamReader samReader = SamReaderFactory.makeDefault() - .referenceSequence(new File(testReferenceFileName)) - .open(new File(testCRAMFileName)); - - for (final SAMRecord r : samReader) { - final SAMRecordWritable samRW = new SAMRecordWritable(); - samRW.set(r); - rw.write(null, samRW); - } - samReader.close(); - rw.close(taskAttemptContext); - - // now verify the container stream - final int actualCount = getCRAMRecordCount(outFile, samFileHeader, - testReferenceSource); - assertEquals(expectedRecordCount, actualCount); - } - @Test - public void testCRAMRecordWriterWithHeader() throws Exception { - final File outFile = File.createTempFile("testCRAMWriter", ".cram"); - outFile.deleteOnExit(); - final Path outPath = new Path(outFile.toURI()); - - final CRAMTestWithHeaderOutputFormat cramOut = new CRAMTestWithHeaderOutputFormat(); - conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testCRAMFileName); - - RecordWriter rw = - cramOut.getRecordWriter(taskAttemptContext, outPath); - - final SamReader samReader = SamReaderFactory.makeDefault() - .referenceSequence(new File(testReferenceFileName)) - .open(new File(testCRAMFileName)); - - for (final SAMRecord r : samReader) { - final SAMRecordWritable samRW = new SAMRecordWritable(); - samRW.set(r); - rw.write(null, samRW); - } - samReader.close(); - rw.close(taskAttemptContext); - - // now verify the container stream - final int actualCount = getCRAMRecordCount(outFile); - assertEquals(expectedRecordCount, actualCount); + private static Configuration conf; + private String testCRAMFileName; + private String testReferenceFileName; + private ReferenceSource testReferenceSource; + private int expectedRecordCount; + private SAMFileHeader samFileHeader; + private TaskAttemptContext taskAttemptContext; + + @Before + public void setup() throws Exception { + conf = new Configuration(); + + testCRAMFileName = ClassLoader.getSystemClassLoader().getResource("test.cram").getFile(); + 
testReferenceFileName = ClassLoader.getSystemClassLoader().getResource("auxf.fa").getFile(); + testReferenceSource = new ReferenceSource(Paths.get(testReferenceFileName)); + + conf.set("mapred.input.dir", "file://" + testCRAMFileName); + conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, "file://" + testReferenceFileName); + + // fetch the SAMFile header from the original input to get the + // expected count + expectedRecordCount = getCRAMRecordCount(new File(testCRAMFileName)); + samFileHeader = SAMHeaderReader.readSAMHeaderFrom(new Path(testCRAMFileName), conf); + + taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); + } + + @Test + public void testCRAMRecordWriterNoHeader() throws Exception { + final File outFile = File.createTempFile("testCRAMWriter", ".cram"); + outFile.deleteOnExit(); + final Path outPath = new Path(outFile.toURI()); + + final CRAMTestNoHeaderOutputFormat cramOut = new CRAMTestNoHeaderOutputFormat(); + conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testCRAMFileName); + + RecordWriter rw = + cramOut.getRecordWriter(taskAttemptContext, outPath); + + final SamReader samReader = + SamReaderFactory.makeDefault() + .referenceSequence(new File(testReferenceFileName)) + .open(new File(testCRAMFileName)); + + for (final SAMRecord r : samReader) { + final SAMRecordWritable samRW = new SAMRecordWritable(); + samRW.set(r); + rw.write(null, samRW); } - - @Test - public void testCRAMOutput() throws Exception { - final Path outputPath = doMapReduce(testCRAMFileName); - final File outFile = File.createTempFile("testCRAMWriter", ".cram"); - outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.CRAM, samFileHeader); - final File containerStreamFile = - new File(new File(outputPath.toUri()), "part-m-00000"); - final int actualCount = getCRAMRecordCount(outFile); - assertEquals(expectedRecordCount, actualCount); + samReader.close(); + rw.close(taskAttemptContext); + + // now verify the container stream + final int actualCount = getCRAMRecordCount(outFile, samFileHeader, testReferenceSource); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testCRAMRecordWriterWithHeader() throws Exception { + final File outFile = File.createTempFile("testCRAMWriter", ".cram"); + outFile.deleteOnExit(); + final Path outPath = new Path(outFile.toURI()); + + final CRAMTestWithHeaderOutputFormat cramOut = new CRAMTestWithHeaderOutputFormat(); + conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, testCRAMFileName); + + RecordWriter rw = + cramOut.getRecordWriter(taskAttemptContext, outPath); + + final SamReader samReader = + SamReaderFactory.makeDefault() + .referenceSequence(new File(testReferenceFileName)) + .open(new File(testCRAMFileName)); + + for (final SAMRecord r : samReader) { + final SAMRecordWritable samRW = new SAMRecordWritable(); + samRW.set(r); + rw.write(null, samRW); } - - @Test - public void testCRAMRoundTrip() throws Exception { - // run a m/r job to write out a cram file - Path outputPath = doMapReduce(testCRAMFileName); - - // merge the parts, and write to a temp file - final File outFile = File.createTempFile("testCRAMWriter", ".cram"); - outFile.deleteOnExit(); - SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.CRAM, samFileHeader); - - // now use the assembled output as m/r input - outputPath = doMapReduce(outFile.getAbsolutePath()); - - // merge the parts again - 
SAMFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - SAMFormat.CRAM, samFileHeader); - - // verify the final output - final int actualCount = getCRAMRecordCount(outFile); - assertEquals(expectedRecordCount, actualCount); + samReader.close(); + rw.close(taskAttemptContext); + + // now verify the container stream + final int actualCount = getCRAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testCRAMOutput() throws Exception { + final Path outputPath = doMapReduce(testCRAMFileName); + final File outFile = File.createTempFile("testCRAMWriter", ".cram"); + outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.CRAM, samFileHeader); + final File containerStreamFile = new File(new File(outputPath.toUri()), "part-m-00000"); + final int actualCount = getCRAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + @Test + public void testCRAMRoundTrip() throws Exception { + // run a m/r job to write out a cram file + Path outputPath = doMapReduce(testCRAMFileName); + + // merge the parts, and write to a temp file + final File outFile = File.createTempFile("testCRAMWriter", ".cram"); + outFile.deleteOnExit(); + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.CRAM, samFileHeader); + + // now use the assembled output as m/r input + outputPath = doMapReduce(outFile.getAbsolutePath()); + + // merge the parts again + SAMFileMerger.mergeParts( + outputPath.toUri().toString(), outFile.toURI().toString(), SAMFormat.CRAM, samFileHeader); + + // verify the final output + final int actualCount = getCRAMRecordCount(outFile); + assertEquals(expectedRecordCount, actualCount); + } + + private Path doMapReduce(final String inputFile) throws Exception { + final FileSystem fileSystem = FileSystem.get(conf); + final Path inputPath = new Path(inputFile); + final Path outputPath = fileSystem.makeQualified(new Path("target/out")); + fileSystem.delete(outputPath, true); + + final Job job = Job.getInstance(conf); + FileInputFormat.setInputPaths(job, inputPath); + + job.setInputFormatClass(CRAMInputFormat.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(SAMRecordWritable.class); + + conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile); + job.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class); + job.setOutputKeyClass(LongWritable.class); + job.setOutputValueClass(SAMRecordWritable.class); + + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, outputPath); + + final boolean success = job.waitForCompletion(true); + assertTrue(success); + + return outputPath; + } + + private int getCRAMRecordCount(final File cramFile) { + final CRAMFileReader cramReader = + new CRAMFileReader(cramFile, (File) null, testReferenceSource); + final Iterator it = cramReader.getIterator(); + int recCount = 0; + while (it.hasNext()) { + it.next(); + recCount++; } - - private Path doMapReduce(final String inputFile) throws Exception { - final FileSystem fileSystem = FileSystem.get(conf); - final Path inputPath = new Path(inputFile); - final Path outputPath = fileSystem.makeQualified(new Path("target/out")); - fileSystem.delete(outputPath, true); - - final Job job = Job.getInstance(conf); - FileInputFormat.setInputPaths(job, inputPath); - - job.setInputFormatClass(CRAMInputFormat.class); - job.setMapOutputKeyClass(LongWritable.class); - 
job.setMapOutputValueClass(SAMRecordWritable.class); - - conf.set(CRAMTestNoHeaderOutputFormat.READ_HEADER_FROM_FILE, inputFile); - job.setOutputFormatClass(CRAMTestNoHeaderOutputFormat.class); - job.setOutputKeyClass(LongWritable.class); - job.setOutputValueClass(SAMRecordWritable.class); - - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, outputPath); - - final boolean success = job.waitForCompletion(true); - assertTrue(success); - - return outputPath; - } - - private int getCRAMRecordCount(final File cramFile) { - final CRAMFileReader cramReader = - new CRAMFileReader(cramFile, - (File)null, - testReferenceSource); - final Iterator it = cramReader.getIterator(); - int recCount = 0; - while (it.hasNext()) { - it.next(); - recCount++; - } - cramReader.close(); - return recCount; - } - - private int getCRAMRecordCount( - final File containerStreamFile, - final SAMFileHeader header, - final ReferenceSource refSource) throws IOException - { - // assemble a proper CRAM file from the container stream shard(s) in - // order to verify the contents - final ByteArrayInputStream mergedStream = mergeCRAMContainerStream ( - containerStreamFile, - header, - refSource - ); - - // now we can verify that we can read everything back in - final CRAMFileReader resultCRAMReader = new CRAMFileReader( + cramReader.close(); + return recCount; + } + + private int getCRAMRecordCount( + final File containerStreamFile, final SAMFileHeader header, final ReferenceSource refSource) + throws IOException { + // assemble a proper CRAM file from the container stream shard(s) in + // order to verify the contents + final ByteArrayInputStream mergedStream = + mergeCRAMContainerStream(containerStreamFile, header, refSource); + + // now we can verify that we can read everything back in + final CRAMFileReader resultCRAMReader = + new CRAMFileReader( mergedStream, (SeekableStream) null, refSource, ValidationStringency.DEFAULT_STRINGENCY); - final Iterator it = resultCRAMReader.getIterator(); - int actualCount = 0; - while (it.hasNext()) { - it.next(); - actualCount++; - } - return actualCount; + final Iterator it = resultCRAMReader.getIterator(); + int actualCount = 0; + while (it.hasNext()) { + it.next(); + actualCount++; + } + return actualCount; + } + + // TODO: SAMOutputPreparer knows how to prepare the beginning of a stream, + // but not how to populate or terminate it (which for CRAM requires a special + // terminating EOF container). 
For now we'll use SAMPreparer here so we get + // some test coverage, and then manually populate and terminate, but we + // should consolidate/refactor the knowledge of how to do this aggregation + // for each output type in one place in a separate PR + // https://github.com/HadoopGenomics/Hadoop-BAM/issues/61 + private ByteArrayInputStream mergeCRAMContainerStream( + final File containerStreamFile, final SAMFileHeader header, final ReferenceSource refSource) + throws IOException { + // assemble a proper CRAM file from the container stream shard(s) in + // order to verify the contents + final ByteArrayOutputStream cramOutputStream = new ByteArrayOutputStream(); + // write out the cram file header + new SAMOutputPreparer().prepareForRecords(cramOutputStream, SAMFormat.CRAM, header); + // now copy the contents of the container stream shard(s) written out by + // the M/R job + final ByteArrayOutputStream containerOutputStream = new ByteArrayOutputStream(); + Files.copy(containerStreamFile.toPath(), containerOutputStream); + containerOutputStream.writeTo(cramOutputStream); + + // use containerStreamWriter directly to properly terminate the output + // stream with an EOF container + final CRAMContainerStreamWriter containerStreamWriter = + new CRAMContainerStreamWriter(cramOutputStream, null, refSource, header, "CRAMTest"); + containerStreamWriter.finish(true); // close and write an EOF container + cramOutputStream.close(); + + return new ByteArrayInputStream(cramOutputStream.toByteArray()); + } + + // CRAM output class that writes a header before records + static class CRAMTestWithHeaderOutputFormat extends KeyIgnoringCRAMOutputFormat { + + public static final String READ_HEADER_FROM_FILE = "TestBAM.header"; + + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx, Path outputPath) throws IOException { + readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); + setWriteHeader(true); + return super.getRecordWriter(ctx, outputPath); } + } + + // CRAM Output class that doesn't write a header out before records + static class CRAMTestNoHeaderOutputFormat extends KeyIgnoringCRAMOutputFormat { + + public static final String READ_HEADER_FROM_FILE = "TestBAM.header"; - // TODO: SAMOutputPreparer knows how to prepare the beginning of a stream, - // but not how to populate or terminate it (which for CRAM requires a special - // terminating EOF container). 
For now we'll use SAMPreparer here so we get - // some test coverage, and then manually populate and terminate, but we - // should consolidate/refactor the knowledge of how to do this aggregation - // for each output type in one place in a separate PR - // https://github.com/HadoopGenomics/Hadoop-BAM/issues/61 - private ByteArrayInputStream mergeCRAMContainerStream( - final File containerStreamFile, - final SAMFileHeader header, - final ReferenceSource refSource) throws IOException - { - // assemble a proper CRAM file from the container stream shard(s) in - // order to verify the contents - final ByteArrayOutputStream cramOutputStream = new ByteArrayOutputStream(); - // write out the cram file header - new SAMOutputPreparer().prepareForRecords( - cramOutputStream, - SAMFormat.CRAM, - header); - // now copy the contents of the container stream shard(s) written out by - // the M/R job - final ByteArrayOutputStream containerOutputStream = new ByteArrayOutputStream(); - Files.copy(containerStreamFile.toPath(), containerOutputStream); - containerOutputStream.writeTo(cramOutputStream); - - // use containerStreamWriter directly to properly terminate the output - // stream with an EOF container - final CRAMContainerStreamWriter containerStreamWriter = - new CRAMContainerStreamWriter( - cramOutputStream, - null, - refSource, - header, - "CRAMTest"); - containerStreamWriter.finish(true); // close and write an EOF container - cramOutputStream.close(); - - return new ByteArrayInputStream(cramOutputStream.toByteArray()); + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx, Path outputPath) throws IOException { + // the writers require a header in order to create a codec, even if + // the header isn't being written out + readSAMHeaderFrom(new Path(conf.get(READ_HEADER_FROM_FILE)), conf); + setWriteHeader(false); + return super.getRecordWriter(ctx, outputPath); } + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestConfHelper.java b/src/test/java/org/seqdoop/hadoop_bam/TestConfHelper.java index 1866943..e0d2beb 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestConfHelper.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestConfHelper.java @@ -22,69 +22,62 @@ package org.seqdoop.hadoop_bam; -import org.seqdoop.hadoop_bam.util.ConfHelper; - -import org.junit.*; import static org.junit.Assert.*; import org.apache.hadoop.conf.Configuration; +import org.junit.*; +import org.seqdoop.hadoop_bam.util.ConfHelper; -public class TestConfHelper -{ - @Test - public void testParseBooleanValidValues() - { - assertTrue(ConfHelper.parseBoolean("true", false)); - assertTrue(ConfHelper.parseBoolean("tRuE", false)); - assertTrue(ConfHelper.parseBoolean("TRUE", false)); - assertTrue(ConfHelper.parseBoolean("t", false)); - assertTrue(ConfHelper.parseBoolean("yes", false)); - assertTrue(ConfHelper.parseBoolean("y", false)); - assertTrue(ConfHelper.parseBoolean("Y", false)); - assertTrue(ConfHelper.parseBoolean("1", false)); +public class TestConfHelper { - assertFalse(ConfHelper.parseBoolean("false", true)); - assertFalse(ConfHelper.parseBoolean("faLse", true)); - assertFalse(ConfHelper.parseBoolean("FALSE", true)); - assertFalse(ConfHelper.parseBoolean("f", true)); - assertFalse(ConfHelper.parseBoolean("no", true)); - assertFalse(ConfHelper.parseBoolean("n", true)); - assertFalse(ConfHelper.parseBoolean("N", true)); - assertFalse(ConfHelper.parseBoolean("0", true)); - } + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestConfHelper.class.getName()); + } - @Test - 
public void testParseBooleanNull() - { - assertTrue(ConfHelper.parseBoolean(null, true)); - assertFalse(ConfHelper.parseBoolean(null, false)); - } + @Test + public void testParseBooleanValidValues() { + assertTrue(ConfHelper.parseBoolean("true", false)); + assertTrue(ConfHelper.parseBoolean("tRuE", false)); + assertTrue(ConfHelper.parseBoolean("TRUE", false)); + assertTrue(ConfHelper.parseBoolean("t", false)); + assertTrue(ConfHelper.parseBoolean("yes", false)); + assertTrue(ConfHelper.parseBoolean("y", false)); + assertTrue(ConfHelper.parseBoolean("Y", false)); + assertTrue(ConfHelper.parseBoolean("1", false)); - @Test(expected=IllegalArgumentException.class) - public void testParseBooleanInvalidValue() - { - ConfHelper.parseBoolean("dodo", true); - } + assertFalse(ConfHelper.parseBoolean("false", true)); + assertFalse(ConfHelper.parseBoolean("faLse", true)); + assertFalse(ConfHelper.parseBoolean("FALSE", true)); + assertFalse(ConfHelper.parseBoolean("f", true)); + assertFalse(ConfHelper.parseBoolean("no", true)); + assertFalse(ConfHelper.parseBoolean("n", true)); + assertFalse(ConfHelper.parseBoolean("N", true)); + assertFalse(ConfHelper.parseBoolean("0", true)); + } - @Test - public void testParseBooleanFromConfValue() - { - final String propName = "my.property"; - Configuration conf = new Configuration(); - conf.set(propName, "t"); - assertTrue(ConfHelper.parseBoolean(conf, propName, false)); - } + @Test + public void testParseBooleanNull() { + assertTrue(ConfHelper.parseBoolean(null, true)); + assertFalse(ConfHelper.parseBoolean(null, false)); + } - @Test - public void testParseBooleanFromConfNull() - { - Configuration conf = new Configuration(); - assertTrue(ConfHelper.parseBoolean(conf, "my.property", true)); - assertFalse(ConfHelper.parseBoolean(conf, "my.property", false)); - } + @Test(expected = IllegalArgumentException.class) + public void testParseBooleanInvalidValue() { + ConfHelper.parseBoolean("dodo", true); + } + @Test + public void testParseBooleanFromConfValue() { + final String propName = "my.property"; + Configuration conf = new Configuration(); + conf.set(propName, "t"); + assertTrue(ConfHelper.parseBoolean(conf, propName, false)); + } - public static void main(String args[]) { - org.junit.runner.JUnitCore.main(TestConfHelper.class.getName()); - } + @Test + public void testParseBooleanFromConfNull() { + Configuration conf = new Configuration(); + assertTrue(ConfHelper.parseBoolean(conf, "my.property", true)); + assertFalse(ConfHelper.parseBoolean(conf, "my.property", false)); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestFastaInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestFastaInputFormat.java index b484b36..3d744f5 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestFastaInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestFastaInputFormat.java @@ -1,5 +1,10 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; @@ -14,12 +19,8 @@ import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestFastaInputFormat { + private String input; private TaskAttemptContext taskAttemptContext; private 
JobContext jobContext; @@ -42,29 +43,53 @@ public void testReader() throws Exception { FastaInputFormat inputFormat = new FastaInputFormat(); List splits = inputFormat.getSplits(jobContext); assertEquals(2, splits.size()); - RecordReader reader = inputFormat - .createRecordReader(splits.get(0), taskAttemptContext); + RecordReader reader = + inputFormat.createRecordReader(splits.get(0), taskAttemptContext); reader.initialize(splits.get(0), taskAttemptContext); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:11"), reader.getCurrentKey()); - assertEquals(new Text("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:11"), reader.getCurrentKey()); + assertEquals( + new Text( + "TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTA"), + reader.getCurrentValue().getSequence()); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:182"), reader.getCurrentKey()); - assertEquals(new Text("ACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:182"), + reader.getCurrentKey()); + assertEquals( + new Text( + "ACCCTAACCCTAACCCTAACCCTAACCCAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAAC"), + reader.getCurrentValue().getSequence()); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1163"), reader.getCurrentKey()); - assertEquals(new Text("CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCC"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1163"), + reader.getCurrentKey()); + assertEquals( + new Text( + "CCTAACCCTAACCCTAACCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCC"), + reader.getCurrentValue().getSequence()); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1244"), reader.getCurrentKey()); - assertEquals(new Text("TAACCCTAAACCCTAAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCC"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1244"), + reader.getCurrentKey()); + assertEquals( + new Text( + "TAACCCTAAACCCTAAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCAACCCCAACCCCAACCCCAACCCCAACCC"), + reader.getCurrentValue().getSequence()); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1325"), reader.getCurrentKey()); - assertEquals(new Text("CAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCC"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr1 dna:chromosome chromosome:GRCh37:1:1:249250621:1325"), + reader.getCurrentKey()); + assertEquals( + new Text( + "CAACCCTAACCCCTAACCCTAACCCTAACCCTACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCC"), + reader.getCurrentValue().getSequence()); assertFalse(reader.nextKeyValue()); @@ -72,12 +97,15 @@ public void testReader() throws Exception { reader.initialize(splits.get(1), taskAttemptContext); assertTrue(reader.nextKeyValue()); - assertEquals(new Text("chr2 dna:chromosome 
chromosome:GRCh37:2:1:243199373:11"), reader.getCurrentKey()); - assertEquals(new Text("TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTC"), reader.getCurrentValue().getSequence()); + assertEquals( + new Text("chr2 dna:chromosome chromosome:GRCh37:2:1:243199373:11"), reader.getCurrentKey()); + assertEquals( + new Text( + "TAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTC"), + reader.getCurrentValue().getSequence()); assertFalse(reader.nextKeyValue()); reader.close(); } - } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestFastqInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestFastqInputFormat.java index 2574495..d4d4f53 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestFastqInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestFastqInputFormat.java @@ -22,534 +22,560 @@ package org.seqdoop.hadoop_bam; -import org.seqdoop.hadoop_bam.FastqInputFormat.FastqRecordReader; +import static org.junit.Assert.*; +import java.io.BufferedOutputStream; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.FileOutputStream; -import java.io.BufferedOutputStream; - -import org.junit.*; -import static org.junit.Assert.*; - -import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.junit.*; +import org.seqdoop.hadoop_bam.FastqInputFormat.FastqRecordReader; -public class TestFastqInputFormat -{ - public static final String oneFastq = - "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################"; - - public static final String twoFastq = - "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################\n" + - - "@ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1\n" + - "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\n" + - "+\n" + - "BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################"; - - public static final String illuminaFastq = - "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "##########################################################################################~"; - - public static final String illuminaFastqWithPhred64Quality = - "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; - - public static final String oneFastqWithoutRead = - "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" 
+ - "+\n" + - "###########################################################################################"; - - public static final String fastqWithIdTwice = - "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "###########################################################################################"; - - public static final String fastqWithAmpersandQuality = - "+lousy.id HWI-ST168_161:1:1:1373:2042/1\n" + - "@##########################################################################################\n" + - "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + - "###########################################################################################"; - - public static final String illuminaFastqNoFlowCellID = - "@EAS139:136::2:5:1000:12850 1:Y:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################"; - - public static final String illuminaFastqNegativeXYPos = - "@EAS139:136:FC706VJ:2:5:-1000:-12850 1:Y:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################"; - - public static final String illuminaFastqNoIndex = - "@EAS139:136::2:5:1000:12850 1:Y:18:\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################"; - - public static final String twoFastqWithIllumina = - "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################\n" + - - "@EAS139:136:FC706VJ:2:5:1000:12850 2:N:18:ATCACG\n" + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + - "+\n" + - "###########################################################################################\n" + - - "@EAS139:136:FC706VJ:2:5:1000:12850 3:N:18:ATCACG\n" + - "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\n" + - "+\n" + - "BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################"; - - private JobConf conf; - private FileSplit split; - private File tempFastq; - private File tempGz; - - private Text key; - private SequencedFragment fragment; - - @Before - public void setup() throws IOException - { - tempFastq = File.createTempFile("test_fastq_input_format", "fastq"); - tempGz = File.createTempFile("test_fastq_input_format", ".gz"); - conf = new JobConf(); - key = new Text(); - fragment = new SequencedFragment(); - } - - @After - public void tearDown() - { - tempFastq.delete(); - tempGz.delete(); - split = null; - } - - private void writeToTempFastq(String s) throws IOException - { - PrintWriter fastqOut = new PrintWriter( new BufferedWriter( new FileWriter(tempFastq) ) ); - fastqOut.write(s); - fastqOut.close(); - } - - private 
FastqRecordReader createReaderForOneFastq() throws IOException - { - writeToTempFastq(oneFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, oneFastq.length(), null); - - return new FastqRecordReader(conf, split); - } - - @Test - public void testReadFromStart() throws IOException - { - FastqRecordReader reader = createReaderForOneFastq(); - - assertEquals(0, reader.getPos()); - assertEquals(0.0, reader.getProgress(), 0.01); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - assertEquals("###########################################################################################", fragment.getQuality().toString()); - - assertEquals(oneFastq.length(), reader.getPos()); - assertEquals(1.0, reader.getProgress(), 0.01); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testReadStartInMiddle() throws IOException - { - writeToTempFastq(twoFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 10, twoFastq.length() - 10, null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - assertEquals(oneFastq.length() + 1, reader.getPos()); // The start of the second record. We +1 for the \n that is not in oneFastq - assertEquals(0.0, reader.getProgress(), 0.01); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); - assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); - assertEquals("BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################", fragment.getQuality().toString()); - - assertEquals(twoFastq.length(), reader.getPos()); // now should be at the end of the data - assertEquals(1.0, reader.getProgress(), 0.01); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testSliceEndsBeforeEndOfFile() throws IOException - { - writeToTempFastq(twoFastq); - // slice ends at position 10--i.e. somewhere in the first record. The second record should not be read. 
- split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, 10, null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); - - assertFalse("FastqRecordReader is reading a record that starts after the end of the slice", reader.next(key, fragment)); - } - - @Test - public void testGetReadNumFromName() throws IOException - { - FastqRecordReader reader = createReaderForOneFastq(); - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals(1, fragment.getRead().intValue()); - } - - @Test - public void testNameWithoutReadNum() throws IOException - { - writeToTempFastq(oneFastqWithoutRead); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, oneFastqWithoutRead.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertNull("Read is not null", fragment.getRead()); - } - - @Test - public void testIlluminaMetaInfo() throws IOException - { - writeToTempFastq(illuminaFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastq.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - - assertEquals("EAS139", fragment.getInstrument()); - assertEquals(136, fragment.getRunNumber().intValue()); - assertEquals("FC706VJ", fragment.getFlowcellId()); - assertEquals(2, fragment.getLane().intValue()); - assertEquals(5, fragment.getTile().intValue()); - assertEquals(1000, fragment.getXpos().intValue()); - assertEquals(12850, fragment.getYpos().intValue()); - assertEquals(1, fragment.getRead().intValue()); - assertEquals(false, fragment.getFilterPassed().booleanValue()); - assertEquals(18, fragment.getControlNumber().intValue()); - assertEquals("ATCACG", fragment.getIndexSequence()); - } - - @Test - public void testIlluminaMetaInfoNullFC() throws IOException - { - writeToTempFastq(illuminaFastqNoFlowCellID); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastqNoFlowCellID.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - - assertEquals("EAS139", fragment.getInstrument()); - assertEquals(136, fragment.getRunNumber().intValue()); - assertEquals("", fragment.getFlowcellId()); - assertEquals(2, fragment.getLane().intValue()); - assertEquals(5, fragment.getTile().intValue()); - assertEquals(1000, fragment.getXpos().intValue()); - assertEquals(12850, fragment.getYpos().intValue()); - assertEquals(1, fragment.getRead().intValue()); - assertEquals(false, fragment.getFilterPassed().booleanValue()); - assertEquals(18, fragment.getControlNumber().intValue()); - assertEquals("ATCACG", fragment.getIndexSequence()); - } - - @Test - public void testIlluminaMetaInfoNegativeXYpos() throws IOException - { - writeToTempFastq(illuminaFastqNegativeXYPos); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastqNegativeXYPos.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - - assertEquals("EAS139", fragment.getInstrument()); - assertEquals(136, fragment.getRunNumber().intValue()); - assertEquals("FC706VJ", fragment.getFlowcellId()); - assertEquals(2, 
fragment.getLane().intValue()); - assertEquals(5, fragment.getTile().intValue()); - assertEquals(-1000, fragment.getXpos().intValue()); - assertEquals(-12850, fragment.getYpos().intValue()); - assertEquals(1, fragment.getRead().intValue()); - assertEquals(false, fragment.getFilterPassed().booleanValue()); - assertEquals(18, fragment.getControlNumber().intValue()); - assertEquals("ATCACG", fragment.getIndexSequence()); - } - - @Test - public void testOneIlluminaThenNot() throws IOException - { - writeToTempFastq(illuminaFastq + "\n" + oneFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastq.length() + oneFastq.length() + 1, null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - assertTrue(reader.next(key, fragment)); - assertEquals("EAS139", fragment.getInstrument()); - - assertTrue(reader.next(key, fragment)); - assertNull(fragment.getInstrument()); - - assertFalse(reader.next(key, fragment)); - } - - @Test - public void testOneNotThenIllumina() throws IOException - { - writeToTempFastq(oneFastq + "\n" + illuminaFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastq.length() + oneFastq.length() + 1, null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - assertTrue(reader.next(key, fragment)); - assertNull(fragment.getInstrument()); - - assertTrue(reader.next(key, fragment)); - assertNull(fragment.getInstrument()); - - assertFalse(reader.next(key, fragment)); - } - - @Test - public void testProgress() throws IOException - { - writeToTempFastq(twoFastq); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, twoFastq.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - assertEquals(0.0, reader.getProgress(), 0.01); - - reader.next(key, fragment); - assertEquals(0.5, reader.getProgress(), 0.01); - - reader.next(key, fragment); - assertEquals(1.0, reader.getProgress(), 0.01); - } - - @Test - public void testCreateKey() throws IOException - { - FastqRecordReader reader = createReaderForOneFastq(); - assertTrue(reader.createKey() instanceof Text); - } - - @Test - public void testCreateValue() throws IOException - { - FastqRecordReader reader = createReaderForOneFastq(); - assertTrue(reader.createValue() instanceof SequencedFragment); - } - - @Test - public void testClose() throws IOException - { - FastqRecordReader reader = createReaderForOneFastq(); - // doesn't really do anything but exercise the code - reader.close(); - } - - @Test - public void testReadFastqWithIdTwice() throws IOException - { - writeToTempFastq(fastqWithIdTwice); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, fastqWithIdTwice.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - assertEquals("###########################################################################################", fragment.getQuality().toString()); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testReadFastqWithAmpersandQuality() throws IOException - { - writeToTempFastq(fastqWithAmpersandQuality); - // split doesn't start at 0, forcing reader to advance looking for first complete record - split = new 
FileSplit(new Path(tempFastq.toURI().toString()), 3, fastqWithAmpersandQuality.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - assertEquals("###########################################################################################", fragment.getQuality().toString()); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testMakePositionMessage() throws IOException - { - writeToTempFastq(fastqWithIdTwice); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, fastqWithIdTwice.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - assertNotNull(reader.makePositionMessage()); - } - - @Test - public void testFastqWithIlluminaEncoding() throws IOException - { - conf.set("hbam.fastq-input.base-quality-encoding", "illumina"); - verifyInputQualityConfig(); - } - - @Test - public void testFastqWithIlluminaEncodingAndGenericInputConfig() throws IOException - { - conf.set("hbam.input.base-quality-encoding", "illumina"); - verifyInputQualityConfig(); - } - - private void verifyInputQualityConfig() throws IOException - { - writeToTempFastq(illuminaFastqWithPhred64Quality); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastqWithPhred64Quality.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - assertEquals("CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC", fragment.getQuality().toString()); - } - - @Test - public void testGzCompressedInput() throws IOException - { - // write gzip-compressed data - GzipCodec codec = new GzipCodec(); - PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); - fastqOut.write(twoFastq); - fastqOut.close(); - - // now try to read it - split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null); - FastqRecordReader reader = new FastqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - - retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); - assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); - } - - @Test(expected=RuntimeException.class) - public void testCompressedSplit() throws IOException - { - // write gzip-compressed data - GzipCodec codec = new GzipCodec(); - PrintWriter fastqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); - fastqOut.write(twoFastq); - fastqOut.close(); - - // now try to read it starting from the middle - split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null); - FastqRecordReader reader = new FastqRecordReader(conf, split); - } - - 
@Test - public void testIlluminaNoIndex() throws IOException - { - writeToTempFastq(illuminaFastqNoIndex); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastqNoIndex.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - - // ensure all meta-data was picked up - assertEquals("EAS139", fragment.getInstrument()); - assertEquals(136, fragment.getRunNumber().intValue()); - // now verify the index - assertEquals("", fragment.getIndexSequence()); - } - - @Test - public void testSkipFailedQC() throws IOException - { - conf.set("hbam.fastq-input.filter-failed-qc", "true"); - verifySkipFailedQC(); - } - - @Test - public void testSkipFailedQCGenericConfig() throws IOException - { - conf.set("hbam.input.filter-failed-qc", "true"); - verifySkipFailedQC(); - } - - private void verifySkipFailedQC() throws IOException - { - writeToTempFastq(twoFastqWithIllumina); - split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, twoFastqWithIllumina.length(), null); - - FastqRecordReader reader = new FastqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - assertEquals(2, (int)fragment.getRead()); - - found = reader.next(key, fragment); - assertTrue(found); - assertEquals(3, (int)fragment.getRead()); - - found = reader.next(key, fragment); - assertFalse(found); - } - - public static void main(String args[]) { - org.junit.runner.JUnitCore.main(TestFastqInputFormat.class.getName()); - } +public class TestFastqInputFormat { + + public static final String oneFastq = + "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################"; + + public static final String twoFastq = + "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################\n" + + "@ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1\n" + + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\n" + + "+\n" + + "BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################"; + + public static final String illuminaFastq = + "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "##########################################################################################~"; + + public static final String illuminaFastqWithPhred64Quality = + "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"; + + public static final String oneFastqWithoutRead = + "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################"; + + public static final String fastqWithIdTwice = + "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + 
"TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + "###########################################################################################"; + + public static final String fastqWithAmpersandQuality = + "+lousy.id HWI-ST168_161:1:1:1373:2042/1\n" + + "@##########################################################################################\n" + + "@ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1\n" + + "###########################################################################################"; + + public static final String illuminaFastqNoFlowCellID = + "@EAS139:136::2:5:1000:12850 1:Y:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################"; + + public static final String illuminaFastqNegativeXYPos = + "@EAS139:136:FC706VJ:2:5:-1000:-12850 1:Y:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################"; + + public static final String illuminaFastqNoIndex = + "@EAS139:136::2:5:1000:12850 1:Y:18:\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################"; + + public static final String twoFastqWithIllumina = + "@EAS139:136:FC706VJ:2:5:1000:12850 1:Y:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################\n" + + "@EAS139:136:FC706VJ:2:5:1000:12850 2:N:18:ATCACG\n" + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\n" + + "+\n" + + "###########################################################################################\n" + + "@EAS139:136:FC706VJ:2:5:1000:12850 3:N:18:ATCACG\n" + + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\n" + + "+\n" + + "BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################"; + + private JobConf conf; + private FileSplit split; + private File tempFastq; + private File tempGz; + + private Text key; + private SequencedFragment fragment; + + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestFastqInputFormat.class.getName()); + } + + @Before + public void setup() throws IOException { + tempFastq = File.createTempFile("test_fastq_input_format", "fastq"); + tempGz = File.createTempFile("test_fastq_input_format", ".gz"); + conf = new JobConf(); + key = new Text(); + fragment = new SequencedFragment(); + } + + @After + public void tearDown() { + tempFastq.delete(); + tempGz.delete(); + split = null; + } + + private void writeToTempFastq(String s) throws IOException { + PrintWriter fastqOut = new PrintWriter(new BufferedWriter(new FileWriter(tempFastq))); + fastqOut.write(s); + fastqOut.close(); + } + + private FastqRecordReader createReaderForOneFastq() throws IOException { + writeToTempFastq(oneFastq); + split = new 
FileSplit(new Path(tempFastq.toURI().toString()), 0, oneFastq.length(), null); + + return new FastqRecordReader(conf, split); + } + + @Test + public void testReadFromStart() throws IOException { + FastqRecordReader reader = createReaderForOneFastq(); + + assertEquals(0, reader.getPos()); + assertEquals(0.0, reader.getProgress(), 0.01); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + assertEquals( + "###########################################################################################", + fragment.getQuality().toString()); + + assertEquals(oneFastq.length(), reader.getPos()); + assertEquals(1.0, reader.getProgress(), 0.01); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testReadStartInMiddle() throws IOException { + writeToTempFastq(twoFastq); + split = new FileSplit(new Path(tempFastq.toURI().toString()), 10, twoFastq.length() - 10, null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + assertEquals( + oneFastq.length() + 1, + reader + .getPos()); // The start of the second record. We +1 for the \n that is not in oneFastq + assertEquals(0.0, reader.getProgress(), 0.01); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); + assertEquals( + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", + fragment.getSequence().toString()); + assertEquals( + "BDDCDBDD?A=?=:=7,7*@A;;53/53.:@>@@4=>@@@=?1?###############################################", + fragment.getQuality().toString()); + + assertEquals(twoFastq.length(), reader.getPos()); // now should be at the end of the data + assertEquals(1.0, reader.getProgress(), 0.01); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testSliceEndsBeforeEndOfFile() throws IOException { + writeToTempFastq(twoFastq); + // slice ends at position 10--i.e. somewhere in the first record. The second record should not + // be read. 
+ split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, 10, null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); + + assertFalse( + "FastqRecordReader is reading a record that starts after the end of the slice", + reader.next(key, fragment)); + } + + @Test + public void testGetReadNumFromName() throws IOException { + FastqRecordReader reader = createReaderForOneFastq(); + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals(1, fragment.getRead().intValue()); + } + + @Test + public void testNameWithoutReadNum() throws IOException { + writeToTempFastq(oneFastqWithoutRead); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 0, oneFastqWithoutRead.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertNull("Read is not null", fragment.getRead()); + } + + @Test + public void testIlluminaMetaInfo() throws IOException { + writeToTempFastq(illuminaFastq); + split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, illuminaFastq.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + + assertEquals("EAS139", fragment.getInstrument()); + assertEquals(136, fragment.getRunNumber().intValue()); + assertEquals("FC706VJ", fragment.getFlowcellId()); + assertEquals(2, fragment.getLane().intValue()); + assertEquals(5, fragment.getTile().intValue()); + assertEquals(1000, fragment.getXpos().intValue()); + assertEquals(12850, fragment.getYpos().intValue()); + assertEquals(1, fragment.getRead().intValue()); + assertEquals(false, fragment.getFilterPassed().booleanValue()); + assertEquals(18, fragment.getControlNumber().intValue()); + assertEquals("ATCACG", fragment.getIndexSequence()); + } + + @Test + public void testIlluminaMetaInfoNullFC() throws IOException { + writeToTempFastq(illuminaFastqNoFlowCellID); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 0, illuminaFastqNoFlowCellID.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + + assertEquals("EAS139", fragment.getInstrument()); + assertEquals(136, fragment.getRunNumber().intValue()); + assertEquals("", fragment.getFlowcellId()); + assertEquals(2, fragment.getLane().intValue()); + assertEquals(5, fragment.getTile().intValue()); + assertEquals(1000, fragment.getXpos().intValue()); + assertEquals(12850, fragment.getYpos().intValue()); + assertEquals(1, fragment.getRead().intValue()); + assertEquals(false, fragment.getFilterPassed().booleanValue()); + assertEquals(18, fragment.getControlNumber().intValue()); + assertEquals("ATCACG", fragment.getIndexSequence()); + } + + @Test + public void testIlluminaMetaInfoNegativeXYpos() throws IOException { + writeToTempFastq(illuminaFastqNegativeXYPos); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 0, illuminaFastqNegativeXYPos.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + + assertEquals("EAS139", fragment.getInstrument()); + assertEquals(136, fragment.getRunNumber().intValue()); + assertEquals("FC706VJ", fragment.getFlowcellId()); + 
assertEquals(2, fragment.getLane().intValue()); + assertEquals(5, fragment.getTile().intValue()); + assertEquals(-1000, fragment.getXpos().intValue()); + assertEquals(-12850, fragment.getYpos().intValue()); + assertEquals(1, fragment.getRead().intValue()); + assertEquals(false, fragment.getFilterPassed().booleanValue()); + assertEquals(18, fragment.getControlNumber().intValue()); + assertEquals("ATCACG", fragment.getIndexSequence()); + } + + @Test + public void testOneIlluminaThenNot() throws IOException { + writeToTempFastq(illuminaFastq + "\n" + oneFastq); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), + 0, + illuminaFastq.length() + oneFastq.length() + 1, + null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + assertTrue(reader.next(key, fragment)); + assertEquals("EAS139", fragment.getInstrument()); + + assertTrue(reader.next(key, fragment)); + assertNull(fragment.getInstrument()); + + assertFalse(reader.next(key, fragment)); + } + + @Test + public void testOneNotThenIllumina() throws IOException { + writeToTempFastq(oneFastq + "\n" + illuminaFastq); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), + 0, + illuminaFastq.length() + oneFastq.length() + 1, + null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + assertTrue(reader.next(key, fragment)); + assertNull(fragment.getInstrument()); + + assertTrue(reader.next(key, fragment)); + assertNull(fragment.getInstrument()); + + assertFalse(reader.next(key, fragment)); + } + + @Test + public void testProgress() throws IOException { + writeToTempFastq(twoFastq); + split = new FileSplit(new Path(tempFastq.toURI().toString()), 0, twoFastq.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + assertEquals(0.0, reader.getProgress(), 0.01); + + reader.next(key, fragment); + assertEquals(0.5, reader.getProgress(), 0.01); + + reader.next(key, fragment); + assertEquals(1.0, reader.getProgress(), 0.01); + } + + @Test + public void testCreateKey() throws IOException { + FastqRecordReader reader = createReaderForOneFastq(); + assertTrue(reader.createKey() instanceof Text); + } + + @Test + public void testCreateValue() throws IOException { + FastqRecordReader reader = createReaderForOneFastq(); + assertTrue(reader.createValue() instanceof SequencedFragment); + } + + @Test + public void testClose() throws IOException { + FastqRecordReader reader = createReaderForOneFastq(); + // doesn't really do anything but exercise the code + reader.close(); + } + + @Test + public void testReadFastqWithIdTwice() throws IOException { + writeToTempFastq(fastqWithIdTwice); + split = + new FileSplit(new Path(tempFastq.toURI().toString()), 0, fastqWithIdTwice.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + assertEquals( + "###########################################################################################", + fragment.getQuality().toString()); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testReadFastqWithAmpersandQuality() throws IOException { + writeToTempFastq(fastqWithAmpersandQuality); + // split doesn't start at 0, forcing reader to advance looking for first 
complete record + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 3, fastqWithAmpersandQuality.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + assertEquals( + "###########################################################################################", + fragment.getQuality().toString()); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testMakePositionMessage() throws IOException { + writeToTempFastq(fastqWithIdTwice); + split = + new FileSplit(new Path(tempFastq.toURI().toString()), 0, fastqWithIdTwice.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + assertNotNull(reader.makePositionMessage()); + } + + @Test + public void testFastqWithIlluminaEncoding() throws IOException { + conf.set("hbam.fastq-input.base-quality-encoding", "illumina"); + verifyInputQualityConfig(); + } + + @Test + public void testFastqWithIlluminaEncodingAndGenericInputConfig() throws IOException { + conf.set("hbam.input.base-quality-encoding", "illumina"); + verifyInputQualityConfig(); + } + + private void verifyInputQualityConfig() throws IOException { + writeToTempFastq(illuminaFastqWithPhred64Quality); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), + 0, + illuminaFastqWithPhred64Quality.length(), + null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + assertEquals( + "CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC", + fragment.getQuality().toString()); + } + + @Test + public void testGzCompressedInput() throws IOException { + // write gzip-compressed data + GzipCodec codec = new GzipCodec(); + PrintWriter fastqOut = + new PrintWriter( + new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz)))); + fastqOut.write(twoFastq); + fastqOut.close(); + + // now try to read it + split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoFastq.length(), null); + FastqRecordReader reader = new FastqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10880 HWI-ST168_161:1:1:1373:2042/1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + + retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229.10883 HWI-ST168_161:1:1:1796:2044/1", key.toString()); + assertEquals( + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", + fragment.getSequence().toString()); + } + + @Test(expected = RuntimeException.class) + public void testCompressedSplit() throws IOException { + // write gzip-compressed data + GzipCodec codec = new GzipCodec(); + PrintWriter fastqOut = + new PrintWriter( + new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz)))); + fastqOut.write(twoFastq); + fastqOut.close(); + + // now try to read it starting from the middle + split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoFastq.length(), null); + 
FastqRecordReader reader = new FastqRecordReader(conf, split); + } + + @Test + public void testIlluminaNoIndex() throws IOException { + writeToTempFastq(illuminaFastqNoIndex); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 0, illuminaFastqNoIndex.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + + // ensure all meta-data was picked up + assertEquals("EAS139", fragment.getInstrument()); + assertEquals(136, fragment.getRunNumber().intValue()); + // now verify the index + assertEquals("", fragment.getIndexSequence()); + } + + @Test + public void testSkipFailedQC() throws IOException { + conf.set("hbam.fastq-input.filter-failed-qc", "true"); + verifySkipFailedQC(); + } + + @Test + public void testSkipFailedQCGenericConfig() throws IOException { + conf.set("hbam.input.filter-failed-qc", "true"); + verifySkipFailedQC(); + } + + private void verifySkipFailedQC() throws IOException { + writeToTempFastq(twoFastqWithIllumina); + split = + new FileSplit( + new Path(tempFastq.toURI().toString()), 0, twoFastqWithIllumina.length(), null); + + FastqRecordReader reader = new FastqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + assertEquals(2, (int) fragment.getRead()); + + found = reader.next(key, fragment); + assertTrue(found); + assertEquals(3, (int) fragment.getRead()); + + found = reader.next(key, fragment); + assertFalse(found); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestFastqOutputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestFastqOutputFormat.java index c57085d..ed6543e 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestFastqOutputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestFastqOutputFormat.java @@ -22,185 +22,170 @@ package org.seqdoop.hadoop_bam; -import org.seqdoop.hadoop_bam.FastqOutputFormat.FastqRecordWriter; -import org.seqdoop.hadoop_bam.SequencedFragment; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; - -import java.io.DataOutputStream; -import java.io.ByteArrayOutputStream; - -import org.junit.*; import static org.junit.Assert.*; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; +import org.junit.*; +import org.seqdoop.hadoop_bam.FastqOutputFormat.FastqRecordWriter; -public class TestFastqOutputFormat -{ - private SequencedFragment fragment; - - private ByteArrayOutputStream outputBuffer; - private DataOutputStream dataOutput; - private FastqRecordWriter writer; - - @Before - public void setup() throws IOException - { - fragment = new SequencedFragment(); - fragment.setInstrument("instrument"); - fragment.setRunNumber(1); - fragment.setFlowcellId("xyz"); - fragment.setLane(2); - fragment.setTile(1001); - fragment.setXpos(10000); - fragment.setYpos(9999); - fragment.setRead(1); - fragment.setFilterPassed(true); - fragment.setControlNumber(33); - fragment.setIndexSequence("CATCAT"); - fragment.setSequence(new Text("AAAAAAAAAA")); - fragment.setQuality(new Text("##########")); - - outputBuffer = new ByteArrayOutputStream(); - dataOutput = new DataOutputStream(outputBuffer); - writer = new FastqRecordWriter(new Configuration(), dataOutput); - } - - @Test - public void testSimple() throws IOException - { - writer.write(null, fragment); - writer.close(null); - - String[] lines = new String(outputBuffer.toByteArray(), 
"US-ASCII").split("\n"); - assertEquals(4, lines.length); - - String idLine = lines[0]; - assertTrue(idLine.startsWith("@")); - - compareMetadata(fragment, idLine); - - assertEquals(fragment.getSequence().toString(), lines[1]); - assertEquals("+", lines[2]); - assertEquals(fragment.getQuality().toString(), lines[3]); - } - - @Test - public void testNullControlNumber() throws IOException - { - fragment.setControlNumber(null); - writer.write(null, fragment); - writer.close(null); - - String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); - assertEquals(4, lines.length); - - String idLine = lines[0]; - assertTrue(idLine.startsWith("@")); - - fragment.setControlNumber(0); // when null 0 should be written - compareMetadata(fragment, idLine); - } - - @Test - public void testNullFilter() throws IOException - { - fragment.setFilterPassed(null); - writer.write(null, fragment); - writer.close(null); - - String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); - assertEquals(4, lines.length); - - String idLine = lines[0]; - assertTrue(idLine.startsWith("@")); - - fragment.setFilterPassed(true); // when filter not available then it passes - compareMetadata(fragment, idLine); - } - - @Test - public void testCustomId() throws IOException - { - String customKey = "hello"; - writer.write(new Text(customKey), fragment); - writer.close(null); - - String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); - assertEquals(4, lines.length); - - String idLine = lines[0]; - assertTrue(idLine.startsWith("@")); - assertEquals(customKey, idLine.substring(1)); - } - - @Test - public void testBaseQualitiesInIllumina() throws IOException - { - Configuration conf = new Configuration(); - conf.set("hbam.fastq-output.base-quality-encoding", "illumina"); - writer.setConf(conf); - - // ensure sanger qualities are converted to illumina - String seq = "AAAAAAAAAA"; - String qual = "##########"; - - fragment.setSequence(new Text(seq)); - fragment.setQuality(new Text(qual)); - - writer.write(null, fragment); - writer.close(null); - - String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); - assertEquals(qual.replace("#", "B"), lines[3]); - } - - @Test - public void testConfigureOutputInSanger() throws IOException - { - Configuration conf = new Configuration(); - conf.set("hbam.fastq-output.base-quality-encoding", "sanger"); - writer.setConf(conf); - testSimple(); - } - - @Test(expected=RuntimeException.class) - public void testBadConfig() throws IOException - { - Configuration conf = new Configuration(); - conf.set("hbam.fastq-output.base-quality-encoding", "blalbal"); - writer.setConf(conf); - } - - @Test - public void testClose() throws IOException - { - // doesn't really do anything but exercise the code - writer.close(null); - } - - private static void compareMetadata(SequencedFragment fragment, String idLine) - { - idLine = idLine.substring(1); - String[] pieces = idLine.split(" ")[0].split(":"); // first part: location on flowcell - assertEquals(fragment.getInstrument(), pieces[0]); - assertEquals(fragment.getRunNumber().toString(), pieces[1]); - assertEquals(fragment.getFlowcellId(), pieces[2]); - assertEquals(fragment.getLane().toString(), pieces[3]); - assertEquals(fragment.getTile().toString(), pieces[4]); - assertEquals(fragment.getXpos().toString(), pieces[5]); - assertEquals(fragment.getYpos().toString(), pieces[6]); - - pieces = idLine.split(" ")[1].split(":"); // second part - 
assertEquals(fragment.getRead().toString(), pieces[0]); - assertEquals(fragment.getFilterPassed() ? "N" : "Y", pieces[1]); - assertEquals(fragment.getControlNumber().toString(), pieces[2]); - assertEquals(fragment.getIndexSequence().toString(), pieces[3]); - } - - public static void main(String args[]) { - org.junit.runner.JUnitCore.main(TestFastqOutputFormat.class.getName()); - } +public class TestFastqOutputFormat { + + private SequencedFragment fragment; + + private ByteArrayOutputStream outputBuffer; + private DataOutputStream dataOutput; + private FastqRecordWriter writer; + + private static void compareMetadata(SequencedFragment fragment, String idLine) { + idLine = idLine.substring(1); + String[] pieces = idLine.split(" ")[0].split(":"); // first part: location on flowcell + assertEquals(fragment.getInstrument(), pieces[0]); + assertEquals(fragment.getRunNumber().toString(), pieces[1]); + assertEquals(fragment.getFlowcellId(), pieces[2]); + assertEquals(fragment.getLane().toString(), pieces[3]); + assertEquals(fragment.getTile().toString(), pieces[4]); + assertEquals(fragment.getXpos().toString(), pieces[5]); + assertEquals(fragment.getYpos().toString(), pieces[6]); + + pieces = idLine.split(" ")[1].split(":"); // second part + assertEquals(fragment.getRead().toString(), pieces[0]); + assertEquals(fragment.getFilterPassed() ? "N" : "Y", pieces[1]); + assertEquals(fragment.getControlNumber().toString(), pieces[2]); + assertEquals(fragment.getIndexSequence().toString(), pieces[3]); + } + + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestFastqOutputFormat.class.getName()); + } + + @Before + public void setup() throws IOException { + fragment = new SequencedFragment(); + fragment.setInstrument("instrument"); + fragment.setRunNumber(1); + fragment.setFlowcellId("xyz"); + fragment.setLane(2); + fragment.setTile(1001); + fragment.setXpos(10000); + fragment.setYpos(9999); + fragment.setRead(1); + fragment.setFilterPassed(true); + fragment.setControlNumber(33); + fragment.setIndexSequence("CATCAT"); + fragment.setSequence(new Text("AAAAAAAAAA")); + fragment.setQuality(new Text("##########")); + + outputBuffer = new ByteArrayOutputStream(); + dataOutput = new DataOutputStream(outputBuffer); + writer = new FastqRecordWriter(new Configuration(), dataOutput); + } + + @Test + public void testSimple() throws IOException { + writer.write(null, fragment); + writer.close(null); + + String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); + assertEquals(4, lines.length); + + String idLine = lines[0]; + assertTrue(idLine.startsWith("@")); + + compareMetadata(fragment, idLine); + + assertEquals(fragment.getSequence().toString(), lines[1]); + assertEquals("+", lines[2]); + assertEquals(fragment.getQuality().toString(), lines[3]); + } + + @Test + public void testNullControlNumber() throws IOException { + fragment.setControlNumber(null); + writer.write(null, fragment); + writer.close(null); + + String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); + assertEquals(4, lines.length); + + String idLine = lines[0]; + assertTrue(idLine.startsWith("@")); + + fragment.setControlNumber(0); // when null 0 should be written + compareMetadata(fragment, idLine); + } + + @Test + public void testNullFilter() throws IOException { + fragment.setFilterPassed(null); + writer.write(null, fragment); + writer.close(null); + + String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); + assertEquals(4, lines.length); + + String 
idLine = lines[0]; + assertTrue(idLine.startsWith("@")); + + fragment.setFilterPassed(true); // when filter not available then it passes + compareMetadata(fragment, idLine); + } + + @Test + public void testCustomId() throws IOException { + String customKey = "hello"; + writer.write(new Text(customKey), fragment); + writer.close(null); + + String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); + assertEquals(4, lines.length); + + String idLine = lines[0]; + assertTrue(idLine.startsWith("@")); + assertEquals(customKey, idLine.substring(1)); + } + + @Test + public void testBaseQualitiesInIllumina() throws IOException { + Configuration conf = new Configuration(); + conf.set("hbam.fastq-output.base-quality-encoding", "illumina"); + writer.setConf(conf); + + // ensure sanger qualities are converted to illumina + String seq = "AAAAAAAAAA"; + String qual = "##########"; + + fragment.setSequence(new Text(seq)); + fragment.setQuality(new Text(qual)); + + writer.write(null, fragment); + writer.close(null); + + String[] lines = new String(outputBuffer.toByteArray(), "US-ASCII").split("\n"); + assertEquals(qual.replace("#", "B"), lines[3]); + } + + @Test + public void testConfigureOutputInSanger() throws IOException { + Configuration conf = new Configuration(); + conf.set("hbam.fastq-output.base-quality-encoding", "sanger"); + writer.setConf(conf); + testSimple(); + } + + @Test(expected = RuntimeException.class) + public void testBadConfig() throws IOException { + Configuration conf = new Configuration(); + conf.set("hbam.fastq-output.base-quality-encoding", "blalbal"); + writer.setConf(conf); + } + + @Test + public void testClose() throws IOException { + // doesn't really do anything but exercise the code + writer.close(null); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestLineReader.java b/src/test/java/org/seqdoop/hadoop_bam/TestLineReader.java index 6924991..6e94a83 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestLineReader.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestLineReader.java @@ -22,69 +22,60 @@ package org.seqdoop.hadoop_bam; -import org.junit.*; import static org.junit.Assert.*; -import org.seqdoop.hadoop_bam.LineReader; - -import org.apache.hadoop.io.Text; - import java.io.ByteArrayInputStream; import java.io.IOException; +import org.apache.hadoop.io.Text; +import org.junit.*; -public class TestLineReader -{ - public static final String input10 = "0123456789"; - public static final String input22 = "0123456789\n0987654321\n"; +public class TestLineReader { - private LineReader reader; - private Text dest = new Text(); + public static final String input10 = "0123456789"; + public static final String input22 = "0123456789\n0987654321\n"; - @Test - public void testReadBufferedLine() throws IOException - { - reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 22); - reader.readLine(dest); - assertEquals("0123456789", dest.toString()); - } + private LineReader reader; + private Text dest = new Text(); - @Test - public void testSkipOnBufferedLine() throws IOException - { - reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 22); - long skipped = reader.skip(1); - assertEquals(1, skipped); - reader.readLine(dest); - assertEquals("123456789", dest.toString()); - } + @Test + public void testReadBufferedLine() throws IOException { + reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 22); + reader.readLine(dest); + assertEquals("0123456789", dest.toString()); + } - @Test - public void 
testReadBeyondBuffer() throws IOException - { - reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 5); - reader.readLine(dest); - assertEquals("0123456789", dest.toString()); - } + @Test + public void testSkipOnBufferedLine() throws IOException { + reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 22); + long skipped = reader.skip(1); + assertEquals(1, skipped); + reader.readLine(dest); + assertEquals("123456789", dest.toString()); + } - @Test - public void testSkipBeyondBuffer() throws IOException - { - reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 5); - long skipped = reader.skip(11); - assertEquals(11, skipped); - reader.readLine(dest); - assertEquals("0987654321", dest.toString()); - } + @Test + public void testReadBeyondBuffer() throws IOException { + reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 5); + reader.readLine(dest); + assertEquals("0123456789", dest.toString()); + } - @Test - public void testSkipBeyondInput() throws IOException - { - reader = new LineReader(new ByteArrayInputStream(input10.getBytes()), 5); - long skipped = reader.skip(11); - assertEquals(10, skipped); + @Test + public void testSkipBeyondBuffer() throws IOException { + reader = new LineReader(new ByteArrayInputStream(input22.getBytes()), 5); + long skipped = reader.skip(11); + assertEquals(11, skipped); + reader.readLine(dest); + assertEquals("0987654321", dest.toString()); + } - skipped = reader.skip(11); - assertEquals(0, skipped); - } + @Test + public void testSkipBeyondInput() throws IOException { + reader = new LineReader(new ByteArrayInputStream(input10.getBytes()), 5); + long skipped = reader.skip(11); + assertEquals(10, skipped); + skipped = reader.skip(11); + assertEquals(0, skipped); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestQseqInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestQseqInputFormat.java index 359a1e2..50ed517 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestQseqInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestQseqInputFormat.java @@ -22,363 +22,359 @@ package org.seqdoop.hadoop_bam; -import org.seqdoop.hadoop_bam.QseqInputFormat.QseqRecordReader; -import org.seqdoop.hadoop_bam.SequencedFragment; -import org.seqdoop.hadoop_bam.FormatException; +import static org.junit.Assert.*; +import java.io.BufferedOutputStream; +import java.io.BufferedWriter; import java.io.File; +import java.io.FileOutputStream; +import java.io.FileWriter; import java.io.IOException; import java.io.PrintWriter; -import java.io.BufferedWriter; -import java.io.FileWriter; -import java.io.FileOutputStream; -import java.io.BufferedOutputStream; - -import org.junit.*; -import static org.junit.Assert.*; - -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.compress.GzipCodec; import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.compress.GzipCodec; +import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapreduce.lib.input.FileSplit; +import org.junit.*; +import org.seqdoop.hadoop_bam.QseqInputFormat.QseqRecordReader; -public class TestQseqInputFormat -{ - public static final String oneQseq = - "ERR020229 10880 1 1 1373 2042 0 1 " + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + - "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 1"; - - 
public static final String twoQseq = - "ERR020229 10880 1 1 1373 2042 0 1 " + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + - "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0\n" + - "ERR020229 10883 1 1 1796 2044 0 2 " + - "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\t" + - "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD 1"; - - public static final String illuminaQseq = - "EAS139 136 2 5 1000 12850 ATCACG 1 " + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + - "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"; - - public static final String nQseq = - "ERR020229 10880 1 1 1373 2042 0 1 " + - "...........................................................................................\t" + - "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"; - - - public static final String sangerQseq = - "EAS139 136 2 5 1000 12850 ATCACG 1 " + - "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + - "########################################################################################### 0"; - - public static final String indexWithUnknown = - "EAS139 136 2 5 1000 12850 ATC..G 1 " + - "TTGGATGATAGGGATTATTTGACTCGAATAT\t" + - "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\t0"; - - private JobConf conf; - private FileSplit split; - private File tempQseq; - private File tempGz; - - private Text key; - private SequencedFragment fragment; - - @Before - public void setup() throws IOException - { - tempQseq = File.createTempFile("test_qseq_input_format", "qseq"); - tempGz = File.createTempFile("test_qseq_input_format", ".gz"); - conf = new JobConf(); - key = new Text(); - fragment = new SequencedFragment(); - } - - @After - public void tearDown() - { - tempQseq.delete(); - tempGz.delete(); - split = null; - } - - private void writeToTempQseq(String s) throws IOException - { - PrintWriter qseqOut = new PrintWriter( new BufferedWriter( new FileWriter(tempQseq) ) ); - qseqOut.write(s); - qseqOut.close(); - } - - private QseqRecordReader createReaderForOneQseq() throws IOException - { - writeToTempQseq(oneQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, oneQseq.length(), null); - - return new QseqRecordReader(conf, split); - } - - @Test - public void testReadFromStart() throws IOException - { - QseqRecordReader reader = createReaderForOneQseq(); - - assertEquals(0, reader.getPos()); - assertEquals(0.0, reader.getProgress(), 0.01); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); -//System.err.println("in testReadFromStart quality: " + fragment.getQuality().toString()); - assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - assertEquals("###########################################################################################", fragment.getQuality().toString()); - - assertEquals(oneQseq.length(), reader.getPos()); - assertEquals(1.0, reader.getProgress(), 0.01); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testReadStartInMiddle() throws IOException - { - writeToTempQseq(twoQseq); - split = new FileSplit(new 
Path(tempQseq.toURI().toString()), 10, twoQseq.length() - 10, null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - - assertEquals(oneQseq.length() + 1, reader.getPos()); // The start of the second record. We +1 for the \n that is not in oneQseq - assertEquals(0.0, reader.getProgress(), 0.01); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); - assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); - assertEquals("%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", fragment.getQuality().toString()); - - assertEquals(twoQseq.length(), reader.getPos()); // now should be at the end of the data - assertEquals(1.0, reader.getProgress(), 0.01); - - retval = reader.next(key, fragment); - assertFalse(retval); - } - - @Test - public void testSliceEndsBeforeEndOfFile() throws IOException - { - writeToTempQseq(twoQseq); - // slice ends at position 10--i.e. somewhere in the first record. The second record should not be read. - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, 10, null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); - - assertFalse("QseqRecordReader is reading a record that starts after the end of the slice", reader.next(key, fragment)); - } - - @Test - public void testIlluminaMetaInfo() throws IOException - { - writeToTempQseq(illuminaQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, illuminaQseq.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - - assertEquals("EAS139", fragment.getInstrument()); - assertEquals(136, fragment.getRunNumber().intValue()); - assertNull("flowcell id not null", fragment.getFlowcellId()); - assertEquals(2, fragment.getLane().intValue()); - assertEquals(5, fragment.getTile().intValue()); - assertEquals(1000, fragment.getXpos().intValue()); - assertEquals(12850, fragment.getYpos().intValue()); - assertEquals(1, fragment.getRead().intValue()); - assertEquals(false, fragment.getFilterPassed().booleanValue()); - assertNull("control number not null", fragment.getControlNumber()); - assertEquals("ATCACG", fragment.getIndexSequence()); - } - - @Test - public void testNs() throws IOException - { - writeToTempQseq(nQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, nQseq.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - assertEquals("NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN", fragment.getSequence().toString()); - } - - @Test - public void testConvertDotInIndexSequence() throws IOException - { - writeToTempQseq(indexWithUnknown); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, indexWithUnknown.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - boolean found = reader.next(key, fragment); - assertTrue(found); - assertEquals("ATCNNG", fragment.getIndexSequence()); - } - - @Test(expected=FormatException.class) - public void testSangerQualities() throws IOException - { - writeToTempQseq(sangerQseq); - split = new FileSplit(new 
Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - reader.next(key, fragment); - } - - @Test - public void testConfigureForSangerQualities() throws IOException - { - conf.set("hbam.qseq-input.base-quality-encoding", "sanger"); - qualityConfigTest(); - } - - @Test - public void testGenericInputConfigureForSangerQualities() throws IOException - { - conf.set("hbam.input.base-quality-encoding", "sanger"); - qualityConfigTest(); - } - - private void qualityConfigTest() throws IOException - { - writeToTempQseq(sangerQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - assertTrue(reader.next(key, fragment)); - assertEquals("###########################################################################################", fragment.getQuality().toString()); - } - - @Test - public void testProgress() throws IOException - { - writeToTempQseq(twoQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, twoQseq.length(), null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - assertEquals(0.0, reader.getProgress(), 0.01); - - reader.next(key, fragment); - assertEquals(0.5, reader.getProgress(), 0.01); - - reader.next(key, fragment); - assertEquals(1.0, reader.getProgress(), 0.01); - } - - @Test - public void testCreateKey() throws IOException - { - QseqRecordReader reader = createReaderForOneQseq(); - assertTrue(reader.createKey() instanceof Text); - } - - @Test - public void testCreateValue() throws IOException - { - QseqRecordReader reader = createReaderForOneQseq(); - assertTrue(reader.createValue() instanceof SequencedFragment); - } - - @Test - public void testClose() throws IOException - { - QseqRecordReader reader = createReaderForOneQseq(); - // doesn't really do anything but exercise the code - reader.close(); - } - - @Test - public void testMakePositionMessage() throws IOException - { - writeToTempQseq(twoQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 10, twoQseq.length() - 10, null); - - QseqRecordReader reader = new QseqRecordReader(conf, split); - assertNotNull(reader.makePositionMessage()); - } - - @Test - public void testGzCompressedInput() throws IOException - { - // write gzip-compressed data - GzipCodec codec = new GzipCodec(); - PrintWriter qseqOut = new PrintWriter( new BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); - qseqOut.write(twoQseq); - qseqOut.close(); - - // now try to read it - split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null); - QseqRecordReader reader = new QseqRecordReader(conf, split); - - boolean retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); - assertEquals("TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", fragment.getSequence().toString()); - - retval = reader.next(key, fragment); - assertTrue(retval); - assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); - assertEquals("TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", fragment.getSequence().toString()); - } - - @Test(expected=RuntimeException.class) - public void testCompressedSplit() throws IOException - { - // write gzip-compressed data - GzipCodec codec = new GzipCodec(); - PrintWriter qseqOut = new PrintWriter( new 
BufferedOutputStream( codec.createOutputStream( new FileOutputStream(tempGz) ) ) ); - qseqOut.write(twoQseq); - qseqOut.close(); - - // now try to read it starting from the middle - split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null); - QseqRecordReader reader = new QseqRecordReader(conf, split); - } - @Test - public void testSkipFailedQC() throws IOException - { - conf.set("hbam.qseq-input.filter-failed-qc", "t"); - verifySkipFailedQC(); - } - - @Test - public void testSkipFailedQCGenericConfig() throws IOException - { - conf.set("hbam.input.filter-failed-qc", "t"); - verifySkipFailedQC(); - } - - private void verifySkipFailedQC() throws IOException - { - writeToTempQseq(twoQseq); - split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, twoQseq.length(), null); - QseqRecordReader reader = new QseqRecordReader(conf, split); - - boolean found = reader.next(key, fragment); - assertTrue(found); - assertEquals(2, (int)fragment.getRead()); - - found = reader.next(key, fragment); - assertFalse(found); - } - - public static void main(String args[]) { - org.junit.runner.JUnitCore.main(TestQseqInputFormat.class.getName()); - } +public class TestQseqInputFormat { + + public static final String oneQseq = + "ERR020229 10880 1 1 1373 2042 0 1 " + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 1"; + + public static final String twoQseq = + "ERR020229 10880 1 1 1373 2042 0 1 " + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0\n" + + "ERR020229 10883 1 1 1796 2044 0 2 " + + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG\t" + + "DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD 1"; + + public static final String illuminaQseq = + "EAS139 136 2 5 1000 12850 ATCACG 1 " + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"; + + public static final String nQseq = + "ERR020229 10880 1 1 1373 2042 0 1 " + + "...........................................................................................\t" + + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB 0"; + + public static final String sangerQseq = + "EAS139 136 2 5 1000 12850 ATCACG 1 " + + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT\t" + + "########################################################################################### 0"; + + public static final String indexWithUnknown = + "EAS139 136 2 5 1000 12850 ATC..G 1 " + + "TTGGATGATAGGGATTATTTGACTCGAATAT\t" + + "BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB\t0"; + + private JobConf conf; + private FileSplit split; + private File tempQseq; + private File tempGz; + + private Text key; + private SequencedFragment fragment; + + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestQseqInputFormat.class.getName()); + } + + @Before + public void setup() throws IOException { + tempQseq = File.createTempFile("test_qseq_input_format", "qseq"); + tempGz = File.createTempFile("test_qseq_input_format", ".gz"); + conf = new JobConf(); + key = new 
Text(); + fragment = new SequencedFragment(); + } + + @After + public void tearDown() { + tempQseq.delete(); + tempGz.delete(); + split = null; + } + + private void writeToTempQseq(String s) throws IOException { + PrintWriter qseqOut = new PrintWriter(new BufferedWriter(new FileWriter(tempQseq))); + qseqOut.write(s); + qseqOut.close(); + } + + private QseqRecordReader createReaderForOneQseq() throws IOException { + writeToTempQseq(oneQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, oneQseq.length(), null); + + return new QseqRecordReader(conf, split); + } + + @Test + public void testReadFromStart() throws IOException { + QseqRecordReader reader = createReaderForOneQseq(); + + assertEquals(0, reader.getPos()); + assertEquals(0.0, reader.getProgress(), 0.01); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + // System.err.println("in testReadFromStart quality: " + fragment.getQuality().toString()); + assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + assertEquals( + "###########################################################################################", + fragment.getQuality().toString()); + + assertEquals(oneQseq.length(), reader.getPos()); + assertEquals(1.0, reader.getProgress(), 0.01); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testReadStartInMiddle() throws IOException { + writeToTempQseq(twoQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 10, twoQseq.length() - 10, null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + + assertEquals( + oneQseq.length() + 1, + reader.getPos()); // The start of the second record. We +1 for the \n that is not in oneQseq + assertEquals(0.0, reader.getProgress(), 0.01); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); + assertEquals( + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", + fragment.getSequence().toString()); + assertEquals( + "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%", + fragment.getQuality().toString()); + + assertEquals(twoQseq.length(), reader.getPos()); // now should be at the end of the data + assertEquals(1.0, reader.getProgress(), 0.01); + + retval = reader.next(key, fragment); + assertFalse(retval); + } + + @Test + public void testSliceEndsBeforeEndOfFile() throws IOException { + writeToTempQseq(twoQseq); + // slice ends at position 10--i.e. somewhere in the first record. The second record should not + // be read. 
+ split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, 10, null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); + + assertFalse( + "QseqRecordReader is reading a record that starts after the end of the slice", + reader.next(key, fragment)); + } + + @Test + public void testIlluminaMetaInfo() throws IOException { + writeToTempQseq(illuminaQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, illuminaQseq.length(), null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + + assertEquals("EAS139", fragment.getInstrument()); + assertEquals(136, fragment.getRunNumber().intValue()); + assertNull("flowcell id not null", fragment.getFlowcellId()); + assertEquals(2, fragment.getLane().intValue()); + assertEquals(5, fragment.getTile().intValue()); + assertEquals(1000, fragment.getXpos().intValue()); + assertEquals(12850, fragment.getYpos().intValue()); + assertEquals(1, fragment.getRead().intValue()); + assertEquals(false, fragment.getFilterPassed().booleanValue()); + assertNull("control number not null", fragment.getControlNumber()); + assertEquals("ATCACG", fragment.getIndexSequence()); + } + + @Test + public void testNs() throws IOException { + writeToTempQseq(nQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, nQseq.length(), null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + assertEquals( + "NNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNNN", + fragment.getSequence().toString()); + } + + @Test + public void testConvertDotInIndexSequence() throws IOException { + writeToTempQseq(indexWithUnknown); + split = + new FileSplit(new Path(tempQseq.toURI().toString()), 0, indexWithUnknown.length(), null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + boolean found = reader.next(key, fragment); + assertTrue(found); + assertEquals("ATCNNG", fragment.getIndexSequence()); + } + + @Test(expected = FormatException.class) + public void testSangerQualities() throws IOException { + writeToTempQseq(sangerQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + reader.next(key, fragment); + } + + @Test + public void testConfigureForSangerQualities() throws IOException { + conf.set("hbam.qseq-input.base-quality-encoding", "sanger"); + qualityConfigTest(); + } + + @Test + public void testGenericInputConfigureForSangerQualities() throws IOException { + conf.set("hbam.input.base-quality-encoding", "sanger"); + qualityConfigTest(); + } + + private void qualityConfigTest() throws IOException { + writeToTempQseq(sangerQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, sangerQseq.length(), null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + assertTrue(reader.next(key, fragment)); + assertEquals( + "###########################################################################################", + fragment.getQuality().toString()); + } + + @Test + public void testProgress() throws IOException { + writeToTempQseq(twoQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, twoQseq.length(), null); + + QseqRecordReader reader = 
new QseqRecordReader(conf, split); + assertEquals(0.0, reader.getProgress(), 0.01); + + reader.next(key, fragment); + assertEquals(0.5, reader.getProgress(), 0.01); + + reader.next(key, fragment); + assertEquals(1.0, reader.getProgress(), 0.01); + } + + @Test + public void testCreateKey() throws IOException { + QseqRecordReader reader = createReaderForOneQseq(); + assertTrue(reader.createKey() instanceof Text); + } + + @Test + public void testCreateValue() throws IOException { + QseqRecordReader reader = createReaderForOneQseq(); + assertTrue(reader.createValue() instanceof SequencedFragment); + } + + @Test + public void testClose() throws IOException { + QseqRecordReader reader = createReaderForOneQseq(); + // doesn't really do anything but exercise the code + reader.close(); + } + + @Test + public void testMakePositionMessage() throws IOException { + writeToTempQseq(twoQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 10, twoQseq.length() - 10, null); + + QseqRecordReader reader = new QseqRecordReader(conf, split); + assertNotNull(reader.makePositionMessage()); + } + + @Test + public void testGzCompressedInput() throws IOException { + // write gzip-compressed data + GzipCodec codec = new GzipCodec(); + PrintWriter qseqOut = + new PrintWriter( + new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz)))); + qseqOut.write(twoQseq); + qseqOut.close(); + + // now try to read it + split = new FileSplit(new Path(tempGz.toURI().toString()), 0, twoQseq.length(), null); + QseqRecordReader reader = new QseqRecordReader(conf, split); + + boolean retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229:10880:1:1:1373:2042:1", key.toString()); + assertEquals( + "TTGGATGATAGGGATTATTTGACTCGAATATTGGAAATAGCTGTTTATATTTTTTAAAAATGGTCTGTAACTGGTGACAGGACGCTTCGAT", + fragment.getSequence().toString()); + + retval = reader.next(key, fragment); + assertTrue(retval); + assertEquals("ERR020229:10883:1:1:1796:2044:2", key.toString()); + assertEquals( + "TGAGCAGATGTGCTAAAGCTGCTTCTCCCCTAGGATCATTTGTACCTACCAGACTCAGGGAAAGGGGTGAGAATTGGGCCGTGGGGCAAGG", + fragment.getSequence().toString()); + } + + @Test(expected = RuntimeException.class) + public void testCompressedSplit() throws IOException { + // write gzip-compressed data + GzipCodec codec = new GzipCodec(); + PrintWriter qseqOut = + new PrintWriter( + new BufferedOutputStream(codec.createOutputStream(new FileOutputStream(tempGz)))); + qseqOut.write(twoQseq); + qseqOut.close(); + + // now try to read it starting from the middle + split = new FileSplit(new Path(tempGz.toURI().toString()), 10, twoQseq.length(), null); + QseqRecordReader reader = new QseqRecordReader(conf, split); + } + + @Test + public void testSkipFailedQC() throws IOException { + conf.set("hbam.qseq-input.filter-failed-qc", "t"); + verifySkipFailedQC(); + } + + @Test + public void testSkipFailedQCGenericConfig() throws IOException { + conf.set("hbam.input.filter-failed-qc", "t"); + verifySkipFailedQC(); + } + + private void verifySkipFailedQC() throws IOException { + writeToTempQseq(twoQseq); + split = new FileSplit(new Path(tempQseq.toURI().toString()), 0, twoQseq.length(), null); + QseqRecordReader reader = new QseqRecordReader(conf, split); + + boolean found = reader.next(key, fragment); + assertTrue(found); + assertEquals(2, (int) fragment.getRead()); + + found = reader.next(key, fragment); + assertFalse(found); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestQseqOutputFormat.java 
b/src/test/java/org/seqdoop/hadoop_bam/TestQseqOutputFormat.java index 67325ab..15154bf 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestQseqOutputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestQseqOutputFormat.java @@ -22,151 +22,140 @@ package org.seqdoop.hadoop_bam; -import org.seqdoop.hadoop_bam.QseqOutputFormat.QseqRecordWriter; -import org.seqdoop.hadoop_bam.SequencedFragment; +import static org.junit.Assert.*; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; import java.io.IOException; import java.io.UnsupportedEncodingException; - -import java.io.DataOutputStream; -import java.io.ByteArrayOutputStream; - -import org.junit.*; -import static org.junit.Assert.*; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; +import org.junit.*; +import org.seqdoop.hadoop_bam.QseqOutputFormat.QseqRecordWriter; -public class TestQseqOutputFormat -{ - private SequencedFragment fragment; - - private ByteArrayOutputStream outputBuffer; - private DataOutputStream dataOutput; - private QseqRecordWriter writer; - - @Before - public void setup() throws IOException - { - fragment = new SequencedFragment(); - fragment.setInstrument("instrument"); - fragment.setRunNumber(1); - fragment.setFlowcellId("xyz"); - fragment.setLane(2); - fragment.setTile(1001); - fragment.setXpos(10000); - fragment.setYpos(9999); - fragment.setRead(1); - fragment.setFilterPassed(true); - fragment.setIndexSequence("CATCAT"); - fragment.setSequence(new Text("AAAAAAAAAA")); - fragment.setQuality(new Text("##########")); - - outputBuffer = new ByteArrayOutputStream(); - dataOutput = new DataOutputStream(outputBuffer); - writer = new QseqRecordWriter(new Configuration(), dataOutput); - } - - @Test - public void testSimple() throws IOException - { - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(11, fields.length); - - assertEquals(fragment.getInstrument(), fields[0]); - assertEquals(fragment.getRunNumber().toString(), fields[1]); - assertEquals(fragment.getLane().toString(), fields[2]); - assertEquals(fragment.getTile().toString(), fields[3]); - assertEquals(fragment.getXpos().toString(), fields[4]); - assertEquals(fragment.getYpos().toString(), fields[5]); - assertEquals(fragment.getIndexSequence().toString(), fields[6]); - assertEquals(fragment.getRead().toString(), fields[7]); - assertEquals(fragment.getSequence().toString(), fields[8]); - assertEquals(fragment.getQuality().toString().replace('#', 'B'), fields[9]); - assertEquals(fragment.getFilterPassed() ? 
"1\n" : "0\n", fields[10]); - } - - @Test - public void testConvertUnknowns() throws IOException, UnsupportedEncodingException - { - String seq = "AAAAANNNNN"; - fragment.setSequence(new Text(seq)); - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(seq.replace("N", "."), fields[8]); - } - - @Test - public void testConvertUnknownsInIndexSequence() throws IOException, UnsupportedEncodingException - { - String index = "CATNNN"; - fragment.setIndexSequence(index); - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(index.replace("N", "."), fields[6]); - } - - @Test - public void testBaseQualities() throws IOException - { - // ensure sanger qualities are converted to illumina - String seq = "AAAAAAAAAA"; - String qual = "##########"; - - fragment.setSequence(new Text(seq)); - fragment.setQuality(new Text(qual)); - - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(qual.replace("#", "B"), fields[9]); - } - - @Test - public void testConfigureOutputInSanger() throws IOException - { - String seq = "AAAAAAAAAA"; - String qual = "##########"; - - fragment.setSequence(new Text(seq)); - fragment.setQuality(new Text(qual)); - - Configuration conf = new Configuration(); - conf.set("hbam.qseq-output.base-quality-encoding", "sanger"); - writer.setConf(conf); - - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(qual, fields[9]); - } - - @Test - public void testClose() throws IOException - { - // doesn't really do anything but exercise the code - writer.close(null); - } - - @Test - public void testNoIndex() throws IOException - { - fragment.setIndexSequence(null); - writer.write(null, fragment); - writer.close(null); - - String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); - assertEquals(11, fields.length); - - assertEquals("0", fields[6]); - } +public class TestQseqOutputFormat { + + private SequencedFragment fragment; + + private ByteArrayOutputStream outputBuffer; + private DataOutputStream dataOutput; + private QseqRecordWriter writer; + + @Before + public void setup() throws IOException { + fragment = new SequencedFragment(); + fragment.setInstrument("instrument"); + fragment.setRunNumber(1); + fragment.setFlowcellId("xyz"); + fragment.setLane(2); + fragment.setTile(1001); + fragment.setXpos(10000); + fragment.setYpos(9999); + fragment.setRead(1); + fragment.setFilterPassed(true); + fragment.setIndexSequence("CATCAT"); + fragment.setSequence(new Text("AAAAAAAAAA")); + fragment.setQuality(new Text("##########")); + + outputBuffer = new ByteArrayOutputStream(); + dataOutput = new DataOutputStream(outputBuffer); + writer = new QseqRecordWriter(new Configuration(), dataOutput); + } + + @Test + public void testSimple() throws IOException { + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(11, fields.length); + + assertEquals(fragment.getInstrument(), fields[0]); + assertEquals(fragment.getRunNumber().toString(), fields[1]); + assertEquals(fragment.getLane().toString(), fields[2]); + assertEquals(fragment.getTile().toString(), fields[3]); + 
assertEquals(fragment.getXpos().toString(), fields[4]); + assertEquals(fragment.getYpos().toString(), fields[5]); + assertEquals(fragment.getIndexSequence().toString(), fields[6]); + assertEquals(fragment.getRead().toString(), fields[7]); + assertEquals(fragment.getSequence().toString(), fields[8]); + assertEquals(fragment.getQuality().toString().replace('#', 'B'), fields[9]); + assertEquals(fragment.getFilterPassed() ? "1\n" : "0\n", fields[10]); + } + + @Test + public void testConvertUnknowns() throws IOException, UnsupportedEncodingException { + String seq = "AAAAANNNNN"; + fragment.setSequence(new Text(seq)); + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(seq.replace("N", "."), fields[8]); + } + + @Test + public void testConvertUnknownsInIndexSequence() + throws IOException, UnsupportedEncodingException { + String index = "CATNNN"; + fragment.setIndexSequence(index); + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(index.replace("N", "."), fields[6]); + } + + @Test + public void testBaseQualities() throws IOException { + // ensure sanger qualities are converted to illumina + String seq = "AAAAAAAAAA"; + String qual = "##########"; + + fragment.setSequence(new Text(seq)); + fragment.setQuality(new Text(qual)); + + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(qual.replace("#", "B"), fields[9]); + } + + @Test + public void testConfigureOutputInSanger() throws IOException { + String seq = "AAAAAAAAAA"; + String qual = "##########"; + + fragment.setSequence(new Text(seq)); + fragment.setQuality(new Text(qual)); + + Configuration conf = new Configuration(); + conf.set("hbam.qseq-output.base-quality-encoding", "sanger"); + writer.setConf(conf); + + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(qual, fields[9]); + } + + @Test + public void testClose() throws IOException { + // doesn't really do anything but exercise the code + writer.close(null); + } + + @Test + public void testNoIndex() throws IOException { + fragment.setIndexSequence(null); + writer.write(null, fragment); + writer.close(null); + + String[] fields = new String(outputBuffer.toByteArray(), "US-ASCII").split("\t"); + assertEquals(11, fields.length); + + assertEquals("0", fields[6]); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestSAMFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestSAMFormat.java index 679b56f..405e716 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestSAMFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestSAMFormat.java @@ -1,12 +1,12 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + import java.io.IOException; import java.io.InputStream; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNull; - public class TestSAMFormat { @Test @@ -22,7 +22,7 @@ public void testInferFromData() throws IOException { assertEquals(SAMFormat.SAM, SAMFormat.inferFromData(stream("test.sam"))); assertEquals(SAMFormat.BAM, SAMFormat.inferFromData(stream("test.bam"))); assertEquals(SAMFormat.CRAM, SAMFormat.inferFromData(stream("test.cram"))); - assertNull( 
SAMFormat.inferFromData(stream("test.vcf"))); + assertNull(SAMFormat.inferFromData(stream("test.vcf"))); } private InputStream stream(String resource) throws IOException { diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestSAMHeaderReader.java b/src/test/java/org/seqdoop/hadoop_bam/TestSAMHeaderReader.java index 15623c3..f84012c 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestSAMHeaderReader.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestSAMHeaderReader.java @@ -1,64 +1,65 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; + import htsjdk.samtools.*; -import htsjdk.samtools.cram.CRAMException; +import java.io.InputStream; +import java.net.URI; import org.apache.hadoop.conf.Configuration; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.seqdoop.hadoop_bam.util.SAMHeaderReader; -import java.io.InputStream; -import java.net.URI; - -import static org.junit.Assert.assertEquals; - public class TestSAMHeaderReader { - @Rule - public ExpectedException thrown= ExpectedException.none(); - @Test - public void testBAMHeaderReaderNoReference() throws Exception { + @Rule public ExpectedException thrown = ExpectedException.none(); - final Configuration conf = new Configuration(); + @Test + public void testBAMHeaderReaderNoReference() throws Exception { - InputStream inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.bam"); - final SamReader samReader = SamReaderFactory.makeDefault().open(SamInputResource.of(inputStream)); - int sequenceCount = samReader.getFileHeader().getSequenceDictionary().size(); - samReader.close(); + final Configuration conf = new Configuration(); - inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.bam"); - SAMFileHeader samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); - inputStream.close(); + InputStream inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.bam"); + final SamReader samReader = + SamReaderFactory.makeDefault().open(SamInputResource.of(inputStream)); + int sequenceCount = samReader.getFileHeader().getSequenceDictionary().size(); + samReader.close(); - assertEquals(samHeader.getSequenceDictionary().size(), sequenceCount); - } + inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.bam"); + SAMFileHeader samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); + inputStream.close(); - @Test - public void testCRAMHeaderReaderWithReference() throws Exception { - final Configuration conf = new Configuration(); + assertEquals(samHeader.getSequenceDictionary().size(), sequenceCount); + } - final InputStream inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.cram"); - final URI reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI(); - conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference.toString()); + @Test + public void testCRAMHeaderReaderWithReference() throws Exception { + final Configuration conf = new Configuration(); - SAMFileHeader samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); - inputStream.close(); + final InputStream inputStream = + ClassLoader.getSystemClassLoader().getResourceAsStream("test.cram"); + final URI reference = ClassLoader.getSystemClassLoader().getResource("auxf.fa").toURI(); + conf.set(CRAMInputFormat.REFERENCE_SOURCE_PATH_PROPERTY, reference.toString()); - assertEquals(samHeader.getSequenceDictionary().size(), 1); - } + SAMFileHeader samHeader = 
SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); + inputStream.close(); - @Test - public void testCRAMHeaderReaderNoReference() throws Exception { + assertEquals(samHeader.getSequenceDictionary().size(), 1); + } - thrown.expect(IllegalStateException.class); // htsjdk throws on CRAM file with no reference provided + @Test + public void testCRAMHeaderReaderNoReference() throws Exception { - final Configuration conf = new Configuration(); - final InputStream inputStream = ClassLoader.getSystemClassLoader().getResourceAsStream("test.cram"); - SAMFileHeader samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); - inputStream.close(); + thrown.expect( + IllegalStateException.class); // htsjdk throws on CRAM file with no reference provided - assertEquals(samHeader.getSequenceDictionary().size(), 1); - } + final Configuration conf = new Configuration(); + final InputStream inputStream = + ClassLoader.getSystemClassLoader().getResourceAsStream("test.cram"); + SAMFileHeader samHeader = SAMHeaderReader.readSAMHeaderFrom(inputStream, conf); + inputStream.close(); + assertEquals(samHeader.getSequenceDictionary().size(), 1); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestSAMInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestSAMInputFormat.java index f1ac677..d3bf93e 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestSAMInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestSAMInputFormat.java @@ -1,5 +1,9 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; @@ -25,11 +29,8 @@ import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - public class TestSAMInputFormat { + private String input; private TaskAttemptContext taskAttemptContext; private JobContext jobContext; @@ -56,8 +57,8 @@ public void testReader() throws Exception { AnySAMInputFormat inputFormat = new AnySAMInputFormat(); List splits = inputFormat.getSplits(jobContext); assertEquals(1, splits.size()); - RecordReader reader = inputFormat - .createRecordReader(splits.get(0), taskAttemptContext); + RecordReader reader = + inputFormat.createRecordReader(splits.get(0), taskAttemptContext); reader.initialize(splits.get(0), taskAttemptContext); int actualCount = 0; diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestSequencedFragment.java b/src/test/java/org/seqdoop/hadoop_bam/TestSequencedFragment.java index 8d049b0..bf85222 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestSequencedFragment.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestSequencedFragment.java @@ -22,341 +22,335 @@ package org.seqdoop.hadoop_bam; -import org.junit.*; import static org.junit.Assert.*; -import org.seqdoop.hadoop_bam.SequencedFragment; -import org.seqdoop.hadoop_bam.FormatConstants; -import org.seqdoop.hadoop_bam.FormatException; - -import java.io.IOException; -import java.io.DataInput; import java.io.ByteArrayInputStream; -import java.io.DataInputStream; - -import java.io.DataOutput; import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; import java.io.DataOutputStream; - +import java.io.IOException; import org.apache.hadoop.io.Text; +import org.junit.*; -public class TestSequencedFragment -{ - private SequencedFragment frag; - private 
SequencedFragment frag2; - - @Before - public void setup() - { - frag = new SequencedFragment(); - frag2 = new SequencedFragment(); - } - - @Test - public void testInitialState() - { - assertNotNull(frag.getSequence()); - assertNotNull(frag.getQuality()); - - assertNull(frag.getInstrument()); - assertNull(frag.getRunNumber()); - assertNull(frag.getFlowcellId()); - assertNull(frag.getLane()); - assertNull(frag.getTile()); - assertNull(frag.getXpos()); - assertNull(frag.getYpos()); - assertNull(frag.getRead()); - assertNull(frag.getFilterPassed()); - assertNull(frag.getControlNumber()); - assertNull(frag.getIndexSequence()); - - assertNotNull(frag.toString()); - } - - @Test(expected=IllegalArgumentException.class) - public void testNoNullSequence() - { - frag.setSequence(null); - } - - @Test(expected=IllegalArgumentException.class) - public void testNoNullQuality() - { - frag.setQuality(null); - } - - /////////////////////////////////////////////////////////////// - // equals - /////////////////////////////////////////////////////////////// - @Test - public void testEquals() - { - assertTrue(frag.equals(frag2)); - - frag.getSequence().append("AAAA".getBytes(), 0, 4); - assertFalse( frag.equals(frag2) ); - } - - @Test - public void testEqualsSequence() - { - frag.getSequence().append("AAAA".getBytes(), 0, 4); - assertFalse( frag.equals(frag2) ); - frag2.getSequence().append("AAAA".getBytes(), 0, 4); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsQuality() - { - frag.getQuality().append("AAAA".getBytes(), 0, 4); - assertFalse( frag.equals(frag2) ); - frag2.getQuality().append("AAAA".getBytes(), 0, 4); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsInstrument() - { - frag.setInstrument("instrument"); - assertFalse( frag.equals(frag2) ); - frag2.setInstrument("instrument"); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsRunNumber() - { - frag.setRunNumber(240); - assertFalse( frag.equals(frag2) ); - frag2.setRunNumber(240); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsFlowcellId() - { - frag.setFlowcellId("id"); - assertFalse( frag.equals(frag2) ); - frag2.setFlowcellId("id"); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsLane() - { - frag.setLane(2); - assertFalse( frag.equals(frag2) ); - frag2.setLane(2); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsTile() - { - frag.setTile(1000); - assertFalse( frag.equals(frag2) ); - frag2.setTile(1000); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsXpos() - { - frag.setXpos(1234); - assertFalse( frag.equals(frag2) ); - frag2.setXpos(1234); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsYpos() - { - frag.setYpos(1234); - assertFalse( frag.equals(frag2) ); - frag2.setYpos(1234); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsRead() - { - frag.setRead(2); - assertFalse( frag.equals(frag2) ); - frag2.setRead(2); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsFilterPassed() - { - frag.setFilterPassed(false); - assertFalse( frag.equals(frag2) ); - frag2.setFilterPassed(false); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsControlNumber() - { - frag.setControlNumber(314); - assertFalse( frag.equals(frag2) ); - frag2.setControlNumber(314); - assertTrue( frag.equals(frag2) ); - } - - @Test - public void testEqualsIndexSequence() - { - frag.setIndexSequence("ABC"); - 
assertFalse( frag.equals(frag2) ); - frag2.setIndexSequence("ABC"); - assertTrue( frag.equals(frag2) ); - } - - /////////////////////////////////////////////////////////////// - // serialization - /////////////////////////////////////////////////////////////// - private static SequencedFragment cloneBySerialization(SequencedFragment original) throws IOException - { - ByteArrayOutputStream outputBuffer = new ByteArrayOutputStream(); - DataOutputStream dataOutput = new DataOutputStream(outputBuffer); - original.write(dataOutput); - dataOutput.close(); - - SequencedFragment newFrag = new SequencedFragment(); - newFrag.readFields( new DataInputStream( new ByteArrayInputStream(outputBuffer.toByteArray()))); - - return newFrag; - } - - @Test - public void testSerializationEmpty() throws IOException - { - assertEquals(frag, cloneBySerialization(frag)); - } - - @Test - public void testSerializationWithSeq() throws IOException - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("##############################")); - assertEquals(frag, cloneBySerialization(frag)); - } - - @Test - public void testSerializationWithFields() throws IOException - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB")); - - frag.setInstrument("machine"); - frag.setLane(3); - frag.setRead(1); - frag.setIndexSequence("CAT"); - - assertEquals(frag, cloneBySerialization(frag)); - } - - @Test - public void testToString() - { - String seq = "AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT"; - String qual = "##############################"; - frag.setSequence(new Text(seq)); - frag.setQuality(new Text(qual)); - - frag.setInstrument("machine"); - frag.setRunNumber(123); - frag.setFlowcellId("flowcell"); - frag.setLane(3); - frag.setTile(1001); - frag.setXpos(1234); - frag.setYpos(4321); - frag.setIndexSequence("CAT"); - frag.setRead(1); - - assertEquals("machine\t123\tflowcell\t3\t1001\t1234\t4321\tCAT\t1\t" + seq + "\t" + qual + "\t1", frag.toString()); - } - - @Test - public void testVerifyQualitySangerOk() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("##############################")); - assertEquals(-1, SequencedFragment.verifyQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); - } - - @Test - public void testVerifyQualityIlluminaOk() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")); - assertEquals(-1, SequencedFragment.verifyQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina)); - } - - @Test - public void testVerifyQualitySangerOutOfRange() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("#############################" + Character.toString((char)127))); // over range - assertEquals(29, SequencedFragment.verifyQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); - - frag.setQuality(new Text("##### ########################")); // under range - assertEquals(5, SequencedFragment.verifyQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); - } - - @Test - public void testVerifyQualityIlluminaOutOfRange() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("zzz=zzzzzzzzzzzzzzzzzzzzzzzzzz")); - assertEquals(3, SequencedFragment.verifyQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina)); - } - - @Test - public void 
testConvertQualityIlluminaToSanger() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")); - SequencedFragment.convertQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina, FormatConstants.BaseQualityEncoding.Sanger); - assertEquals("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", frag.getQuality().toString()); - } - - @Test - public void testConvertQualitySangerToIllumina() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[")); - SequencedFragment.convertQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger, FormatConstants.BaseQualityEncoding.Illumina); - assertEquals("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", frag.getQuality().toString()); - } - - @Test(expected=IllegalArgumentException.class) - public void testConvertQualityNoop() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[")); - SequencedFragment.convertQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger, FormatConstants.BaseQualityEncoding.Sanger); - } - - @Test(expected=FormatException.class) - public void testConvertQualityIlluminaOutOfRange() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("zzz=zzzzzzzzzzzzzzzzzzzzzzzzzz")); - SequencedFragment.convertQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina, FormatConstants.BaseQualityEncoding.Sanger); - } - - @Test(expected=FormatException.class) - public void testConvertQualitySangerUnderRange() - { - frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); - frag.setQuality(new Text("### ##########################")); - SequencedFragment.convertQuality(frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger, FormatConstants.BaseQualityEncoding.Illumina); - } - - public static void main(String args[]) { - org.junit.runner.JUnitCore.main(TestSequencedFragment.class.getName()); - } +public class TestSequencedFragment { + + private SequencedFragment frag; + private SequencedFragment frag2; + + /////////////////////////////////////////////////////////////// + // serialization + /////////////////////////////////////////////////////////////// + private static SequencedFragment cloneBySerialization(SequencedFragment original) + throws IOException { + ByteArrayOutputStream outputBuffer = new ByteArrayOutputStream(); + DataOutputStream dataOutput = new DataOutputStream(outputBuffer); + original.write(dataOutput); + dataOutput.close(); + + SequencedFragment newFrag = new SequencedFragment(); + newFrag.readFields(new DataInputStream(new ByteArrayInputStream(outputBuffer.toByteArray()))); + + return newFrag; + } + + public static void main(String args[]) { + org.junit.runner.JUnitCore.main(TestSequencedFragment.class.getName()); + } + + @Before + public void setup() { + frag = new SequencedFragment(); + frag2 = new SequencedFragment(); + } + + @Test + public void testInitialState() { + assertNotNull(frag.getSequence()); + assertNotNull(frag.getQuality()); + + assertNull(frag.getInstrument()); + assertNull(frag.getRunNumber()); + assertNull(frag.getFlowcellId()); + assertNull(frag.getLane()); + assertNull(frag.getTile()); + assertNull(frag.getXpos()); + assertNull(frag.getYpos()); + assertNull(frag.getRead()); + assertNull(frag.getFilterPassed()); + assertNull(frag.getControlNumber()); + assertNull(frag.getIndexSequence()); + + assertNotNull(frag.toString()); + 
} + + @Test(expected = IllegalArgumentException.class) + public void testNoNullSequence() { + frag.setSequence(null); + } + + @Test(expected = IllegalArgumentException.class) + public void testNoNullQuality() { + frag.setQuality(null); + } + + /////////////////////////////////////////////////////////////// + // equals + /////////////////////////////////////////////////////////////// + @Test + public void testEquals() { + assertTrue(frag.equals(frag2)); + + frag.getSequence().append("AAAA".getBytes(), 0, 4); + assertFalse(frag.equals(frag2)); + } + + @Test + public void testEqualsSequence() { + frag.getSequence().append("AAAA".getBytes(), 0, 4); + assertFalse(frag.equals(frag2)); + frag2.getSequence().append("AAAA".getBytes(), 0, 4); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsQuality() { + frag.getQuality().append("AAAA".getBytes(), 0, 4); + assertFalse(frag.equals(frag2)); + frag2.getQuality().append("AAAA".getBytes(), 0, 4); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsInstrument() { + frag.setInstrument("instrument"); + assertFalse(frag.equals(frag2)); + frag2.setInstrument("instrument"); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsRunNumber() { + frag.setRunNumber(240); + assertFalse(frag.equals(frag2)); + frag2.setRunNumber(240); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsFlowcellId() { + frag.setFlowcellId("id"); + assertFalse(frag.equals(frag2)); + frag2.setFlowcellId("id"); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsLane() { + frag.setLane(2); + assertFalse(frag.equals(frag2)); + frag2.setLane(2); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsTile() { + frag.setTile(1000); + assertFalse(frag.equals(frag2)); + frag2.setTile(1000); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsXpos() { + frag.setXpos(1234); + assertFalse(frag.equals(frag2)); + frag2.setXpos(1234); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsYpos() { + frag.setYpos(1234); + assertFalse(frag.equals(frag2)); + frag2.setYpos(1234); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsRead() { + frag.setRead(2); + assertFalse(frag.equals(frag2)); + frag2.setRead(2); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsFilterPassed() { + frag.setFilterPassed(false); + assertFalse(frag.equals(frag2)); + frag2.setFilterPassed(false); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsControlNumber() { + frag.setControlNumber(314); + assertFalse(frag.equals(frag2)); + frag2.setControlNumber(314); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testEqualsIndexSequence() { + frag.setIndexSequence("ABC"); + assertFalse(frag.equals(frag2)); + frag2.setIndexSequence("ABC"); + assertTrue(frag.equals(frag2)); + } + + @Test + public void testSerializationEmpty() throws IOException { + assertEquals(frag, cloneBySerialization(frag)); + } + + @Test + public void testSerializationWithSeq() throws IOException { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("##############################")); + assertEquals(frag, cloneBySerialization(frag)); + } + + @Test + public void testSerializationWithFields() throws IOException { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("BBBBBBBBBBBBBBBBBBBBBBBBBBBBBB")); + + frag.setInstrument("machine"); + frag.setLane(3); + 
frag.setRead(1); + frag.setIndexSequence("CAT"); + + assertEquals(frag, cloneBySerialization(frag)); + } + + @Test + public void testToString() { + String seq = "AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT"; + String qual = "##############################"; + frag.setSequence(new Text(seq)); + frag.setQuality(new Text(qual)); + + frag.setInstrument("machine"); + frag.setRunNumber(123); + frag.setFlowcellId("flowcell"); + frag.setLane(3); + frag.setTile(1001); + frag.setXpos(1234); + frag.setYpos(4321); + frag.setIndexSequence("CAT"); + frag.setRead(1); + + assertEquals( + "machine\t123\tflowcell\t3\t1001\t1234\t4321\tCAT\t1\t" + seq + "\t" + qual + "\t1", + frag.toString()); + } + + @Test + public void testVerifyQualitySangerOk() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("##############################")); + assertEquals( + -1, + SequencedFragment.verifyQuality( + frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); + } + + @Test + public void testVerifyQualityIlluminaOk() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")); + assertEquals( + -1, + SequencedFragment.verifyQuality( + frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina)); + } + + @Test + public void testVerifyQualitySangerOutOfRange() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality( + new Text("#############################" + Character.toString((char) 127))); // over range + assertEquals( + 29, + SequencedFragment.verifyQuality( + frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); + + frag.setQuality(new Text("##### ########################")); // under range + assertEquals( + 5, + SequencedFragment.verifyQuality( + frag.getQuality(), FormatConstants.BaseQualityEncoding.Sanger)); + } + + @Test + public void testVerifyQualityIlluminaOutOfRange() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("zzz=zzzzzzzzzzzzzzzzzzzzzzzzzz")); + assertEquals( + 3, + SequencedFragment.verifyQuality( + frag.getQuality(), FormatConstants.BaseQualityEncoding.Illumina)); + } + + @Test + public void testConvertQualityIlluminaToSanger() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz")); + SequencedFragment.convertQuality( + frag.getQuality(), + FormatConstants.BaseQualityEncoding.Illumina, + FormatConstants.BaseQualityEncoding.Sanger); + assertEquals("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[", frag.getQuality().toString()); + } + + @Test + public void testConvertQualitySangerToIllumina() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[")); + SequencedFragment.convertQuality( + frag.getQuality(), + FormatConstants.BaseQualityEncoding.Sanger, + FormatConstants.BaseQualityEncoding.Illumina); + assertEquals("zzzzzzzzzzzzzzzzzzzzzzzzzzzzzz", frag.getQuality().toString()); + } + + @Test(expected = IllegalArgumentException.class) + public void testConvertQualityNoop() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("[[[[[[[[[[[[[[[[[[[[[[[[[[[[[[")); + SequencedFragment.convertQuality( + frag.getQuality(), + FormatConstants.BaseQualityEncoding.Sanger, + FormatConstants.BaseQualityEncoding.Sanger); + } + + @Test(expected = FormatException.class) + public void testConvertQualityIlluminaOutOfRange() { + frag.setSequence(new 
Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("zzz=zzzzzzzzzzzzzzzzzzzzzzzzzz")); + SequencedFragment.convertQuality( + frag.getQuality(), + FormatConstants.BaseQualityEncoding.Illumina, + FormatConstants.BaseQualityEncoding.Sanger); + } + + @Test(expected = FormatException.class) + public void testConvertQualitySangerUnderRange() { + frag.setSequence(new Text("AGTAGTAGTAGTAGTAGTAGTAGTAGTAGT")); + frag.setQuality(new Text("### ##########################")); + SequencedFragment.convertQuality( + frag.getQuality(), + FormatConstants.BaseQualityEncoding.Sanger, + FormatConstants.BaseQualityEncoding.Illumina); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestSplittingBAMIndexer.java b/src/test/java/org/seqdoop/hadoop_bam/TestSplittingBAMIndexer.java index d5c801e..c43b459 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestSplittingBAMIndexer.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestSplittingBAMIndexer.java @@ -1,5 +1,8 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SamReader; import htsjdk.samtools.SamReaderFactory; @@ -10,10 +13,8 @@ import org.junit.Before; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - public class TestSplittingBAMIndexer { + private String input; @Before @@ -24,7 +25,7 @@ public void setup() throws Exception { @Test public void testIndexersProduceSameIndexes() throws Exception { long bamFileSize = new File(input).length(); - for (int g : new int[] { 2, 10, SplittingBAMIndexer.DEFAULT_GRANULARITY}) { + for (int g : new int[] {2, 10, SplittingBAMIndexer.DEFAULT_GRANULARITY}) { SplittingBAMIndex index1 = fromBAMFile(g); SplittingBAMIndex index2 = fromSAMRecords(g); assertEquals(index1, index2); @@ -33,8 +34,7 @@ public void testIndexersProduceSameIndexes() throws Exception { } } - private SplittingBAMIndex fromBAMFile(int granularity) throws - IOException { + private SplittingBAMIndex fromBAMFile(int granularity) throws IOException { Configuration conf = new Configuration(); conf.set("input", new File(input).toURI().toString()); conf.setInt("granularity", granularity); @@ -51,8 +51,10 @@ private SplittingBAMIndex fromSAMRecords(int granularity) throws IOException { File indexFile = new File(input + SplittingBAMIndexer.OUTPUT_FILE_EXTENSION); FileOutputStream out = new FileOutputStream(indexFile); SplittingBAMIndexer indexer = new SplittingBAMIndexer(out, granularity); - SamReader samReader = SamReaderFactory.makeDefault() - .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS).open(new File(input)); + SamReader samReader = + SamReaderFactory.makeDefault() + .enable(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS) + .open(new File(input)); for (SAMRecord r : samReader) { indexer.processAlignment(r); } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestVCFFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestVCFFormat.java index 0f7441a..3d24931 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestVCFFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestVCFFormat.java @@ -1,14 +1,12 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNull; + import java.io.IOException; import java.io.InputStream; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static 
org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; - public class TestVCFFormat { @Test diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormat.java index 12ca484..85cfae1 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormat.java @@ -20,17 +20,27 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.mock; + import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; import htsjdk.samtools.util.Interval; +import htsjdk.variant.variantcontext.VariantContext; import htsjdk.variant.vcf.VCFFileReader; import java.io.File; +import java.io.IOException; +import java.lang.reflect.InvocationTargetException; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; +import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.mapreduce.*; -import htsjdk.variant.variantcontext.VariantContext; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.task.JobContextImpl; import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; @@ -38,155 +48,159 @@ import org.junit.Test; import org.junit.runner.RunWith; import org.junit.runners.Parameterized; - -import java.io.IOException; -import java.lang.reflect.InvocationTargetException; -import java.util.Arrays; -import java.util.Collection; -import java.util.List; import org.seqdoop.hadoop_bam.util.BGZFCodec; import org.seqdoop.hadoop_bam.util.BGZFEnhancedGzipCodec; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertTrue; -import static org.mockito.Mockito.mock; - @RunWith(Parameterized.class) public class TestVCFInputFormat { - enum NUM_SPLITS { - ANY, EXACTLY_ONE, MORE_THAN_ONE - } - private String filename; - private NUM_SPLITS expectedSplits; - private Interval interval; - private VariantContextWritable writable; - private List> readers; - private TaskAttemptContext taskAttemptContext; - - public TestVCFInputFormat(String filename, NUM_SPLITS expectedSplits, Interval interval) { - this.filename = filename; - this.expectedSplits = expectedSplits; - this.interval = interval; - } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { - {"test.vcf", NUM_SPLITS.ANY, null}, - {"test.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, - {"test.vcf.bgzf.gz", NUM_SPLITS.ANY, null}, - // BCF tests currently fail due to https://github.com/samtools/htsjdk/issues/507 -// {"test.uncompressed.bcf", NUM_SPLITS.ANY, null}, -// {"test.bgzf.bcf", NUM_SPLITS.ANY, null}, - {"HiSeq.10000.vcf", NUM_SPLITS.MORE_THAN_ONE, null}, - {"HiSeq.10000.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, - {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.MORE_THAN_ONE, null}, - {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.EXACTLY_ONE, - new Interval("chr1", 2700000, 2800000)}, // chosen to fall in one split - {"HiSeq.10000.vcf.bgz", NUM_SPLITS.MORE_THAN_ONE, null}, - {"HiSeq.10000.vcf.bgz", NUM_SPLITS.EXACTLY_ONE, - new Interval("chr1", 2700000, 2800000)} // chosen to fall in one split + private String filename; + private NUM_SPLITS expectedSplits; + private Interval interval; 
+ private VariantContextWritable writable; + private List> readers; + private TaskAttemptContext taskAttemptContext; + + public TestVCFInputFormat(String filename, NUM_SPLITS expectedSplits, Interval interval) { + this.filename = filename; + this.expectedSplits = expectedSplits; + this.interval = interval; + } + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList( + new Object[][] { + {"test.vcf", NUM_SPLITS.ANY, null}, + {"test.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, + {"test.vcf.bgzf.gz", NUM_SPLITS.ANY, null}, + // BCF tests currently fail due to https://github.com/samtools/htsjdk/issues/507 + // {"test.uncompressed.bcf", NUM_SPLITS.ANY, null}, + // {"test.bgzf.bcf", NUM_SPLITS.ANY, null}, + {"HiSeq.10000.vcf", NUM_SPLITS.MORE_THAN_ONE, null}, + {"HiSeq.10000.vcf.gz", NUM_SPLITS.EXACTLY_ONE, null}, + {"HiSeq.10000.vcf.bgzf.gz", NUM_SPLITS.MORE_THAN_ONE, null}, + { + "HiSeq.10000.vcf.bgzf.gz", + NUM_SPLITS.EXACTLY_ONE, + new Interval("chr1", 2700000, 2800000) + }, // chosen to fall in one split + {"HiSeq.10000.vcf.bgz", NUM_SPLITS.MORE_THAN_ONE, null}, + { + "HiSeq.10000.vcf.bgz", NUM_SPLITS.EXACTLY_ONE, new Interval("chr1", 2700000, 2800000) + } // chosen to fall in one split }); + } + + @Before + public void setup() + throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, + InstantiationException, InterruptedException, NoSuchFieldException { + Configuration conf = new Configuration(); + String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); + conf.set("hadoopbam.vcf.trust-exts", "true"); + conf.set("mapred.input.dir", "file://" + input_file); + conf.setStrings( + "io.compression.codecs", + BGZFEnhancedGzipCodec.class.getCanonicalName(), + BGZFCodec.class.getCanonicalName()); + conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K + + if (interval != null) { + VCFInputFormat.setIntervals(conf, ImmutableList.of(interval)); } - @Before - public void setup() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException, InterruptedException, NoSuchFieldException { - Configuration conf = new Configuration(); - String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); - conf.set("hadoopbam.vcf.trust-exts", "true"); - conf.set("mapred.input.dir", "file://" + input_file); - conf.setStrings("io.compression.codecs", BGZFEnhancedGzipCodec.class.getCanonicalName(), - BGZFCodec.class.getCanonicalName()); - conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K - - if (interval != null) { - VCFInputFormat.setIntervals(conf, ImmutableList.of(interval)); - } - - taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); - JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID()); - - VCFInputFormat inputFormat = new VCFInputFormat(conf); - List splits = inputFormat.getSplits(ctx); - switch (expectedSplits) { - case EXACTLY_ONE: - assertEquals("Should be exactly one split", 1, splits.size()); - break; - case MORE_THAN_ONE: - assertTrue("Should be more than one split", splits.size() > 1); - break; - case ANY: - default: - break; - } - readers = new ArrayList<>(); - for (InputSplit split : splits) { - RecordReader reader = inputFormat.createRecordReader(split, taskAttemptContext); - reader.initialize(split, taskAttemptContext); - readers.add(reader); - } + taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); + JobContext ctx = new 
JobContextImpl(conf, taskAttemptContext.getJobID()); + + VCFInputFormat inputFormat = new VCFInputFormat(conf); + List splits = inputFormat.getSplits(ctx); + switch (expectedSplits) { + case EXACTLY_ONE: + assertEquals("Should be exactly one split", 1, splits.size()); + break; + case MORE_THAN_ONE: + assertTrue("Should be more than one split", splits.size() > 1); + break; + case ANY: + default: + break; } - - @Test - public void countEntries() throws Exception { - VCFFileReader vcfFileReader = - new VCFFileReader(new File("src/test/resources/" + filename), false); - Iterator variantIterator; - if (interval == null) { - variantIterator = vcfFileReader.iterator(); - } else { - variantIterator = vcfFileReader.query(interval.getContig(), - interval.getStart(), interval.getEnd()); - } - int expectedCount = Iterators.size(variantIterator); - - int counter = 0; - for (RecordReader reader : readers) { - while (reader.nextKeyValue()) { - writable = reader.getCurrentValue(); - assertNotNull(writable); - VariantContext vc = writable.get(); - assertNotNull(vc); - String value = vc.toString(); - assertNotNull(value); - counter++; - } - } - assertEquals(expectedCount, counter); + readers = new ArrayList<>(); + for (InputSplit split : splits) { + RecordReader reader = + inputFormat.createRecordReader(split, taskAttemptContext); + reader.initialize(split, taskAttemptContext); + readers.add(reader); } + } + + @Test + public void countEntries() throws Exception { + VCFFileReader vcfFileReader = + new VCFFileReader(new File("src/test/resources/" + filename), false); + Iterator variantIterator; + if (interval == null) { + variantIterator = vcfFileReader.iterator(); + } else { + variantIterator = + vcfFileReader.query(interval.getContig(), interval.getStart(), interval.getEnd()); + } + int expectedCount = Iterators.size(variantIterator); - @Test - public void testFirstSecond() throws Exception { - if (!filename.startsWith("test.")) { - return; - } - RecordReader reader = readers.get(0); - if (!reader.nextKeyValue()) - throw new Exception("could not read first VariantContext"); - + int counter = 0; + for (RecordReader reader : readers) { + while (reader.nextKeyValue()) { writable = reader.getCurrentValue(); assertNotNull(writable); VariantContext vc = writable.get(); assertNotNull(vc); + String value = vc.toString(); + assertNotNull(value); + counter++; + } + } + assertEquals(expectedCount, counter); + } - assertEquals("20", vc.getContig()); - assertEquals(14370, vc.getStart()); - assertEquals(14370, vc.getEnd()); - assertEquals("G", vc.getReference().getBaseString()); - assertEquals("A", vc.getAlternateAllele(0).getBaseString()); - - assertTrue("second VariantContext", reader.nextKeyValue()); - - writable = reader.getCurrentValue(); - assertNotNull(writable); - vc = writable.get(); - assertNotNull(vc); - - assertEquals("20", vc.getContig()); - assertEquals(17330, vc.getStart()); - assertEquals(17330, vc.getEnd()); - assertEquals("T", vc.getReference().getBaseString()); - assertEquals("A", vc.getAlternateAllele(0).getBaseString()); + @Test + public void testFirstSecond() throws Exception { + if (!filename.startsWith("test.")) { + return; } + RecordReader reader = readers.get(0); + if (!reader.nextKeyValue()) { + throw new Exception("could not read first VariantContext"); + } + + writable = reader.getCurrentValue(); + assertNotNull(writable); + VariantContext vc = writable.get(); + assertNotNull(vc); + + assertEquals("20", vc.getContig()); + assertEquals(14370, vc.getStart()); + assertEquals(14370, vc.getEnd()); 
+ assertEquals("G", vc.getReference().getBaseString()); + assertEquals("A", vc.getAlternateAllele(0).getBaseString()); + + assertTrue("second VariantContext", reader.nextKeyValue()); + + writable = reader.getCurrentValue(); + assertNotNull(writable); + vc = writable.get(); + assertNotNull(vc); + + assertEquals("20", vc.getContig()); + assertEquals(17330, vc.getStart()); + assertEquals(17330, vc.getEnd()); + assertEquals("T", vc.getReference().getBaseString()); + assertEquals("A", vc.getAlternateAllele(0).getBaseString()); + } + + enum NUM_SPLITS { + ANY, + EXACTLY_ONE, + MORE_THAN_ONE + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormatStringency.java b/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormatStringency.java index a2f381c..7578191 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormatStringency.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestVCFInputFormatStringency.java @@ -20,6 +20,10 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.mockito.Mockito.mock; + import htsjdk.samtools.ValidationStringency; import htsjdk.tribble.TribbleException; import htsjdk.variant.variantcontext.VariantContext; @@ -35,65 +39,62 @@ import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.junit.Test; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertNotNull; -import static org.mockito.Mockito.mock; - public class TestVCFInputFormatStringency { - public void checkReading(ValidationStringency validationStringency) throws Exception { - String filename = "invalid_info_field.vcf"; - Configuration conf = new Configuration(); - String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); - conf.set("mapred.input.dir", "file://" + input_file); + public void checkReading(ValidationStringency validationStringency) throws Exception { + String filename = "invalid_info_field.vcf"; + Configuration conf = new Configuration(); + String input_file = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); + conf.set("mapred.input.dir", "file://" + input_file); - if (validationStringency != null) { - VCFRecordReader.setValidationStringency(conf, validationStringency); - } + if (validationStringency != null) { + VCFRecordReader.setValidationStringency(conf, validationStringency); + } - TaskAttemptContext taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); - JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID()); + TaskAttemptContext taskAttemptContext = + new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); + JobContext ctx = new JobContextImpl(conf, taskAttemptContext.getJobID()); - VCFInputFormat inputFormat = new VCFInputFormat(conf); - List splits = inputFormat.getSplits(ctx); - assertEquals(1, splits.size()); - RecordReader reader = - inputFormat.createRecordReader(splits.get(0), taskAttemptContext); - int counter = 0; - while (reader.nextKeyValue()) { - VariantContextWritable writable = reader.getCurrentValue(); - assertNotNull(writable); - VariantContext vc = writable.get(); - assertNotNull(vc); - String value = vc.toString(); - assertNotNull(value); - counter++; - } - assertEquals(4, counter); + VCFInputFormat inputFormat = new VCFInputFormat(conf); + List splits = inputFormat.getSplits(ctx); + assertEquals(1, splits.size()); + RecordReader reader = + inputFormat.createRecordReader(splits.get(0), taskAttemptContext); + int 
counter = 0; + while (reader.nextKeyValue()) { + VariantContextWritable writable = reader.getCurrentValue(); + assertNotNull(writable); + VariantContext vc = writable.get(); + assertNotNull(vc); + String value = vc.toString(); + assertNotNull(value); + counter++; } + assertEquals(4, counter); + } - @Test(expected = TribbleException.class) - public void testUnset() throws Exception { - checkReading(null); // defaults to strict - } + @Test(expected = TribbleException.class) + public void testUnset() throws Exception { + checkReading(null); // defaults to strict + } - @Test(expected = TribbleException.class) - public void testDefault() throws Exception { - checkReading(ValidationStringency.DEFAULT_STRINGENCY); // defaults to strict - } + @Test(expected = TribbleException.class) + public void testDefault() throws Exception { + checkReading(ValidationStringency.DEFAULT_STRINGENCY); // defaults to strict + } - @Test - public void testSilent() throws Exception { - checkReading(ValidationStringency.SILENT); - } + @Test + public void testSilent() throws Exception { + checkReading(ValidationStringency.SILENT); + } - @Test - public void testLenient() throws Exception { - checkReading(ValidationStringency.LENIENT); - } + @Test + public void testLenient() throws Exception { + checkReading(ValidationStringency.LENIENT); + } - @Test(expected = TribbleException.class) - public void testStrict() throws Exception { - checkReading(ValidationStringency.STRICT); - } + @Test(expected = TribbleException.class) + public void testStrict() throws Exception { + checkReading(ValidationStringency.STRICT); + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestVCFOutputFormat.java b/src/test/java/org/seqdoop/hadoop_bam/TestVCFOutputFormat.java index 4002f1a..f698394 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestVCFOutputFormat.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestVCFOutputFormat.java @@ -20,14 +20,14 @@ package org.seqdoop.hadoop_bam; -import java.io.*; -import java.lang.reflect.InvocationTargetException; -import java.util.*; +import static org.mockito.Mockito.mock; import htsjdk.samtools.seekablestream.SeekableFileStream; import htsjdk.variant.variantcontext.*; import htsjdk.variant.vcf.*; - +import java.io.*; +import java.lang.reflect.InvocationTargetException; +import java.util.*; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileUtil; import org.apache.hadoop.fs.Path; @@ -35,150 +35,163 @@ import org.apache.hadoop.mapred.TaskAttemptID; import org.apache.hadoop.mapreduce.RecordWriter; import org.apache.hadoop.mapreduce.TaskAttemptContext; - import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl; import org.junit.After; import org.junit.Assert; import org.junit.Before; import org.junit.Test; - import org.seqdoop.hadoop_bam.util.VCFHeaderReader; -import static org.mockito.Mockito.mock; - public class TestVCFOutputFormat { - private VariantContextWritable writable; - private RecordWriter writer; - private TaskAttemptContext taskAttemptContext; - private File test_vcf_output; - - @Before - public void setup() throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException { - test_vcf_output = File.createTempFile("test_vcf_output", ""); - test_vcf_output.delete(); - writable = new VariantContextWritable(); - Configuration conf = new Configuration(); - conf.set("hadoopbam.vcf.output-format", "VCF"); - KeyIgnoringVCFOutputFormat outputFormat = new KeyIgnoringVCFOutputFormat(conf); - 
outputFormat.setHeader(readHeader()); - taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); - writer = outputFormat.getRecordWriter(taskAttemptContext, new Path("file://" + test_vcf_output)); - } - @After - public void cleanup() throws IOException { - FileUtil.fullyDelete(test_vcf_output); + private VariantContextWritable writable; + private RecordWriter writer; + private TaskAttemptContext taskAttemptContext; + private File test_vcf_output; + + @Before + public void setup() + throws IOException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, + InstantiationException { + test_vcf_output = File.createTempFile("test_vcf_output", ""); + test_vcf_output.delete(); + writable = new VariantContextWritable(); + Configuration conf = new Configuration(); + conf.set("hadoopbam.vcf.output-format", "VCF"); + KeyIgnoringVCFOutputFormat outputFormat = new KeyIgnoringVCFOutputFormat(conf); + outputFormat.setHeader(readHeader()); + taskAttemptContext = new TaskAttemptContextImpl(conf, mock(TaskAttemptID.class)); + writer = + outputFormat.getRecordWriter(taskAttemptContext, new Path("file://" + test_vcf_output)); + } + + @After + public void cleanup() throws IOException { + FileUtil.fullyDelete(test_vcf_output); + } + + private void skipHeader(LineNumberReader reader) throws IOException { + String line = reader.readLine(); + while (line.startsWith("#")) { + reader.mark(1000); + line = reader.readLine(); } - - private void skipHeader(LineNumberReader reader) throws IOException { - String line = reader.readLine(); - while (line.startsWith("#")) { - reader.mark(1000); - line = reader.readLine(); - } - reader.reset(); - } - - @Test - public void testSimple() throws Exception { - VariantContextBuilder vctx_builder = new VariantContextBuilder(); - - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create("A", false)); - alleles.add(Allele.create("C", true)); - vctx_builder.alleles(alleles); - - GenotypesContext genotypes = GenotypesContext.NO_GENOTYPES; - vctx_builder.genotypes(genotypes); - - HashSet filters = new HashSet(); - vctx_builder.filters(filters); - - HashMap attributes = new HashMap(); - attributes.put("NS", new Integer(4)); - vctx_builder.attributes(attributes); - - vctx_builder.loc("20", 2, 2); - vctx_builder.log10PError(-8.0); - - String[] expected = new String[]{"20", "2", ".", "C", "A", "80", "PASS", "NS=4"}; - - VariantContext ctx = vctx_builder.make(); - writable.set(ctx); - writer.write(1L, writable); - writer.close(taskAttemptContext); - - LineNumberReader reader = new LineNumberReader(new FileReader(test_vcf_output)); - skipHeader(reader); - String[] fields = Arrays.copyOf(reader.readLine().split("\t"), expected.length); - Assert.assertArrayEquals("comparing VCF single line", expected, fields); - } - - @Test - public void testVariantContextReadWrite() throws IOException, InterruptedException - { - // This is to check whether issue https://github.com/HadoopGenomics/Hadoop-BAM/issues/1 has been - // resolved - VariantContextBuilder vctx_builder = new VariantContextBuilder(); - - ArrayList alleles = new ArrayList(); - alleles.add(Allele.create("C", false)); - alleles.add(Allele.create("G", true)); - vctx_builder.alleles(alleles); - - ArrayList genotypes = new ArrayList(); - GenotypeBuilder builder = new GenotypeBuilder(); - genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00001").GQ(48).DP(1).make()); - genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00002").GQ(42).DP(2).make()); - 
genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00003").GQ(39).DP(3).make()); - vctx_builder.genotypes(genotypes); - - HashSet filters = new HashSet(); - vctx_builder.filters(filters); - - HashMap attributes = new HashMap(); - attributes.put("NS", new Integer(4)); - vctx_builder.attributes(attributes); - - vctx_builder.loc("20", 2, 2); - vctx_builder.log10PError(-8.0); - - VariantContext ctx = vctx_builder.make(); - VariantContextWithHeader ctxh = new VariantContextWithHeader(ctx, readHeader()); - writable.set(ctxh); - - DataOutputBuffer out = new DataOutputBuffer(1000); - writable.write(out); - - byte[] data = out.getData(); - ByteArrayInputStream bis = new ByteArrayInputStream(data); - - writable = new VariantContextWritable(); - writable.readFields(new DataInputStream(bis)); - - VariantContext vc = writable.get(); - Assert.assertArrayEquals("comparing Alleles",ctx.getAlleles().toArray(),vc.getAlleles().toArray()); - Assert.assertEquals("comparing Log10PError",ctx.getLog10PError(),vc.getLog10PError(),0.01); - Assert.assertArrayEquals("comparing Filters",ctx.getFilters().toArray(),vc.getFilters().toArray()); - Assert.assertEquals("comparing Attributes",ctx.getAttributes(),vc.getAttributes()); - - // Now check the genotypes. Note: we need to make the header accessible before decoding the genotypes. - GenotypesContext gc = vc.getGenotypes(); - assert(gc instanceof LazyVCFGenotypesContext); - LazyVCFGenotypesContext.HeaderDataCache headerDataCache = new LazyVCFGenotypesContext.HeaderDataCache(); - headerDataCache.setHeader(readHeader()); - ((LazyVCFGenotypesContext) gc).getParser().setHeaderDataCache(headerDataCache); - - for (Genotype genotype : genotypes) { - Assert.assertEquals("checking genotype name", genotype.getSampleName(), gc.get(genotypes.indexOf(genotype)).getSampleName()); - Assert.assertEquals("checking genotype quality", genotype.getGQ(), gc.get(genotypes.indexOf(genotype)).getGQ()); - Assert.assertEquals("checking genotype read depth", genotype.getDP(), gc.get(genotypes.indexOf(genotype)).getDP()); - } - } - - private VCFHeader readHeader() throws IOException { - String header_file = ClassLoader.getSystemClassLoader().getResource("test.vcf").getFile(); - VCFHeader header = VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(header_file))); - return header; + reader.reset(); + } + + @Test + public void testSimple() throws Exception { + VariantContextBuilder vctx_builder = new VariantContextBuilder(); + + ArrayList alleles = new ArrayList(); + alleles.add(Allele.create("A", false)); + alleles.add(Allele.create("C", true)); + vctx_builder.alleles(alleles); + + GenotypesContext genotypes = GenotypesContext.NO_GENOTYPES; + vctx_builder.genotypes(genotypes); + + HashSet filters = new HashSet(); + vctx_builder.filters(filters); + + HashMap attributes = new HashMap(); + attributes.put("NS", new Integer(4)); + vctx_builder.attributes(attributes); + + vctx_builder.loc("20", 2, 2); + vctx_builder.log10PError(-8.0); + + String[] expected = new String[] {"20", "2", ".", "C", "A", "80", "PASS", "NS=4"}; + + VariantContext ctx = vctx_builder.make(); + writable.set(ctx); + writer.write(1L, writable); + writer.close(taskAttemptContext); + + LineNumberReader reader = new LineNumberReader(new FileReader(test_vcf_output)); + skipHeader(reader); + String[] fields = Arrays.copyOf(reader.readLine().split("\t"), expected.length); + Assert.assertArrayEquals("comparing VCF single line", expected, fields); + } + + @Test + public void testVariantContextReadWrite() throws 
IOException, InterruptedException { + // This is to check whether issue https://github.com/HadoopGenomics/Hadoop-BAM/issues/1 has been + // resolved + VariantContextBuilder vctx_builder = new VariantContextBuilder(); + + ArrayList alleles = new ArrayList(); + alleles.add(Allele.create("C", false)); + alleles.add(Allele.create("G", true)); + vctx_builder.alleles(alleles); + + ArrayList genotypes = new ArrayList(); + GenotypeBuilder builder = new GenotypeBuilder(); + genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00001").GQ(48).DP(1).make()); + genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00002").GQ(42).DP(2).make()); + genotypes.add(builder.alleles(alleles.subList(0, 1)).name("NA00003").GQ(39).DP(3).make()); + vctx_builder.genotypes(genotypes); + + HashSet filters = new HashSet(); + vctx_builder.filters(filters); + + HashMap attributes = new HashMap(); + attributes.put("NS", new Integer(4)); + vctx_builder.attributes(attributes); + + vctx_builder.loc("20", 2, 2); + vctx_builder.log10PError(-8.0); + + VariantContext ctx = vctx_builder.make(); + VariantContextWithHeader ctxh = new VariantContextWithHeader(ctx, readHeader()); + writable.set(ctxh); + + DataOutputBuffer out = new DataOutputBuffer(1000); + writable.write(out); + + byte[] data = out.getData(); + ByteArrayInputStream bis = new ByteArrayInputStream(data); + + writable = new VariantContextWritable(); + writable.readFields(new DataInputStream(bis)); + + VariantContext vc = writable.get(); + Assert.assertArrayEquals( + "comparing Alleles", ctx.getAlleles().toArray(), vc.getAlleles().toArray()); + Assert.assertEquals("comparing Log10PError", ctx.getLog10PError(), vc.getLog10PError(), 0.01); + Assert.assertArrayEquals( + "comparing Filters", ctx.getFilters().toArray(), vc.getFilters().toArray()); + Assert.assertEquals("comparing Attributes", ctx.getAttributes(), vc.getAttributes()); + + // Now check the genotypes. Note: we need to make the header accessible before decoding the + // genotypes. 
+ GenotypesContext gc = vc.getGenotypes(); + assert (gc instanceof LazyVCFGenotypesContext); + LazyVCFGenotypesContext.HeaderDataCache headerDataCache = + new LazyVCFGenotypesContext.HeaderDataCache(); + headerDataCache.setHeader(readHeader()); + ((LazyVCFGenotypesContext) gc).getParser().setHeaderDataCache(headerDataCache); + + for (Genotype genotype : genotypes) { + Assert.assertEquals( + "checking genotype name", + genotype.getSampleName(), + gc.get(genotypes.indexOf(genotype)).getSampleName()); + Assert.assertEquals( + "checking genotype quality", + genotype.getGQ(), + gc.get(genotypes.indexOf(genotype)).getGQ()); + Assert.assertEquals( + "checking genotype read depth", + genotype.getDP(), + gc.get(genotypes.indexOf(genotype)).getDP()); } + } + + private VCFHeader readHeader() throws IOException { + String header_file = ClassLoader.getSystemClassLoader().getResource("test.vcf").getFile(); + VCFHeader header = + VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(header_file))); + return header; + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/TestVCFRoundTrip.java b/src/test/java/org/seqdoop/hadoop_bam/TestVCFRoundTrip.java index 0f61172..ca58f31 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/TestVCFRoundTrip.java +++ b/src/test/java/org/seqdoop/hadoop_bam/TestVCFRoundTrip.java @@ -20,6 +20,9 @@ package org.seqdoop.hadoop_bam; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + import com.google.common.collect.Iterators; import com.google.common.io.Files; import htsjdk.samtools.seekablestream.SeekableFileStream; @@ -57,211 +60,209 @@ import org.seqdoop.hadoop_bam.util.VCFFileMerger; import org.seqdoop.hadoop_bam.util.VCFHeaderReader; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; - @RunWith(Parameterized.class) public class TestVCFRoundTrip { - // VCF output format that writes a header before records - static class VCFTestWithHeaderOutputFormat - extends KeyIgnoringVCFOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestVCF.header"; - - public VCFTestWithHeaderOutputFormat() { - super(VCFFormat.VCF); - } - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx) throws IOException { - Path vcfPath = new Path(conf.get(READ_HEADER_FROM_FILE)); - readHeaderFrom(vcfPath, vcfPath.getFileSystem(conf)); - return super.getRecordWriter(ctx); - } + private static Configuration conf; + private String testVCFFileName; + private Class codecClass; + private NUM_SPLITS expectedSplits; + + public TestVCFRoundTrip( + String filename, Class codecClass, NUM_SPLITS expectedSplits) { + testVCFFileName = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); + this.codecClass = codecClass; + this.expectedSplits = expectedSplits; + } + + @Parameterized.Parameters + public static Collection data() { + return Arrays.asList( + new Object[][] { + {"test.vcf", null, NUM_SPLITS.ANY}, + {"test.vcf.gz", BGZFEnhancedGzipCodec.class, NUM_SPLITS.EXACTLY_ONE}, + {"test.vcf.bgzf.gz", BGZFCodec.class, NUM_SPLITS.ANY}, + {"test.vcf.bgz", BGZFCodec.class, NUM_SPLITS.ANY}, + {"HiSeq.10000.vcf", null, NUM_SPLITS.MORE_THAN_ONE}, + {"HiSeq.10000.vcf.gz", BGZFEnhancedGzipCodec.class, NUM_SPLITS.EXACTLY_ONE}, + {"HiSeq.10000.vcf.bgzf.gz", BGZFCodec.class, NUM_SPLITS.MORE_THAN_ONE}, + {"HiSeq.10000.vcf.bgz", BGZFCodec.class, NUM_SPLITS.MORE_THAN_ONE} + }); + } + + private static VCFFileReader parseVcf(File vcf) throws IOException { + File actualVcf; + // work around 
TribbleIndexedFeatureReader not reading header from .bgz files + if (vcf.getName().endsWith(".bgz")) { + actualVcf = File.createTempFile(vcf.getName(), ".gz"); + actualVcf.deleteOnExit(); + Files.copy(vcf, actualVcf); + } else { + actualVcf = vcf; + } + return new VCFFileReader(actualVcf, false); + } + + @Before + public void setup() throws Exception { + conf = new Configuration(); + conf.set(VCFTestWithHeaderOutputFormat.READ_HEADER_FROM_FILE, testVCFFileName); + conf.setStrings( + "io.compression.codecs", + BGZFCodec.class.getCanonicalName(), + BGZFEnhancedGzipCodec.class.getCanonicalName()); + conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K + } + + @Test + public void testRoundTrip() throws Exception { + Path vcfPath = new Path("file://" + testVCFFileName); + + // run a MR job to write out a VCF file + Path outputPath = doMapReduce(vcfPath, true); + + // verify the output is the same as the input + List expectedVariants = new ArrayList<>(); + VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName)); + Iterators.addAll(expectedVariants, vcfFileReader.iterator()); + + int splits = 0; + List actualVariants = new ArrayList<>(); + File[] vcfFiles = + new File(outputPath.toUri()) + .listFiles( + pathname -> + (!pathname.getName().startsWith(".") && !pathname.getName().startsWith("_"))); + Arrays.sort(vcfFiles); // ensure files are sorted by name + for (File vcf : vcfFiles) { + splits++; + Iterators.addAll(actualVariants, parseVcf(vcf).iterator()); + if (BGZFCodec.class.equals(codecClass)) { + assertTrue( + BlockCompressedInputStream.isValidFile( + new BufferedInputStream(new FileInputStream(vcf)))); + } else if (BGZFEnhancedGzipCodec.class.equals(codecClass)) { + assertTrue(VCFFormat.isGzip(new BufferedInputStream(new FileInputStream(vcf)))); + } } - // VCF output format that doesn't write a header before records - static class VCFTestNoHeaderOutputFormat - extends KeyIgnoringVCFOutputFormat { - public final static String READ_HEADER_FROM_FILE = "TestVCF.header"; - - public VCFTestNoHeaderOutputFormat() { - super(VCFFormat.VCF); - } - - @Override - public RecordWriter getRecordWriter( - TaskAttemptContext ctx) throws IOException { - Path vcfPath = new Path(conf.get(READ_HEADER_FROM_FILE)); - readHeaderFrom(vcfPath, vcfPath.getFileSystem(conf)); - ctx.getConfiguration().setBoolean(WRITE_HEADER_PROPERTY, false); - return super.getRecordWriter(ctx); - } + switch (expectedSplits) { + case EXACTLY_ONE: + assertEquals("Should be exactly one split", 1, splits); + break; + case MORE_THAN_ONE: + assertTrue("Should be more than one split", splits > 1); + break; + case ANY: + default: + break; } - @Parameterized.Parameters - public static Collection data() { - return Arrays.asList(new Object[][] { - {"test.vcf", null, NUM_SPLITS.ANY}, - {"test.vcf.gz", BGZFEnhancedGzipCodec.class, NUM_SPLITS.EXACTLY_ONE}, - {"test.vcf.bgzf.gz", BGZFCodec.class, NUM_SPLITS.ANY}, - {"test.vcf.bgz", BGZFCodec.class, NUM_SPLITS.ANY}, - {"HiSeq.10000.vcf", null, NUM_SPLITS.MORE_THAN_ONE}, - {"HiSeq.10000.vcf.gz", BGZFEnhancedGzipCodec.class, NUM_SPLITS.EXACTLY_ONE}, - {"HiSeq.10000.vcf.bgzf.gz", BGZFCodec.class, NUM_SPLITS.MORE_THAN_ONE}, - {"HiSeq.10000.vcf.bgz", BGZFCodec.class, NUM_SPLITS.MORE_THAN_ONE} - }); + // use a VariantContextComparator to check variants are equal + VCFHeader vcfHeader = + VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(testVCFFileName))); + VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator(); + 
assertEquals(expectedVariants.size(), actualVariants.size()); + for (int i = 0; i < expectedVariants.size(); i++) { + assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), actualVariants.get(i))); + } + } + + @Test + public void testRoundTripWithMerge() throws Exception { + Path vcfPath = new Path("file://" + testVCFFileName); + + // run a MR job to write out a VCF file + Path outputPath = doMapReduce(vcfPath, false); + + // merge the output + VCFHeader vcfHeader = + VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new File(testVCFFileName))); + final File outFile = + File.createTempFile( + "testVCFWriter", testVCFFileName.substring(testVCFFileName.lastIndexOf("."))); + outFile.deleteOnExit(); + VCFFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), vcfHeader); + List actualVariants = new ArrayList<>(); + VCFFileReader vcfFileReaderActual = parseVcf(outFile); + Iterators.addAll(actualVariants, vcfFileReaderActual.iterator()); + + // verify the output is the same as the input + List expectedVariants = new ArrayList<>(); + VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName)); + Iterators.addAll(expectedVariants, vcfFileReader.iterator()); + + // use a VariantContextComparator to check variants are equal + VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator(); + assertEquals(expectedVariants.size(), actualVariants.size()); + for (int i = 0; i < expectedVariants.size(); i++) { + assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), actualVariants.get(i))); } + } - private static Configuration conf; + private Path doMapReduce(final Path inputPath, final boolean writeHeader) throws Exception { + final FileSystem fileSystem = FileSystem.get(conf); + final Path outputPath = fileSystem.makeQualified(new Path("target/out")); + fileSystem.delete(outputPath, true); - private String testVCFFileName; - private Class codecClass; - private NUM_SPLITS expectedSplits; + final Job job = Job.getInstance(conf); + FileInputFormat.setInputPaths(job, inputPath); - public TestVCFRoundTrip(String filename, Class codecClass, - NUM_SPLITS expectedSplits) { - testVCFFileName = ClassLoader.getSystemClassLoader().getResource(filename).getFile(); - this.codecClass = codecClass; - this.expectedSplits = expectedSplits; - } + job.setInputFormatClass(VCFInputFormat.class); + job.setMapOutputKeyClass(LongWritable.class); + job.setMapOutputValueClass(VariantContextWritable.class); - @Before - public void setup() throws Exception { - conf = new Configuration(); - conf.set(VCFTestWithHeaderOutputFormat.READ_HEADER_FROM_FILE, testVCFFileName); - conf.setStrings("io.compression.codecs", BGZFCodec.class.getCanonicalName(), - BGZFEnhancedGzipCodec.class.getCanonicalName()); - conf.setInt(FileInputFormat.SPLIT_MAXSIZE, 100 * 1024); // 100K - } + job.setOutputFormatClass( + writeHeader ? 
VCFTestWithHeaderOutputFormat.class : VCFTestNoHeaderOutputFormat.class); + job.setOutputKeyClass(LongWritable.class); + job.setOutputValueClass(VariantContextWritable.class); - @Test - public void testRoundTrip() throws Exception { - Path vcfPath = new Path("file://" + testVCFFileName); - - // run a MR job to write out a VCF file - Path outputPath = doMapReduce(vcfPath, true); - - // verify the output is the same as the input - List expectedVariants = new ArrayList<>(); - VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName)); - Iterators.addAll(expectedVariants, vcfFileReader.iterator()); - - int splits = 0; - List actualVariants = new ArrayList<>(); - File[] vcfFiles = new File(outputPath.toUri()).listFiles( - pathname -> (!pathname.getName().startsWith(".") && - !pathname.getName().startsWith("_"))); - Arrays.sort(vcfFiles); // ensure files are sorted by name - for (File vcf : vcfFiles) { - splits++; - Iterators.addAll(actualVariants, parseVcf(vcf).iterator()); - if (BGZFCodec.class.equals(codecClass)) { - assertTrue(BlockCompressedInputStream.isValidFile( - new BufferedInputStream(new FileInputStream(vcf)))); - } else if (BGZFEnhancedGzipCodec.class.equals(codecClass)) { - assertTrue(VCFFormat.isGzip( - new BufferedInputStream(new FileInputStream(vcf)))); - } - } - - switch (expectedSplits) { - case EXACTLY_ONE: - assertEquals("Should be exactly one split", 1, splits); - break; - case MORE_THAN_ONE: - assertTrue("Should be more than one split", splits > 1); - break; - case ANY: - default: - break; - } - - // use a VariantContextComparator to check variants are equal - VCFHeader vcfHeader = VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new - File(testVCFFileName))); - VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator(); - assertEquals(expectedVariants.size(), actualVariants.size()); - for (int i = 0; i < expectedVariants.size(); i++) { - assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), - actualVariants.get(i))); - } + job.setNumReduceTasks(0); + FileOutputFormat.setOutputPath(job, outputPath); + if (codecClass != null) { + FileOutputFormat.setOutputCompressorClass(job, codecClass); } - @Test - public void testRoundTripWithMerge() throws Exception { - Path vcfPath = new Path("file://" + testVCFFileName); - - // run a MR job to write out a VCF file - Path outputPath = doMapReduce(vcfPath, false); - - // merge the output - VCFHeader vcfHeader = VCFHeaderReader.readHeaderFrom(new SeekableFileStream(new - File(testVCFFileName))); - final File outFile = File.createTempFile("testVCFWriter", - testVCFFileName.substring(testVCFFileName.lastIndexOf("."))); - outFile.deleteOnExit(); - VCFFileMerger.mergeParts(outputPath.toUri().toString(), outFile.toURI().toString(), - vcfHeader); - List actualVariants = new ArrayList<>(); - VCFFileReader vcfFileReaderActual = parseVcf(outFile); - Iterators.addAll(actualVariants, vcfFileReaderActual.iterator()); - - // verify the output is the same as the input - List expectedVariants = new ArrayList<>(); - VCFFileReader vcfFileReader = parseVcf(new File(testVCFFileName)); - Iterators.addAll(expectedVariants, vcfFileReader.iterator()); - - // use a VariantContextComparator to check variants are equal - VariantContextComparator vcfRecordComparator = vcfHeader.getVCFRecordComparator(); - assertEquals(expectedVariants.size(), actualVariants.size()); - for (int i = 0; i < expectedVariants.size(); i++) { - assertEquals(0, vcfRecordComparator.compare(expectedVariants.get(i), - actualVariants.get(i))); 
- } - } + final boolean success = job.waitForCompletion(true); + assertTrue(success); + + return outputPath; + } - private Path doMapReduce(final Path inputPath, final boolean writeHeader) - throws Exception { - final FileSystem fileSystem = FileSystem.get(conf); - final Path outputPath = fileSystem.makeQualified(new Path("target/out")); - fileSystem.delete(outputPath, true); + // VCF output format that writes a header before records + static class VCFTestWithHeaderOutputFormat extends KeyIgnoringVCFOutputFormat { - final Job job = Job.getInstance(conf); - FileInputFormat.setInputPaths(job, inputPath); + public static final String READ_HEADER_FROM_FILE = "TestVCF.header"; - job.setInputFormatClass(VCFInputFormat.class); - job.setMapOutputKeyClass(LongWritable.class); - job.setMapOutputValueClass(VariantContextWritable.class); + public VCFTestWithHeaderOutputFormat() { + super(VCFFormat.VCF); + } - job.setOutputFormatClass(writeHeader ? VCFTestWithHeaderOutputFormat.class : - VCFTestNoHeaderOutputFormat.class); - job.setOutputKeyClass(LongWritable.class); - job.setOutputValueClass(VariantContextWritable.class); + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx) throws IOException { + Path vcfPath = new Path(conf.get(READ_HEADER_FROM_FILE)); + readHeaderFrom(vcfPath, vcfPath.getFileSystem(conf)); + return super.getRecordWriter(ctx); + } + } - job.setNumReduceTasks(0); - FileOutputFormat.setOutputPath(job, outputPath); - if (codecClass != null) { - FileOutputFormat.setOutputCompressorClass(job, codecClass); - } + // VCF output format that doesn't write a header before records + static class VCFTestNoHeaderOutputFormat extends KeyIgnoringVCFOutputFormat { - final boolean success = job.waitForCompletion(true); - assertTrue(success); + public static final String READ_HEADER_FROM_FILE = "TestVCF.header"; - return outputPath; + public VCFTestNoHeaderOutputFormat() { + super(VCFFormat.VCF); } - private static VCFFileReader parseVcf(File vcf) throws IOException { - File actualVcf; - // work around TribbleIndexedFeatureReader not reading header from .bgz files - if (vcf.getName().endsWith(".bgz")) { - actualVcf = File.createTempFile(vcf.getName(), ".gz"); - actualVcf.deleteOnExit(); - Files.copy(vcf, actualVcf); - } else { - actualVcf = vcf; - } - return new VCFFileReader(actualVcf, false); + @Override + public RecordWriter getRecordWriter( + TaskAttemptContext ctx) throws IOException { + Path vcfPath = new Path(conf.get(READ_HEADER_FROM_FILE)); + readHeaderFrom(vcfPath, vcfPath.getFileSystem(conf)); + ctx.getConfiguration().setBoolean(WRITE_HEADER_PROPERTY, false); + return super.getRecordWriter(ctx); } + } } diff --git a/src/test/java/org/seqdoop/hadoop_bam/util/TestVCFHeaderReader.java b/src/test/java/org/seqdoop/hadoop_bam/util/TestVCFHeaderReader.java index 4fdb2b5..9fc079e 100644 --- a/src/test/java/org/seqdoop/hadoop_bam/util/TestVCFHeaderReader.java +++ b/src/test/java/org/seqdoop/hadoop_bam/util/TestVCFHeaderReader.java @@ -1,18 +1,20 @@ package org.seqdoop.hadoop_bam.util; -import java.io.IOException; +import static org.junit.Assert.assertNotNull; import com.google.common.io.Resources; - import htsjdk.samtools.seekablestream.ByteArraySeekableStream; import htsjdk.samtools.seekablestream.SeekableStream; - +import java.io.IOException; import org.junit.Test; -import static org.junit.Assert.assertNotNull; - public class TestVCFHeaderReader { + static SeekableStream seekableStream(final String resource) throws IOException { + return new ByteArraySeekableStream( + 
Resources.toByteArray(ClassLoader.getSystemClassLoader().getResource(resource))); + } + @Test public void testReadHeaderFromVCF() throws IOException { assertNotNull(VCFHeaderReader.readHeaderFrom(seekableStream("test.vcf"))); @@ -27,8 +29,4 @@ public void testReadHeaderFromGzippedVCF() throws IOException { public void testReadHeaderFromBGZFVCF() throws IOException { assertNotNull(VCFHeaderReader.readHeaderFrom(seekableStream("test.vcf.bgzf.gz"))); } - - static SeekableStream seekableStream(final String resource) throws IOException { - return new ByteArraySeekableStream(Resources.toByteArray(ClassLoader.getSystemClassLoader().getResource(resource))); - } } diff --git a/src/test/resources/log4j.properties b/src/test/resources/log4j.properties index 4406891..9d7e245 100644 --- a/src/test/resources/log4j.properties +++ b/src/test/resources/log4j.properties @@ -1,7 +1,5 @@ -log4j.rootLogger = WARN, out - -log4j.appender.out = org.apache.log4j.ConsoleAppender -log4j.appender.out.layout = org.apache.log4j.PatternLayout -log4j.appender.out.layout.ConversionPattern = %d (%t) [%p - %l] %m%n - +log4j.rootLogger=WARN, out +log4j.appender.out=org.apache.log4j.ConsoleAppender +log4j.appender.out.layout=org.apache.log4j.PatternLayout +log4j.appender.out.layout.ConversionPattern=%d (%t) [%p - %l] %m%n log4j.logger.org.seqdoop.hadoop_bam=DEBUG From 80d964f4a6c23ab8657b3a15b62afdfa6d33b1c8 Mon Sep 17 00:00:00 2001 From: Tom White Date: Mon, 5 Feb 2018 15:35:56 +0000 Subject: [PATCH 2/3] Use fmt-maven-plugin to check source complies with the Google Java Style Guide. (If it doesn't the author can manually type 'mvn fmt:format' to reformat, or manually reformat in the IDE.) --- .travis.yml | 2 +- pom.xml | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 8dbe2b6..49e0a6c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: - sed -e "s/^\\(127\\.0\\.0\\.1.*\\)/\\1 $(hostname | cut -c1-63)/" /etc/hosts | sudo tee /etc/hosts - cat /etc/hosts # optionally check the content *after* -script: mvn clean test jacoco:report +script: mvn clean fmt:check test jacoco:report after_success: - python scripts/deploy/addServerToM2Settings.py diff --git a/pom.xml b/pom.xml index cc4d60c..2124c5d 100644 --- a/pom.xml +++ b/pom.xml @@ -121,6 +121,18 @@ + + com.coveo + fmt-maven-plugin + 2.2.0 + + + + check + + + + org.apache.maven.plugins maven-release-plugin From e59cdecdddc3ee1108ba57245f44471d53bd9e84 Mon Sep 17 00:00:00 2001 From: Tom White Date: Wed, 7 Feb 2018 16:20:21 +0000 Subject: [PATCH 3/3] Change non-public top-level classes to be nested classes. 
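The classes converted below are BGZFLimitingStream, BCFStoppableOutputStream, LazyBAMRecord,
HeaderSettableVCFCodec and WorkaroundingStream. A minimal sketch of the pattern, using
hypothetical names rather than code from this patch:

    // Before: Outer.java declares a second, package-private top-level type.
    public class Outer {
      int useHelper() { return new Helper().value(); }
    }

    class Helper {
      int value() { return 42; }
    }

    // After: the helper is a static nested class, so the file has a single top-level type
    // and the helper keeps no implicit reference to an Outer instance.
    public class Outer {
      int useHelper() { return new Helper().value(); }

      static class Helper {
        int value() { return 42; }
      }
    }
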
--- .../seqdoop/hadoop_bam/BCFRecordReader.java | 120 ++++---- .../seqdoop/hadoop_bam/BCFRecordWriter.java | 86 +++--- .../hadoop_bam/LazyBAMRecordFactory.java | 148 +++++----- .../hadoop_bam/LazyVCFGenotypesContext.java | 46 +-- .../seqdoop/hadoop_bam/SAMRecordReader.java | 268 +++++++++--------- 5 files changed, 334 insertions(+), 334 deletions(-) diff --git a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java index 6b6b626..8735230 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordReader.java @@ -177,77 +177,77 @@ public boolean nextKeyValue() throws IOException { vc.set(v); return true; } -} - -class BGZFLimitingStream extends InputStream { - private final BlockCompressedInputStream bgzf; - private final long virtEnd; - private byte[] readBuf = new byte[1]; - - public BGZFLimitingStream(BlockCompressedInputStream stream, long virtualEnd) { - bgzf = stream; - virtEnd = virtualEnd; - } + static class BGZFLimitingStream extends InputStream { - @Override - public void close() throws IOException { - bgzf.close(); - } + private final BlockCompressedInputStream bgzf; + private final long virtEnd; + private byte[] readBuf = new byte[1]; - @Override - public int read() throws IOException { - switch (read(readBuf)) { - case 1: - return readBuf[0]; - case -1: - return -1; - default: - assert false; - return -1; + public BGZFLimitingStream(BlockCompressedInputStream stream, long virtualEnd) { + bgzf = stream; + virtEnd = virtualEnd; } - } - @Override - public int read(byte[] buf, int off, int len) throws IOException { - - int totalRead = 0; - long virt; - - final int lastLen = (int) virtEnd & 0xffff; - - while ((virt = bgzf.getFilePointer()) >>> 16 != virtEnd >>> 16) { - // We're not in the last BGZF block yet. Unfortunately - // BlockCompressedInputStream doesn't expose the length of the current - // block, so we can't simply (possibly repeatedly) read the current - // block to the end. Instead, we read at most virtEnd & 0xffff at a - // time, which ensures that we can't overshoot virtEnd even if the - // next block starts immediately. - final int r = bgzf.read(buf, off, Math.min(len, lastLen)); - if (r == -1) { - return totalRead == 0 ? -1 : totalRead; - } + @Override + public void close() throws IOException { + bgzf.close(); + } - totalRead += r; - len -= r; - if (len == 0) { - return totalRead; + @Override + public int read() throws IOException { + switch (read(readBuf)) { + case 1: + return readBuf[0]; + case -1: + return -1; + default: + assert false; + return -1; } - off += r; } - // We're in the last BGZF block: read only up to lastLen. - len = Math.min(len, ((int) virt & 0xffff) - lastLen); - while (len > 0) { - final int r = bgzf.read(buf, off, len); - if (r == -1) { - return totalRead == 0 ? -1 : totalRead; + @Override + public int read(byte[] buf, int off, int len) throws IOException { + + int totalRead = 0; + long virt; + + final int lastLen = (int) virtEnd & 0xffff; + + while ((virt = bgzf.getFilePointer()) >>> 16 != virtEnd >>> 16) { + // We're not in the last BGZF block yet. Unfortunately + // BlockCompressedInputStream doesn't expose the length of the current + // block, so we can't simply (possibly repeatedly) read the current + // block to the end. Instead, we read at most virtEnd & 0xffff at a + // time, which ensures that we can't overshoot virtEnd even if the + // next block starts immediately. 
+ final int r = bgzf.read(buf, off, Math.min(len, lastLen)); + if (r == -1) { + return totalRead == 0 ? -1 : totalRead; + } + + totalRead += r; + len -= r; + if (len == 0) { + return totalRead; + } + off += r; } - totalRead += r; - len -= r; - off += r; + // We're in the last BGZF block: read only up to lastLen. + len = Math.min(len, ((int) virt & 0xffff) - lastLen); + while (len > 0) { + final int r = bgzf.read(buf, off, len); + if (r == -1) { + return totalRead == 0 ? -1 : totalRead; + } + + totalRead += r; + len -= r; + off += r; + } + return totalRead == 0 ? -1 : totalRead; } - return totalRead == 0 ? -1 : totalRead; } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java index 0ec2664..6bb4327 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java +++ b/src/main/java/org/seqdoop/hadoop_bam/BCFRecordWriter.java @@ -122,57 +122,57 @@ protected void writeRecord(VariantContext vc) { writer.add(vc); } -} - -// We must always call writer.writeHeader() because the writer requires -// the header in writer.add(), and writeHeader() is the only way to give -// the header to the writer. Thus, we use this class to simply throw away -// output until after the header's been written. -// -// This is, of course, a HACK and a slightly dangerous one: if writer -// does any buffering of its own and doesn't flush after writing the -// header, this isn't as easy as this. -// -// In addition we do BGZF compression here, to simplify things. -final class BCFStoppableOutputStream extends FilterOutputStream { - private final OutputStream origOut; - public boolean stopped; - - public BCFStoppableOutputStream(boolean startStopped, OutputStream out) { - super(new BlockCompressedOutputStream(out, null)); - origOut = out; - stopped = startStopped; - } + // We must always call writer.writeHeader() because the writer requires + // the header in writer.add(), and writeHeader() is the only way to give + // the header to the writer. Thus, we use this class to simply throw away + // output until after the header's been written. + // + // This is, of course, a HACK and a slightly dangerous one: if writer + // does any buffering of its own and doesn't flush after writing the + // header, this isn't as easy as this. + // + // In addition we do BGZF compression here, to simplify things. + static final class BCFStoppableOutputStream extends FilterOutputStream { + + private final OutputStream origOut; + public boolean stopped; + + public BCFStoppableOutputStream(boolean startStopped, OutputStream out) { + super(new BlockCompressedOutputStream(out, null)); + origOut = out; + stopped = startStopped; + } - @Override - public void write(int b) throws IOException { - if (!stopped) { - super.write(b); + @Override + public void write(int b) throws IOException { + if (!stopped) { + super.write(b); + } } - } - @Override - public void write(byte[] b) throws IOException { - if (!stopped) { - super.write(b); + @Override + public void write(byte[] b) throws IOException { + if (!stopped) { + super.write(b); + } } - } - @Override - public void write(byte[] b, int off, int len) throws IOException { - if (!stopped) { - super.write(b, off, len); + @Override + public void write(byte[] b, int off, int len) throws IOException { + if (!stopped) { + super.write(b, off, len); + } } - } - @Override - public void close() throws IOException { - // Don't close the BlockCompressedOutputStream, as we don't want - // the BGZF terminator. 
- this.out.flush(); + @Override + public void close() throws IOException { + // Don't close the BlockCompressedOutputStream, as we don't want + // the BGZF terminator. + this.out.flush(); - // Instead, close the lower-level output stream directly. - origOut.close(); + // Instead, close the lower-level output stream directly. + origOut.close(); + } } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java b/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java index 3a908b1..a1faf90 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyBAMRecordFactory.java @@ -65,89 +65,89 @@ public BAMRecord createBAMRecord( insertSize, variableLengthBlock); } -} - -class LazyBAMRecord extends BAMRecord { - - private boolean decodedRefIdx = false; - private boolean decodedMateRefIdx = false; - public LazyBAMRecord( - SAMFileHeader hdr, - int referenceID, - int coordinate, - short readNameLength, - short mappingQuality, - int indexingBin, - int cigarLen, - int flags, - int readLen, - int mateReferenceID, - int mateCoordinate, - int insertSize, - byte[] restOfData) { - super( - hdr, - referenceID, - coordinate, - readNameLength, - mappingQuality, - indexingBin, - cigarLen, - flags, - readLen, - mateReferenceID, - mateCoordinate, - insertSize, - restOfData); - } + static class LazyBAMRecord extends BAMRecord { + + private boolean decodedRefIdx = false; + private boolean decodedMateRefIdx = false; + + public LazyBAMRecord( + SAMFileHeader hdr, + int referenceID, + int coordinate, + short readNameLength, + short mappingQuality, + int indexingBin, + int cigarLen, + int flags, + int readLen, + int mateReferenceID, + int mateCoordinate, + int insertSize, + byte[] restOfData) { + super( + hdr, + referenceID, + coordinate, + readNameLength, + mappingQuality, + indexingBin, + cigarLen, + flags, + readLen, + mateReferenceID, + mateCoordinate, + insertSize, + restOfData); + } - @Override - public void setReferenceIndex(final int referenceIndex) { - mReferenceIndex = referenceIndex; - decodedRefIdx = false; - } + @Override + public void setReferenceIndex(final int referenceIndex) { + mReferenceIndex = referenceIndex; + decodedRefIdx = false; + } - @Override - public void setMateReferenceIndex(final int referenceIndex) { - mMateReferenceIndex = referenceIndex; - decodedMateRefIdx = false; - } + @Override + public void setMateReferenceIndex(final int referenceIndex) { + mMateReferenceIndex = referenceIndex; + decodedMateRefIdx = false; + } - @Override - public String getReferenceName() { - if (mReferenceIndex != null && !decodedRefIdx) { - decodedRefIdx = true; - super.setReferenceIndex(mReferenceIndex); + @Override + public String getReferenceName() { + if (mReferenceIndex != null && !decodedRefIdx) { + decodedRefIdx = true; + super.setReferenceIndex(mReferenceIndex); + } + return super.getReferenceName(); } - return super.getReferenceName(); - } - @Override - public String getMateReferenceName() { - if (mMateReferenceIndex != null && !decodedMateRefIdx) { - decodedMateRefIdx = true; - super.setMateReferenceIndex(mMateReferenceIndex); + @Override + public String getMateReferenceName() { + if (mMateReferenceIndex != null && !decodedMateRefIdx) { + decodedMateRefIdx = true; + super.setMateReferenceIndex(mMateReferenceIndex); + } + return super.getMateReferenceName(); } - return super.getMateReferenceName(); - } - @Override - protected void eagerDecode() { - getReferenceName(); - getMateReferenceName(); - 
super.eagerDecode(); - } + @Override + protected void eagerDecode() { + getReferenceName(); + getMateReferenceName(); + super.eagerDecode(); + } - @Override - public boolean equals(Object o) { - // don't use decoded flags for equality check - return super.equals(o); - } + @Override + public boolean equals(Object o) { + // don't use decoded flags for equality check + return super.equals(o); + } - @Override - public int hashCode() { - // don't use decoded flags for hash code - return super.hashCode(); + @Override + public int hashCode() { + // don't use decoded flags for hash code + return super.hashCode(); + } } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java b/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java index 3fca4a9..3922d78 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java +++ b/src/main/java/org/seqdoop/hadoop_bam/LazyVCFGenotypesContext.java @@ -101,34 +101,34 @@ public LazyGenotypesContext.LazyData parse(final Object data) { return codec.createGenotypeMap(str, alleles, chrom, start); } } -} -// This is a HACK. But, the functionality is only in AbstractVCFCodec so it -// can't be helped. This is preferable to copying the functionality into -// parse() above. -class HeaderSettableVCFCodec extends AbstractVCFCodec { + // This is a HACK. But, the functionality is only in AbstractVCFCodec so it + // can't be helped. This is preferable to copying the functionality into + // parse() above. + static class HeaderSettableVCFCodec extends AbstractVCFCodec { - public boolean hasHeader() { - return header != null; - } + public boolean hasHeader() { + return header != null; + } - public void setHeaderAndVersion(VCFHeader header, VCFHeaderVersion ver) { - this.header = header; - this.version = ver; - } + public void setHeaderAndVersion(VCFHeader header, VCFHeaderVersion ver) { + this.header = header; + this.version = ver; + } - @Override - public Object readActualHeader(LineIterator reader) { - throw new UnsupportedOperationException("Internal error: this shouldn't be called"); - } + @Override + public Object readActualHeader(LineIterator reader) { + throw new UnsupportedOperationException("Internal error: this shouldn't be called"); + } - @Override - public List parseFilters(String filterString) { - throw new UnsupportedOperationException("Internal error: this shouldn't be called"); - } + @Override + public List parseFilters(String filterString) { + throw new UnsupportedOperationException("Internal error: this shouldn't be called"); + } - @Override - public boolean canDecode(String s) { - return true; + @Override + public boolean canDecode(String s) { + return true; + } } } diff --git a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java index d6159d4..0a52d08 100644 --- a/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java +++ b/src/main/java/org/seqdoop/hadoop_bam/SAMRecordReader.java @@ -191,172 +191,172 @@ public boolean nextKeyValue() { record.set(r); return true; } -} -// See the long comment in SAMRecordReader.initialize() for what this does. -class WorkaroundingStream extends InputStream { + // See the long comment in SAMRecordReader.initialize() for what this does. 
+ static class WorkaroundingStream extends InputStream { - private final InputStream stream, headerStream; - private boolean headerRemaining; - private long length; - private int headerLength; + private final InputStream stream, headerStream; + private boolean headerRemaining; + private long length; + private int headerLength; - private boolean lookingForEOL = false, - foundEOL = false, - strippingAts = false; // HACK, see read(byte[], int, int). - private byte[] readBuf = new byte[1]; + private boolean lookingForEOL = false, + foundEOL = false, + strippingAts = false; // HACK, see read(byte[], int, int). + private byte[] readBuf = new byte[1]; - public WorkaroundingStream(InputStream stream, SAMFileHeader header) { - this.stream = stream; + public WorkaroundingStream(InputStream stream, SAMFileHeader header) { + this.stream = stream; - String text = header.getTextHeader(); - if (text == null) { - StringWriter writer = new StringWriter(); - new SAMTextHeaderCodec().encode(writer, header); - text = writer.toString(); - } - byte[] b; - try { - b = text.getBytes("UTF-8"); - } catch (UnsupportedEncodingException e) { - b = null; - assert false; - } - headerRemaining = true; - headerLength = b.length; - headerStream = new ByteArrayInputStream(b); + String text = header.getTextHeader(); + if (text == null) { + StringWriter writer = new StringWriter(); + new SAMTextHeaderCodec().encode(writer, header); + text = writer.toString(); + } + byte[] b; + try { + b = text.getBytes("UTF-8"); + } catch (UnsupportedEncodingException e) { + b = null; + assert false; + } + headerRemaining = true; + headerLength = b.length; + headerStream = new ByteArrayInputStream(b); - this.length = Long.MAX_VALUE; - } + this.length = Long.MAX_VALUE; + } - public void setLength(long length) { - this.length = length; - } + public void setLength(long length) { + this.length = length; + } - public int getRemainingHeaderLength() { - return headerLength; - } + public int getRemainingHeaderLength() { + return headerLength; + } - @Override - public int read() throws IOException { - for (; ; ) { - switch (read(readBuf)) { - case 0: - continue; - case 1: - return readBuf[0]; - case -1: - return -1; + @Override + public int read() throws IOException { + for (; ; ) { + switch (read(readBuf)) { + case 0: + continue; + case 1: + return readBuf[0]; + case -1: + return -1; + } } } - } - @Override - public int read(byte[] buf, int off, int len) throws IOException { - if (!headerRemaining) { - return streamRead(buf, off, len); - } + @Override + public int read(byte[] buf, int off, int len) throws IOException { + if (!headerRemaining) { + return streamRead(buf, off, len); + } - int h; - if (strippingAts) { - h = 0; - } else { - h = headerStream.read(buf, off, len); - if (h == -1) { - // This should only happen when there was no header at all, in - // which case Picard doesn't throw an error until trying to read - // a record, for some reason. (Perhaps an oversight.) Thus we - // need to handle that case here. - assert (headerLength == 0); + int h; + if (strippingAts) { h = 0; - } else if (h < headerLength) { - headerLength -= h; - return h; + } else { + h = headerStream.read(buf, off, len); + if (h == -1) { + // This should only happen when there was no header at all, in + // which case Picard doesn't throw an error until trying to read + // a record, for some reason. (Perhaps an oversight.) Thus we + // need to handle that case here. 
+ assert (headerLength == 0); + h = 0; + } else if (h < headerLength) { + headerLength -= h; + return h; + } + strippingAts = true; + headerStream.close(); } - strippingAts = true; - headerStream.close(); - } - final int newOff = off + h; - int s = streamRead(buf, newOff, len - h); + final int newOff = off + h; + int s = streamRead(buf, newOff, len - h); - if (s <= 0) { - return strippingAts ? s : h; - } + if (s <= 0) { + return strippingAts ? s : h; + } - // HACK HACK HACK. - // - // We gave all of the header, which means that SAMFileReader is still - // trying to read more header lines. If we're in a split that isn't at - // the start of the SAM file, we could be in the middle of a line and - // thus see @ characters at the start of our data. Then SAMFileReader - // would try to understand those as header lines and the end result is - // that it throws an error, since they aren't actually header lines, - // they're just part of a SAM record. - // - // So, if we're done with the header, strip all @ characters we see. Thus - // SAMFileReader will stop reading the header there and won't throw an - // exception until we use its SAMRecordIterator, at which point we can - // catch it, because we know to expect it. - // - // headerRemaining remains true while it's possible that there are still - // @ characters coming. + // HACK HACK HACK. + // + // We gave all of the header, which means that SAMFileReader is still + // trying to read more header lines. If we're in a split that isn't at + // the start of the SAM file, we could be in the middle of a line and + // thus see @ characters at the start of our data. Then SAMFileReader + // would try to understand those as header lines and the end result is + // that it throws an error, since they aren't actually header lines, + // they're just part of a SAM record. + // + // So, if we're done with the header, strip all @ characters we see. Thus + // SAMFileReader will stop reading the header there and won't throw an + // exception until we use its SAMRecordIterator, at which point we can + // catch it, because we know to expect it. + // + // headerRemaining remains true while it's possible that there are still + // @ characters coming. + + int i = newOff - 1; + while (buf[++i] == '@' && --s > 0) {; + } - int i = newOff - 1; - while (buf[++i] == '@' && --s > 0) {; - } + if (i != newOff) { + System.arraycopy(buf, i, buf, newOff, s); + } - if (i != newOff) { - System.arraycopy(buf, i, buf, newOff, s); + headerRemaining = s == 0; + return h + s; } - headerRemaining = s == 0; - return h + s; - } - - private int streamRead(byte[] buf, int off, int len) throws IOException { - if (len > length) { - if (foundEOL) { - return 0; + private int streamRead(byte[] buf, int off, int len) throws IOException { + if (len > length) { + if (foundEOL) { + return 0; + } + lookingForEOL = true; } - lookingForEOL = true; - } - int n = stream.read(buf, off, len); - if (n > 0) { - n = tryFindEOL(buf, off, n); - length -= n; + int n = stream.read(buf, off, len); + if (n > 0) { + n = tryFindEOL(buf, off, n); + length -= n; + } + return n; } - return n; - } - private int tryFindEOL(byte[] buf, int off, int len) { - assert !foundEOL; + private int tryFindEOL(byte[] buf, int off, int len) { + assert !foundEOL; - if (!lookingForEOL || len < length) { - return len; - } + if (!lookingForEOL || len < length) { + return len; + } - // Find the first EOL between length and len. + // Find the first EOL between length and len. - // len >= length so length fits in an int. 
- int i = Math.max(0, (int) length - 1); + // len >= length so length fits in an int. + int i = Math.max(0, (int) length - 1); - for (; i < len; ++i) { - if (buf[off + i] == '\n') { - foundEOL = true; - return i + 1; + for (; i < len; ++i) { + if (buf[off + i] == '\n') { + foundEOL = true; + return i + 1; + } } + return len; } - return len; - } - @Override - public void close() throws IOException { - stream.close(); - } + @Override + public void close() throws IOException { + stream.close(); + } - @Override - public int available() throws IOException { - return headerRemaining ? headerStream.available() : stream.available(); + @Override + public int available() throws IOException { + return headerRemaining ? headerStream.available() : stream.available(); + } } }
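
Editor's note, a quick illustration of the pattern behind the BCFStoppableOutputStream and (in spirit) WorkaroundingStream classes reformatted above: the wrapped htsjdk writer must be handed the header it requires, but those bytes can be kept away from the real output until record output should begin. The sketch below is a minimal, self-contained version of that idea using only JDK classes; the class and method names are invented for illustration, it is not part of Hadoop-BAM, and it deliberately omits the BlockCompressedOutputStream (BGZF) wrapping that the real BCFStoppableOutputStream adds.

import java.io.ByteArrayOutputStream;
import java.io.FilterOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

/**
 * Illustrative sketch only, not a Hadoop-BAM class. Writes are silently
 * dropped while 'stopped' is true, so a wrapped writer can be given its
 * mandatory header without those bytes reaching the real output.
 */
class DiscardUntilStartedOutputStream extends FilterOutputStream {

  private boolean stopped;

  DiscardUntilStartedOutputStream(OutputStream out, boolean startStopped) {
    super(out);
    this.stopped = startStopped;
  }

  /** Once called with false, subsequent writes pass through unchanged. */
  void setStopped(boolean stopped) {
    this.stopped = stopped;
  }

  @Override
  public void write(int b) throws IOException {
    if (!stopped) {
      out.write(b);
    }
  }

  @Override
  public void write(byte[] b, int off, int len) throws IOException {
    if (!stopped) {
      out.write(b, off, len);
    }
  }

  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream sink = new ByteArrayOutputStream();
    DiscardUntilStartedOutputStream s =
        new DiscardUntilStartedOutputStream(sink, true);

    // The "header" is swallowed while the stream is stopped...
    s.write("#HEADER\n".getBytes(StandardCharsets.UTF_8));
    s.setStopped(false);
    // ...and subsequent "records" reach the underlying sink.
    s.write("record 1\n".getBytes(StandardCharsets.UTF_8));
    s.flush();

    System.out.print(sink); // prints only "record 1"
  }
}

Used this way, a writer can be given its mandatory header while the stream is stopped, and only what is written after setStopped(false) reaches the underlying sink. That mirrors the comment preserved in the diff above: the header must be passed to the writer, but its bytes are thrown away when they should not appear in this particular output.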