commit f705100cc8146d2e3dbe2cbf9f461fa8337f8ec0
Author: Richard Ipsum <richardipsum@fastmail.co.uk>
Date:   Sun,  5 Feb 2017 22:56:16 +0000
Initial commit
Diffstat:
6 files changed, 247 insertions(+), 0 deletions(-)
diff --git a/check b/check
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+set -u
+
+# Run the whole yarn test suite with this script's directory on PATH, so
+# the scenario steps pick up the in-tree mksparse and sparsemap programs.
+run_yarn_suite() {
+    local here
+    # Anchor on the script's own location ($0) rather than the literal
+    # word "check", and quote it so a checkout path with spaces survives.
+    here="$(realpath "$(dirname "$0")")"
+
+    PATH="$here:$PATH" yarn "$here/yarns/sparsemap.yarn" \
+        "$here/yarns/implementations.yarn" --stop-on-first-fail \
+        -s "$here/yarns/shell_lib" \
+        --tempdir="$(mktemp -d /tmp/gpr_yarn_XXX)" \
+        --snapshot --shell='/bin/bash'
+}
+
+run_yarn_suite
diff --git a/mksparse b/mksparse
@@ -0,0 +1,87 @@
+#!/usr/bin/env python
+# Copyright 2013-2014  Lars Wirzenius
+#
+# Copyright 2017 Richard Ipsum
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program.  If not, see <http://www.gnu.org/licenses/>.
+#
+# =*= License: GPL-3+ =*=
+
+
+description = '''Create a sparse file.
+
+The only command line argument is the name of the output file. The
+specification for how the file is to be made sparse is read from
+standard input: a sequence of "data" and "hole" words, which may be
+interspersed with spaces, commas, or the word "a", all of which are
+ignored, except that the "data" and "hole" words must have something
+in between them.
+
+'''
+
+
+import cliapp
+import os
+import sys
+
+# Default length of each "data" section: the number of "x" characters
+# written per section when --data-size is not given.
+DEFAULT_DATA_SIZE = 1024
+# Default length of each "hole" section: the distance seeked forward per
+# section when --hole-size is not given.
+DEFAULT_HOLE_SIZE = 1024 ** 2
+
+class CreateSparseFile(cliapp.Application):
+
+    '''Command line tool that writes a file of data and hole sections.
+
+    The layout is read from standard input as a list of "data" and
+    "hole" words; the section sizes come from the --data-size and
+    --hole-size settings.
+
+    '''
+
+    def add_settings(self):
+        '''Register the --hole-size and --data-size integer settings.'''
+        for name, desc, default in (
+                ('hole-size', 'hole size', DEFAULT_HOLE_SIZE),
+                ('data-size', 'data size', DEFAULT_DATA_SIZE)):
+            self.settings.integer([name], desc,
+                                  metavar='SIZE', default=default)
+
+    def process_args(self, args):
+        '''Create the output file named by the single positional argument.
+
+        Raises cliapp.AppException unless exactly one argument is given.
+
+        '''
+        if len(args) != 1:
+            raise cliapp.AppException('mksparse OUTPUT_FILE')
+
+        (output_filename,) = args
+        # Read the spec before opening the output, so the file is not
+        # created (or truncated) when reading standard input fails.
+        sections = self.parse_spec()
+
+        with open(output_filename, 'w') as out:
+            for kind in sections:
+                if kind != 'hole':
+                    assert kind == 'data'
+                    self.append_data(out)
+                else:
+                    self.append_hole(out)
+
+    def parse_spec(self):
+        '''Return the "data"/"hole" words found on standard input, in order.
+
+        Commas count as whitespace; every other word is dropped.
+
+        '''
+        raw = sys.stdin.read()
+        candidates = raw.replace(',', ' ').split()
+        return [w for w in candidates if w == 'data' or w == 'hole']
+
+    def append_data(self, f):
+        '''Write one data section of "x" characters to f.'''
+        payload = 'x' * self.settings['data-size']
+        f.write(payload)
+        # Flush right away: append_hole works on the raw descriptor and
+        # needs the descriptor's offset to include this section.
+        f.flush()
+
+    def append_hole(self, f):
+        '''Extend f by one hole section without writing any bytes.'''
+        raw_fd = f.fileno()
+        # Seek forward from the current offset, then set the file length
+        # to the new offset so the skipped range is part of the file even
+        # when nothing is written after it.
+        new_end = os.lseek(raw_fd, self.settings['hole-size'], os.SEEK_CUR)
+        os.ftruncate(raw_fd, new_end)
+
+
+# Script entry point: build the application with the module-level help
+# text and hand control to its run() method.
+CreateSparseFile(description=description).run()
diff --git a/sparsemap b/sparsemap
@@ -0,0 +1,68 @@
+#!/usr/bin/env python3
+#
+# sparsemap
+#
+# Copyright © 2017 Richard Ipsum
+#
+# This file is part of obnam.
+#
+# obnam is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# obnam is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with obnam.  If not, see <http://www.gnu.org/licenses/>.
+#
+
+import sys
+import os
+import errno
+
+def sparsemap(fd):
+    """Print the layout of the file open on fd as a list of DATA/HOLE runs.
+
+    One line is written to stdout per run, in file order: the word DATA
+    or HOLE followed by the length of the run. Runs are found with
+    os.lseek() using SEEK_DATA and SEEK_HOLE, so their boundaries are
+    whatever the underlying file system reports. A zero-length file
+    produces no output.
+
+    Raises OSError if a seek fails for any reason other than ENXIO,
+    which is the expected "nothing of that kind before end of file"
+    answer and is handled here.
+    """
+    end_of_file_pos = os.lseek(fd, 0, os.SEEK_END)
+    if end_of_file_pos == 0:
+        # Nothing to map. Probing an empty file with SEEK_HOLE would fail
+        # with ENXIO on Linux instead of returning a position.
+        return
+
+    # First of all, where are we at offset 0, data or hole?
+    first_hole = os.lseek(fd, 0, os.SEEK_HOLE)
+    if first_hole == end_of_file_pos:
+        # no holes in this file
+        print('DATA', end_of_file_pos)
+        return
+
+    # `what` is the kind of region we seek *next*; the run starting at
+    # `pos` is therefore of the opposite kind.
+    what = os.SEEK_DATA if first_hole == 0 else os.SEEK_HOLE
+    pos = 0
+
+    while pos < end_of_file_pos:
+        current = 'DATA' if what == os.SEEK_HOLE else 'HOLE'
+
+        try:
+            next_pos = os.lseek(fd, pos, what)
+        except OSError as e:
+            if e.errno != errno.ENXIO:
+                # A real failure: do not carry on with an unset or
+                # stale next_pos.
+                raise
+            # whatever we were looking for isn't in the file, so the
+            # current run extends to the end of the file
+            print(current, end_of_file_pos - pos)
+            return
+
+        print(current, next_pos - pos)
+
+        pos = next_pos
+        what = os.SEEK_DATA if what == os.SEEK_HOLE else os.SEEK_HOLE
+
+# Command line entry point: map the single FILE argument.
+if len(sys.argv) != 2:
+    print('usage: {} FILE'.format(sys.argv[0]), file=sys.stderr)
+    sys.exit(1)
+
+fd = os.open(sys.argv[1], os.O_RDONLY)
+try:
+    sparsemap(fd)
+finally:
+    # Release the descriptor even if mapping fails part-way through.
+    os.close(fd)
diff --git a/yarns/implementations.yarn b/yarns/implementations.yarn
@@ -0,0 +1,9 @@
+    IMPLEMENTS GIVEN a sparse file (\S+) with a hole size (\d+) and data size (\d+) with spec ([a-zA-Z,]+)
+    # MATCH_1 = file name under DATADIR, MATCH_2 = hole size,
+    # MATCH_3 = data size, MATCH_4 = comma-separated data/hole spec,
+    # which mksparse reads from its standard input.
+    set -e -o pipefail
+    echo "$MATCH_4" | mksparse "$DATADIR/$MATCH_1" --hole-size "$MATCH_2" --data-size "$MATCH_3"
+
+    IMPLEMENTS WHEN we run sparsemap on (\w+)
+    # run_sparsemap comes from yarns/shell_lib and saves the program's
+    # stdout in $DATADIR/STDOUT.
+    run_sparsemap "$DATADIR/$MATCH_1"
+
+    IMPLEMENTS THEN stdout contains exactly (.+)
+    # echo -e turns the \n escapes written in the scenario step into real
+    # newlines before the text is compared with the captured output.
+    diff -u <(echo -e "$MATCH_1") "$DATADIR/STDOUT"
diff --git a/yarns/shell_lib b/yarns/shell_lib
@@ -0,0 +1,5 @@
+set -e -u -o pipefail
+
+# Run sparsemap with the given arguments, capturing its stdout in
+# $DATADIR/STDOUT.
+run_sparsemap() {
+	# Quoted "$@" keeps each argument intact even if it contains spaces.
+	sparsemap "$@" > "$DATADIR/STDOUT"
+}
diff --git a/yarns/sparsemap.yarn b/yarns/sparsemap.yarn
@@ -0,0 +1,65 @@
+sparse map tests
+================
+
+Please note that these tests depend very much on the file system:
+it is up to the file system to decide how many zeroes are required
+before it represents a hole as an actual hole rather than just a
+run of zeroes.
+
+At time of writing (2017) these tests pass on ext4,
+where the default block size is 4096 bytes.
+
+    SCENARIO sparsemap single data followed by single hole
+       GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 8192\nHOLE 4096
+
+    SCENARIO sparsemap single hole followed by single data
+       GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec hole,data
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly HOLE 4096\nDATA 8192
+
+    SCENARIO sparsemap hole followed by data followed by hole
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192
+
+    SCENARIO tiny hole can look like data
+       GIVEN a sparse file S with a hole size 6 and data size 4096 with spec hole,data,hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 4108
+
+    SCENARIO sparsemap data followed by hole followed by data
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096
+
+    SCENARIO sparsemap sequence of holes
+       GIVEN a sparse file S with a hole size 4096 and data size 8192 with spec data,hole,hole,data
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 8192\nHOLE 8192\nDATA 8192
+
+    SCENARIO sparsemap sequence of holes (ending with hole)
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,hole,hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 4096\nHOLE 24576
+
+    SCENARIO sparsemap file with only data
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 4096
+
+    SCENARIO sparsemap file with only hole
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly HOLE 8192
+
+    SCENARIO sparsemap file with data,hole,data,hole,data
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec data,hole,data,hole,data
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly DATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096
+
+    SCENARIO sparsemap file with hole,data,hole,data,hole
+       GIVEN a sparse file S with a hole size 8192 and data size 4096 with spec hole,data,hole,data,hole
+        WHEN we run sparsemap on S
+        THEN stdout contains exactly HOLE 8192\nDATA 4096\nHOLE 8192\nDATA 4096\nHOLE 8192