Discussion:
[ast-developers] Updated prototype patch for |SEEK_HOLE|/|SEEK_DATA|-based sparse file support for cp(1)/mv(1)/ln(1) ...
Roland Mainz
2013-10-09 16:20:43 UTC
Permalink
Hi!

----

Attached (as "astksh20130926_sparsefile_cp003.diff.txt") is an updated
patch with |SEEK_HOLE|/|SEEK_DATA|-based sparse file support for
cp(1)/mv(1)/ln(1) and the "lssparsemap" (previously called "lsholes")
builtin.

* Notes:
- This is an unfinished work-in-progress snapshot... just dumping it
here since I've been asked to come-up with an updated version
- Tests are still missing
- lssparsemap(1) still needs it's --man output finished
- Note that the "builtksh93.sh" wrapper script to build ksh93 requires
manual entries for "lssparsemap"
- cp(1) still has no --sparse option

Comments/rants/feedback etc. welcome... :-)

----

Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
-------------- next part --------------
diff -r -u original/src/lib/libast/include/sfio.h build_cpsparse/src/lib/libast/include/sfio.h
--- src/lib/libast/include/sfio.h 2013-08-27 18:32:42.000000000 +0200
+++ src/lib/libast/include/sfio.h 2013-10-09 17:58:27.594193327 +0200
@@ -252,6 +252,7 @@
extern ssize_t sfread _ARG_((Sfio_t*, Void_t*, size_t));
extern ssize_t sfwrite _ARG_((Sfio_t*, const Void_t*, size_t));
extern Sfoff_t sfmove _ARG_((Sfio_t*, Sfio_t*, Sfoff_t, int));
+extern int sfcopyfile _ARG_((Sfio_t* ip, Sfio_t* op, int flags));
extern int sfclose _ARG_((Sfio_t*));
extern Sfoff_t sftell _ARG_((Sfio_t*));
extern Sfoff_t sfseek _ARG_((Sfio_t*, Sfoff_t, int));
@@ -322,6 +323,33 @@
extern ssize_t sfslen _ARG_((void));
extern ssize_t sfmaxr _ARG_((ssize_t, int));

+#if defined(SEEK_HOLE) && defined(SEEK_DATA) && !defined(AST_SPARSEFILE_SUPPORT)
+#define AST_SPARSEFILE_SUPPORT 1
+#endif
+
+#if AST_SPARSEFILE_SUPPORT
+typedef struct _sparsefiledatarec
+{
+ enum
+ {
+ SPFDREC_UNDEFINED = 0,
+ SPFDREC_DATA = 1,
+ SPFDREC_HOLE = 2
+ } type;
+ off_t begin;
+ off_t end;
+} sparsefiledatarec;
+
+/*
+ * these two should live outside sfio because they operate on fds
+ * and not sfio steams
+ */
+extern bool supports_seek_hole(int fd);
+extern sparsefiledatarec *sparsefile_enumerate_holes(int fd, ssize_t *res_numrec);
+
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
#undef extern
_END_EXTERNS_

diff -r -u original/src/lib/libast/sfio/sfmove.c build_cpsparse/src/lib/libast/sfio/sfmove.c
--- src/lib/libast/sfio/sfmove.c 2013-09-20 13:46:38.000000000 +0200
+++ src/lib/libast/sfio/sfmove.c 2013-10-09 17:58:21.239667312 +0200
@@ -244,3 +244,255 @@
SFOPEN(fr,0);
SFMTXRETURN(fr, n_move);
}
+
+
+#if AST_SPARSEFILE_SUPPORT
+bool supports_seek_hole(int fd)
+{
+ off_t pos;
+
+/* Linux does not support |_PC_MIN_HOLE_SIZE| */
+#ifdef _PC_MIN_HOLE_SIZE
+ if (fpathconf(fd, _PC_MIN_HOLE_SIZE) < 0)
+ return (false);
+#endif
+
+ /*
+ * Test two error conditions:
+ * 1. we have been compiled on an OS revision that
+ * supports |SEEK_HOLE| but run on an OS revision
+ * that does not support |SEEK_HOLE|, we get |EINVAL|.
+ * 2. the underlying filesystem does not support
+ * |SEEK_HOLE|, we get |ENOTSUP|.
+ */
+ pos = lseek(fd, 0LL, SEEK_HOLE);
+ if (pos < 0LL)
+ {
+ if ((errno == EINVAL) || (errno == ENOTSUP))
+ return (false);
+ }
+
+ /* Do the same for |SEEK_DATA| */
+ pos = lseek(fd, 0LL, SEEK_DATA);
+ if (pos < 0LL)
+ {
+ if ((errno == EINVAL) || (errno == ENOTSUP))
+ return (false);
+ }
+
+ return (true);
+}
+
+#if 1
+#define D(x)
+#else
+#define D(x) x
+#endif
+
+sparsefiledatarec *sparsefile_enumerate_holes(int fd, ssize_t *res_numrec)
+{
+ off_t data_pos,
+ hole_pos,
+ pos;
+ struct stat st;
+ D(int saved_errno);
+ sparsefiledatarec *rec = NULL;
+ size_t numrec = 0UL;
+
+ *res_numrec = -1L;
+
+ if (fstat(fd, &st) < 0)
+ return (NULL);
+
+ /* special case for files with zero size */
+ if (st.st_size == 0)
+ {
+ rec = malloc(sizeof(sparsefiledatarec));
+ if (!rec)
+ return (NULL);
+ rec->type = SPFDREC_DATA;
+ rec->begin = 0;
+ rec->end = 0;
+ *res_numrec = 0;
+ return (rec);
+ }
+
+ for (hole_pos = data_pos = pos = 0LL ; pos < st.st_size ; )
+ {
+ data_pos = lseek(fd, pos, SEEK_DATA);
+ D(saved_errno=errno;(void)printf("# data pos = %8ld\n", data_pos);errno=saved_errno);
+ if (data_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ /* final data block */
+ }
+ else
+ {
+ free(rec);
+ return (NULL);
+ }
+ }
+
+ hole_pos = lseek(fd, pos, SEEK_HOLE);
+ D(saved_errno=errno;(void)printf("# hole pos = %8ld\n", hole_pos);errno=saved_errno);
+ if (hole_pos < 0)
+ {
+ if (errno == ENXIO)
+ {
+ /* final hole block */
+ }
+ else
+ {
+ free(rec);
+ return (NULL);
+ }
+ }
+
+ if (data_pos == pos)
+ {
+ D((void)printf("#data from %8ld to %8ld (size %8ld)\n",
+ data_pos, hole_pos, (hole_pos - data_pos)));
+ pos = hole_pos;
+
+ rec = realloc(rec, sizeof(sparsefiledatarec)*(numrec+1));
+ if (!rec)
+ return (NULL);
+ rec[numrec].type = SPFDREC_DATA;
+ rec[numrec].begin = data_pos;
+ rec[numrec].end = hole_pos;
+ numrec++;
+ }
+ else if (hole_pos == pos)
+ {
+ D((void)printf("#hole from %8ld to %8ld (size %8ld)\n",
+ hole_pos, data_pos, (data_pos - hole_pos)));
+ pos = data_pos;
+
+ rec = realloc(rec, sizeof(sparsefiledatarec)*(numrec+1));
+ if (!rec)
+ return (NULL);
+ rec[numrec].type = SPFDREC_HOLE;
+ rec[numrec].begin = hole_pos;
+ rec[numrec].end = data_pos;
+ numrec++;
+ }
+ else
+ {
+ free(rec);
+ return (NULL);
+ }
+ }
+
+ *res_numrec = numrec;
+
+ return (rec);
+}
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
+/*
+ * This function is a layer above |sfmove()| to copy both data and
+ * holes in sparse files. Most consumers in sfio (AFAIK until now
+ * only cp(1)/mv(1)/ln(1) fall into this category) do not deal with
+ * preserving holes in sparse files at all so we moved this
+ * extra complexity into a seperate function.
+ */
+int sfcopyfile(Sfio_t* ip, Sfio_t* op, int flags)
+{
+ int rfd;
+ int op_flags_saved;
+ int res = 0;
+#if AST_SPARSEFILE_SUPPORT
+ off_t origpos; /* original position */
+ sparsefiledatarec *sprec;
+ ssize_t spnumrec = 0L;
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+ rfd = sffileno(ip);
+
+#if AST_SPARSEFILE_SUPPORT
+ /*
+ * We enumerate the data/hole sections before copying the data
+ * for two reasons:
+ * 1. Early filesystem implementations of |SEEK_HOLE|/|SEEK_DATA|
+ * had bugs when doing both data/hole enumeration and reading
+ * data at the same time
+ * 2. Avoid extra seeking around which would otherwise be needed
+ * to avoid clashes between copy buffer size vs. data/hole
+ * boundaries. The boundaries *MUST* be preserved since
+ * filesystems are allowed to turn two continous |lseek()|
+ * calls into two holes instead of combining them into one.
+ * The same can happen for data sections, i.e. two |write()|
+ * calls may result in two independent |SEEK_DATA| sections.
+ */
+ origpos = lseek(rfd, 0LL, SEEK_CUR);
+ if (origpos < 0)
+ return (-1);
+ sprec = sparsefile_enumerate_holes(rfd, &spnumrec);
+ if (lseek(rfd, origpos, SEEK_SET) < 0)
+ return (-1);
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+ /*
+ * (Temporarily) set |SF_WHOLE| to prevent |sfmove()| from turning
+ * sequences of zero bytes into (more) holes (this would lead to
+ * data corruption for applications (like Oracle DB) which actually
+ * expect the holes to be at the correct positions).
+ *
+ * The difference is in this case that sequences of zero bytes
+ * represent "valid data of zero bytes here" while the holes
+ * represent "no data here". Turning the zero bytes into holes would
+ * therefore destroy data.
+ */
+ op_flags_saved = op->flags & SF_WHOLE;
+ op->flags |= SF_WHOLE;
+
+#if AST_SPARSEFILE_SUPPORT
+ if (sprec)
+ {
+ ssize_t i;
+
+ for (i=0 ; (i < spnumrec) && (res == 0) ; i++)
+ {
+ Sfoff_t movesize = sprec[i].end - sprec[i].begin;
+ switch(sprec[i].type)
+ {
+ case SPFDREC_DATA:
+ if (sfmove(ip, op, movesize, -1) < 0)
+ res |= 3;
+ break;
+ case SPFDREC_HOLE:
+ if (sfseek(ip, movesize, SEEK_CUR) < 0)
+ res |= 1;
+ if (sfseek(op, movesize, SEEK_CUR) < 0)
+ res |= 2;
+ break;
+ }
+ }
+
+ /*
+ * Just seeking to a new postion does not set
+ * the sfio-internal eof flag. If the file
+ * ends with a hole we explicitly have to read
+ * something to get the EOF (or not)
+ */
+ if ((res == 0) && (sfgetc(ip) != EOF))
+ {
+ res |= 1;
+ }
+
+ free(sprec);
+ }
+ else
+#endif /* AST_SPARSEFILE_SUPPORT */
+ {
+ if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND, -1) < 0)
+ res |= 3;
+ if (!sfeof(ip))
+ res |= 1;
+ }
+
+ op->flags = (op->flags & ~SF_WHOLE) | op_flags_saved;
+ return (-res);
+}
diff -r -u original/src/lib/libcmd/cp.c build_cpsparse/src/lib/libcmd/cp.c
--- src/lib/libcmd/cp.c 2013-07-16 23:45:26.000000000 +0200
+++ src/lib/libcmd/cp.c 2013-10-09 18:06:05.034834989 +0200
@@ -620,7 +620,7 @@
return 0;
}
n = 0;
- if (sfmove(ip, op, (Sfoff_t)SF_UNBOUND, -1) < 0)
+ if (sfcopyfile(ip, op, 0) < 0)
n |= 3;
if (!sfeof(ip))
n |= 1;
@@ -1012,3 +1012,284 @@
}
return error_info.errors != 0;
}
+
+
+#if AST_SPARSEFILE_SUPPORT
+static
+void printrec(sparsefiledatarec *rec, ssize_t numrec)
+{
+ ssize_t i;
+
+ for (i=0 ; i < numrec ; i++)
+ {
+ switch(rec[i].type)
+ {
+ case SPFDREC_DATA:
+ (void)printf("data: from\t%8ld to\t%8ld\t(size %8ld)\n",
+ (long)rec[i].begin,
+ (long)rec[i].end,
+ (long)(rec[i].end - rec[i].begin));
+ break;
+ case SPFDREC_HOLE:
+ (void)printf("hole: from\t%8ld to\t%8ld\t(size %8ld)\n",
+ (long)rec[i].begin,
+ (long)rec[i].end,
+ (long)(rec[i].end - rec[i].begin));
+ break;
+ case SPFDREC_UNDEFINED: /*lint*/
+ break;
+ }
+ }
+}
+
+
+static
+void printreccpv(sparsefiledatarec *rec, ssize_t numrec)
+{
+ ssize_t i;
+
+ (void)printf("\ttypeset -C -a sparselayout=(\n");
+
+ for (i=0 ; i < numrec ; i++)
+ {
+ switch(rec[i].type)
+ {
+ case SPFDREC_DATA:
+ (void)printf("\t\t(\n\t\t\ttype='data'\n\t\t\ttypeset -l -i from=%ld\n\t\t\ttypeset -l -i to=%ld\n\t\t\ttypeset -l -i size=%ld\n\t\t)\n",
+ (long)rec[i].begin,
+ (long)rec[i].end,
+ (long)(rec[i].end - rec[i].begin));
+ break;
+ case SPFDREC_HOLE:
+ (void)printf("\t\t(\n\t\t\ttype='hole'\n\t\t\ttypeset -l -i from=%ld\n\t\t\ttypeset -l -i to=%ld\n\t\t\ttypeset -l -i size=%ld\n\t\t)\n",
+ (long)rec[i].begin,
+ (long)rec[i].end,
+ (long)(rec[i].end - rec[i].begin));
+ break;
+ case SPFDREC_UNDEFINED: /*lint*/
+ break;
+ }
+ }
+
+ (void)printf("\t)\n");
+}
+
+static
+bool hasholerecord(sparsefiledatarec *rec, ssize_t numrec)
+{
+ ssize_t i;
+
+ for (i=0 ; i < numrec ; i++)
+ {
+ switch(rec[i].type)
+ {
+ case SPFDREC_HOLE:
+ return (true);
+ case SPFDREC_DATA:
+ case SPFDREC_UNDEFINED: /*lint*/
+ break;
+ }
+ }
+ return (false);
+}
+
+
+static
+int do_listdataholeregions(const char *filename, bool compoundfmt)
+{
+ int fd;
+ int res = EXIT_SUCCESS;
+ sparsefiledatarec *rec;
+ ssize_t numrec = 0UL;
+
+ if (compoundfmt)
+ (void)printf("(\n\tfilename='%s'\n", filename);
+ else
+ (void)printf("# file: %s\n", filename);
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ {
+ error(ERROR_SYSTEM|ERROR_WARNING, "Cannot open %s", filename);
+ res = EXIT_FAILURE;
+ goto done;
+ }
+
+ if (!supports_seek_hole(fd))
+ {
+ error(ERROR_SYSTEM|ERROR_WARNING, "filesystem does not support holes for %s", filename);
+ (void)close(fd);
+ res = EXIT_FAILURE;
+ goto done;
+ }
+
+ (void)lseek(fd, 0LL, SEEK_SET);
+ rec = sparsefile_enumerate_holes(fd, &numrec);
+ if (!rec)
+ error(ERROR_SYSTEM|ERROR_WARNING, "cannot obtain list of sparse entries for %s", filename);
+ (void)close(fd);
+
+ if (!rec)
+ {
+ res = EXIT_FAILURE;
+ goto done;
+ }
+
+ if (compoundfmt)
+ printreccpv(rec, numrec);
+ else
+ printrec(rec, numrec);
+
+ free(rec);
+
+done:
+ if (compoundfmt)
+ (void)printf(")\n");
+
+ return (res);
+}
+
+
+static
+int do_issparsefile(const char *filename)
+{
+ int fd;
+ sparsefiledatarec *rec;
+ ssize_t numrec = 0UL;
+ bool hasholes;
+
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ {
+ error(ERROR_SYSTEM|ERROR_WARNING, "Cannot open %s", filename);
+ return (EXIT_FAILURE);
+ }
+
+ if (!supports_seek_hole(fd))
+ {
+ error(ERROR_SYSTEM|ERROR_WARNING, "filesystem does not support holes for %s", filename);
+ (void)close(fd);
+ return (EXIT_FAILURE);
+ }
+
+ (void)lseek(fd, 0LL, SEEK_SET);
+ rec = sparsefile_enumerate_holes(fd, &numrec);
+ if (!rec)
+ error(ERROR_SYSTEM|ERROR_WARNING, "cannot obtain list of sparse entries for %s", filename);
+ (void)close(fd);
+
+ if (!rec)
+ return (EXIT_FAILURE);
+
+ hasholes = hasholerecord(rec, numrec);
+
+ free(rec);
+
+ return (hasholes?EXIT_SUCCESS:EXIT_FAILURE);
+}
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+
+static const char optlssparsemap[] =
+"[-?\n@(#)$Id: lssparsemap (AT&T Research) 2013-10-04 $\n]"
+"[-author?Roland Mainz <roland.mainz at nrubsig.org>]"
+"[-license?http://www.eclipse.org/org/documents/epl-v10.html]"
+"[+NAME?lssparsemap - list hole/data layout of sparse files]"
+"[+DESCRIPTION?\blssparsemap\b displays information about sparse files"
+ ".]"
+"[+?Write me.]"
+"[l:list?Print data/hole layout with size and offsets of each region.]"
+"[t:testsparse|issparse?Test whether a file has one or more holes.]"
+"[C:compoundfmt?Output data as sequence of compound variables, one per file.]"
+"\n"
+"\n filename\n"
+"filename ...\n"
+"\n"
+"[+EXIT STATUS?]"
+ "{"
+ "[+0?Successful Completion.]"
+ "[+1?One or more files are not sparse if option --issparse was given.]"
+ "[+>0?An error occurred.]"
+ "}"
+"[+NOTES?]{"
+ "[+?A \"hole\" in a file is defined as a contiguous range of "
+ "bytes in a file, all reading as value of zero, representing "
+ "'no data'. Not all zeros in a file are guranteed to represent "
+ "holes, in fact sequences of zeros can represent valid data with "
+ "the meaning of 'zeros here'.]"
+
+ "[+?For filesystems that do not supply information about holes, "
+ "the file will be represented as one entire data region.]"
+"}"
+
+"[+SEE ALSO?\bcp\b(1), \bmkfile\b(1), \blseek\b(3)]"
+;
+
+int
+b_lssparsemap(int argc, register char** argv, Shbltin_t* context)
+{
+ int res = 0;
+ bool do_list = false;
+ bool do_test = false;
+ bool compoundfmt = false;
+
+ cmdinit(argc, argv, context, ERROR_CATALOG, 0);
+ for (;;)
+ {
+ switch (optget(argv, optlssparsemap))
+ {
+ case 'l':
+ do_list = true;
+ continue;
+ case 't':
+ do_test = true;
+ continue;
+ case 'C':
+ compoundfmt = true;
+ continue;
+ case ':':
+ error(2, "%s", opt_info.arg);
+ break;
+ case '?':
+ error(ERROR_usage(2), "%s", opt_info.arg);
+ break;
+ }
+ break;
+ }
+ argv += opt_info.index;
+ argc -= opt_info.index;
+ if (error_info.errors ||
+ argc < 1 ||
+ (!do_list && !do_test) ||
+ (do_list && do_test))
+ error(ERROR_usage(2), "%s", optusage(NiL));
+
+#if AST_SPARSEFILE_SUPPORT
+ if (do_list)
+ {
+ const char *name;
+ res = 0;
+
+ while (name = *argv++)
+ {
+ if (do_listdataholeregions(name, compoundfmt) != 0)
+ res = 1;
+ }
+ }
+ else if (do_test)
+ {
+ const char *name;
+ res = 0;
+
+ while (name = *argv++)
+ {
+ if (do_issparsefile(name) != 0)
+ res = 1;
+ }
+ }
+#else /* AST_SPARSEFILE_SUPPORT */
+ error(ERROR_ERROR, "No support for sparse files on this platform");
+#endif /* AST_SPARSEFILE_SUPPORT */
+
+ return (res);
+}
Irek Szczesniak
2014-01-20 23:28:26 UTC
Permalink
Post by Roland Mainz
Hi!
----
Attached (as "astksh20130926_sparsefile_cp003.diff.txt") is an updated
patch with |SEEK_HOLE|/|SEEK_DATA|-based sparse file support for
cp(1)/mv(1)/ln(1) and the "lssparsemap" (previously called "lsholes")
builtin.
- This is an unfinished work-in-progress snapshot... just dumping it
here since I've been asked to come-up with an updated version
- Tests are still missing
- lssparsemap(1) still needs it's --man output finished
- Note that the "builtksh93.sh" wrapper script to build ksh93 requires
manual entries for "lssparsemap"
- cp(1) still has no --sparse option
Comments/rants/feedback etc. welcome... :-)
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
_______________________________________________
ast-developers mailing list
ast-developers at lists.research.att.com
http://lists.research.att.com/mailman/listinfo/ast-developers
What is the status of sparse file support in ast land these days?

Irek
Lionel Cons
2014-01-21 17:33:24 UTC
Permalink
Post by Irek Szczesniak
Post by Roland Mainz
Hi!
----
Attached (as "astksh20130926_sparsefile_cp003.diff.txt") is an updated
patch with |SEEK_HOLE|/|SEEK_DATA|-based sparse file support for
cp(1)/mv(1)/ln(1) and the "lssparsemap" (previously called "lsholes")
builtin.
- This is an unfinished work-in-progress snapshot... just dumping it
here since I've been asked to come-up with an updated version
- Tests are still missing
- lssparsemap(1) still needs it's --man output finished
- Note that the "builtksh93.sh" wrapper script to build ksh93 requires
manual entries for "lssparsemap"
- cp(1) still has no --sparse option
Comments/rants/feedback etc. welcome... :-)
----
Bye,
Roland
--
__ . . __
(o.\ \/ /.o) roland.mainz at nrubsig.org
\__\/\/__/ MPEG specialist, C&&JAVA&&Sun&&Unix programmer
/O /==\ O\ TEL +49 641 3992797
(;O/ \/ \O;)
_______________________________________________
ast-developers mailing list
ast-developers at lists.research.att.com
http://lists.research.att.com/mailman/listinfo/ast-developers
What is the status of sparse file support in ast land these days?
Irek
Hello irek,
we just Roland Mainz's patch since a while with excellent feedback
from our production teams.

Lionel

Loading...