[xiph-commits] r15596 - in trunk/vorbis-tools: . vorbiscomment

Tue Dec 30 17:37:11 PST 2008

Author: ivo
Date: 2008-12-30 17:37:11 -0800 (Tue, 30 Dec 2008)
New Revision: 15596

Modified:
   trunk/vorbis-tools/CHANGES
   trunk/vorbis-tools/vorbiscomment/vcomment.c
   trunk/vorbis-tools/vorbiscomment/vorbiscomment.1
Log:
Support for round-tripping multi-line comments in vorbiscomment.  Re-worked patch by Beni Cherniavsky.  Also, thanks to Eugene Kotlyarov.  Closes #273

Modified: trunk/vorbis-tools/CHANGES
===================================================================

--- trunk/vorbis-tools/CHANGES	2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/CHANGES	2008-12-31 01:37:11 UTC (rev 15596)
@@ -6,7 +6,7 @@
  * Added new en_GB.po, eo.po, pl.po, sk.po and vi.po translation files
  * Added AC_USE_SYSTEM_EXTENSIONS to configure.ac; no more configure warnings
  * Corrected SUBLANG values in intl/localename.c (#1415)
- * Modify -v to -V on oggenc and oggdec for consistency (#1112)
+ * Change -v to -V on oggenc and oggdec for consistency (#1112)
  * Fix for utf8_decode in Windows; improves behavior in vorbiscomment (#268)
  * Updated gettextize to version 0.17
  * ogg123: backported fix from libfishsound to patch the Speex decoder (#1347)
@@ -35,6 +35,7 @@
  * vorbiscomment: correct memory allocation (#472)
  * vorbiscomment: validate raw UTF-8 sent to vorbiscomment (#268)
  * vorbiscomment: fix segfault when using --tag (#1439)
+ * vorbiscomment: round-trip multi-line comments (#273)
 
 vorbis-tools 1.2.0 -- 2008-02-21
 

Modified: trunk/vorbis-tools/vorbiscomment/vcomment.c
===================================================================
--- trunk/vorbis-tools/vorbiscomment/vcomment.c	2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/vorbiscomment/vcomment.c	2008-12-31 01:37:11 UTC (rev 15596)
@@ -41,6 +41,7 @@
 	{"version", 0, 0, 'V'},
 	{"commentfile",1,0,'c'},
 	{"raw", 0,0,'R'},
+	{"escapes",0,0,'e'},
 	{NULL,0,0,0}
 };
 
@@ -49,6 +50,7 @@
 	/* mode and flags */
 	int	mode;
 	int	raw;
+	int	escapes;
 
 	/* file names and handles */
 	char	*infilename, *outfilename;
@@ -68,9 +70,12 @@
 
 /* prototypes */
 void usage(void);
-void print_comments(FILE *out, vorbis_comment *vc, int raw);
-int  add_comment(char *line, vorbis_comment *vc, int raw);
+void print_comments(FILE *out, vorbis_comment *vc, int raw, int escapes);
+int  add_comment(char *line, vorbis_comment *vc, int raw, int escapes);
 
+char *escape(const char *from, int fromsize);
+char *unescape(const char *from, int *tosize);
+
 param_t	*new_param(void);
 void free_param(param_t *param);
 void parse_options(int argc, char *argv[], param_t *param);
@@ -197,7 +202,7 @@
 
 		/* extract and display the comments */
 		vc = vcedit_comments(state);
-		print_comments(param->com, vc, param->raw);
+		print_comments(param->com, vc, param->raw, param->escapes);
 
 		/* done */
 		vcedit_clear(state);
@@ -231,7 +236,8 @@
 
 		for(i=0; i < param->commentcount; i++)
 		{
-			if(add_comment(param->comments[i], vc, param->raw) < 0)
+			if (add_comment(param->comments[i], vc,
+					param->raw, param->escapes) < 0)
 				fprintf(stderr, _("Bad comment: \"%s\"\n"), param->comments[i]);
 		}
 
@@ -242,7 +248,7 @@
 
 			while ((comment = read_line (param->com)))
                         {
-                                if (add_comment (comment, vc, param->raw) < 0)
+                                if (add_comment(comment, vc, param->raw, param->escapes) < 0)
                                 {
                                         fprintf (stderr, _("bad comment: \"%s\"\n"),
                                                  comment);
@@ -285,18 +291,28 @@
 
 ***********/
 
-void print_comments(FILE *out, vorbis_comment *vc, int raw)
+void print_comments(FILE *out, vorbis_comment *vc, int raw, int escapes)
 {
 	int i;
-	char *decoded_value;
+	char *escaped_value, *decoded_value;
 
 	for (i = 0; i < vc->comments; i++) {
-		if (!raw && utf8_decode(vc->user_comments[i], &decoded_value) >= 0) {
-    			fprintf(out, "%s\n", decoded_value);
+		if (escapes) {
+			escaped_value = escape(vc->user_comments[i], vc->comment_lengths[i]);
+		} else {
+			escaped_value = vc->user_comments[i];
+		}
+
+		if (!raw && utf8_decode(escaped_value, &decoded_value) >= 0) {
+			fprintf(out, "%s\n", decoded_value);
 			free(decoded_value);
 		} else {
-			fprintf(out, "%s\n", vc->user_comments[i]);
+			fprintf(out, "%s\n", escaped_value);
 		}
+
+		if (escapes) {
+			free(escaped_value);
+		}
 	}
 }
 
@@ -304,16 +320,17 @@
 
    Take a line of the form "TAG=value string", parse it, convert the
    value to UTF-8, and add it to the
-   vorbis_comment structure. Error checking is performed.
+   vorbis_comment structure. Error checking is performed (return 0 if OK, negative on error).
 
    Note that this assumes a null-terminated string, which may cause
    problems with > 8-bit character sets!
 
 ***********/
 
-int  add_comment(char *line, vorbis_comment *vc, int raw)
+int  add_comment(char *line, vorbis_comment *vc, int raw, int escapes)
 {
-	char	*mark, *value, *utf8_value;
+	char *mark, *value, *utf8_value, *unescaped_value;
+	int unescaped_len;
 
 	/* strip any terminal newline */
 	{
@@ -339,28 +356,158 @@
 	*mark = '\0';	
 	value++;
 
-    if(raw) {
-	if (!utf8_validate(value))
-	    fprintf(stderr, _("'%s' is not valid UTF-8, cannot add\n"), line);
-	else
-	    vorbis_comment_add_tag(vc, line, value);
-        return 0;
-    }
-	/* convert the value from the native charset to UTF-8 */
-    else if (utf8_encode(value, &utf8_value) >= 0) {
-		
-		/* append the comment and return */
-		vorbis_comment_add_tag(vc, line, utf8_value);
-        free(utf8_value);
-		return 0;
+	if (raw) {
+		if (!utf8_validate(value)) {
+			fprintf(stderr, _("'%s' is not valid UTF-8, cannot add\n"), line);
+			return -1;
+		}
+		utf8_value = value;
 	} else {
-		fprintf(stderr, _("Couldn't convert comment to UTF-8, "
-			"cannot add\n"));
-		return -1;
+		/* convert the value from the native charset to UTF-8 */
+		if (utf8_encode(value, &utf8_value) < 0) {
+			fprintf(stderr,
+					_("Couldn't convert comment to UTF-8, cannot add\n"));
+			return -1;
+		}
 	}
+
+	if (escapes) {
+		unescaped_value = unescape(utf8_value, &unescaped_len);
+		/*
+		  NOTE: unescaped_len remains unused; to write comments with embedded
+		  \0's one would need to access the vc struct directly -- see
+		  vorbis_comment_add() in vorbis/lib/info.c for details, but use mem*
+		  instead of str*...
+		*/
+		if(unescaped_value == NULL) {
+			fprintf(stderr,
+					_("Couldn't un-escape comment, cannot add\n"));
+			if (!raw)
+				free(utf8_value);
+			return -1;
+		}
+	} else {
+		unescaped_value = utf8_value;
+	}
+
+	/* append the comment and return */
+	vorbis_comment_add_tag(vc, line, unescaped_value);
+	if (escapes)
+		free(unescaped_value);
+	if (!raw)
+		free(utf8_value);
+	return 0;
 }
 
 
+/*** Escaping routines. ***/
+
+/**********
+
+   Convert raw comment content to a safely escaped single-line 0-terminated
+   string.  The raw comment can contain null bytes and thus requires an
+   explicit size argument.  The size argument doesn't include a trailing '\0'
+   (the vorbis bitstream doesn't use one).
+
+   Returns the address of a newly allocated string - caller is responsible to
+   free it.
+
+***********/
+
+char *escape(const char *from, int fromsize)
+{
+	/* worst-case allocation, will be trimmed when done */
+	char *to = malloc(fromsize * 2 + 1);
+
+	char *s;
+	for (s = to; fromsize > 0; fromsize--, from++) {
+		switch (*from) {
+		case '\n':
+			*s++ = '\\';
+			*s++ = 'n';
+			break;
+		case '\r':
+			*s++ = '\\';
+			*s++ = 'r';
+			break;
+		case '\0':
+			*s++ = '\\';
+			*s++ = '0';
+			break;
+		case '\\':
+			*s++ = '\\';
+			*s++ = '\\';
+			break;
+		default:
+			/* normal character */
+			*s++ = *from;
+			break;
+		}
+	}
+	
+	*s++ = '\0';
+	to = realloc(to, s - to);	/* free unused space */
+	return to;
+}
+
+/**********
+
+   Convert a safely escaped 0-terminated string to raw comment content.  The
+   result can contain null bytes, so the result's length is written into
+   *tosize.  This size doesn't include a trailing '\0' (the vorbis bitstream
+   doesn't use one) but we do append it for convenience since
+   vorbis_comment_add[_tag]() has a null-terminated interface.
+
+   Returns the address of a newly allocated string - caller is responsible to
+   free it.  Returns NULL in case of error (if the input is mal-formed).
+
+***********/
+
+char *unescape(const char *from, int *tosize)
+{
+	/* worst-case allocation, will be trimmed when done */
+	char *to = malloc(strlen(from) + 1);
+
+	char *s;
+	for (s = to; *from != '\0'; ) {
+		if (*from == '\\') {
+			from++;
+			switch (*from++) {
+			case 'n':
+				*s++ = '\n';
+				break;
+			case 'r':
+				*s++ = '\r';
+				break;
+			case '0':
+				*s++ = '\0';
+				break;
+			case '\\':
+				*s++ = '\\';
+				break;
+			case '\0':
+				/* A backslash as the last character of the string is an error. */
+				/* FALL-THROUGH */
+			default:
+				/* We consider any unrecognized escape as an error.  This is
+				   good in general and reserves them for future expansion. */
+				free(to);
+				return NULL;
+			}
+		} else {
+			/* normal character */
+			*s++ = *from++;
+		}
+	}
+
+	*tosize = s - to;			/* excluding '\0' */
+
+	*s++ = '\0';
+	to = realloc(to, s - to);	/* free unused space */
+	return to;
+}
+
+
 /*** ui-specific routines ***/
 
 /**********
@@ -383,9 +530,9 @@
   printf ("\n");
 
   printf (_("Usage: \n"
-            "  vorbiscomment [-Vh]\n" 
-            "  vorbiscomment [-lR] file\n"
-            "  vorbiscomment [-R] [-c file] [-t tag] <-a|-w> inputfile [outputfile]\n"));
+	    "  vorbiscomment [-Vh]\n"
+	    "  vorbiscomment [-lRe] inputfile\n"
+	    "  vorbiscomment <-a|-w> [-Re] [-c file] [-t tag] inputfile [outputfile]\n"));
   printf ("\n");
 
   printf (_("Listing options\n"));
@@ -404,6 +551,7 @@
             "                          When listing, write comments to the specified file.\n"
             "                          When editing, read comments from the specified file.\n"));
   printf (_("  -R, --raw               Read and write comments in UTF-8\n"));
+  printf (_("  -e, --escapes           Use \\n-style escapes to allow multiline comments.\n"));
   printf ("\n");
 
   printf (_("  -h, --help              Display this help\n"));
@@ -428,8 +576,10 @@
   printf ("\n");
 
   printf (_("NOTE: Raw mode (--raw, -R) will read and write comments in UTF-8 rather than\n"
-            "converting to the user's character set, which is useful in scripts. However,\n"
-            "this is not sufficient for general round-tripping of comments in all cases.\n"));
+	    "converting to the user's character set, which is useful in scripts. However,\n"
+	    "this is not sufficient for general round-tripping of comments in all cases,\n"
+	    "since comments can contain newlines. To handle that, use escaping (-e,\n"
+	    "--escapes).\n"));
 }
 
 void free_param(param_t *param) {
@@ -451,6 +601,7 @@
 	/* mode and flags */
 	param->mode = MODE_LIST;
 	param->raw = 0;
+	param->escapes = 0;
 
 	/* filenames */
 	param->infilename  = NULL;
@@ -485,7 +636,7 @@
 
 	setlocale(LC_ALL, "");
 
-	while ((ret = getopt_long(argc, argv, "alwhqVc:t:R",
+	while ((ret = getopt_long(argc, argv, "alwhqVc:t:Re",
 			long_options, &option_index)) != -1) {
 		switch (ret) {
 			case 0:
@@ -498,6 +649,9 @@
 			case 'R':
 				param->raw = 1;
 				break;
+			case 'e':
+				param->escapes = 1;
+				break;
 			case 'w':
 				param->mode = MODE_WRITE;
 				break;

Modified: trunk/vorbis-tools/vorbiscomment/vorbiscomment.1
===================================================================
--- trunk/vorbis-tools/vorbiscomment/vorbiscomment.1	2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/vorbiscomment/vorbiscomment.1	2008-12-31 01:37:11 UTC (rev 15596)
@@ -1,20 +1,24 @@
 .\" Process this file with
 .\" groff -man -Tascii vorbiscomment.1
 .\"
-.TH VORBISCOMMENT 1 "November 19, 2007" "Xiph.Org Foundation" "Ogg Vorbis Tools"
+.TH VORBISCOMMENT 1 "December 30, 2008" "Xiph.Org Foundation" "Ogg Vorbis Tools"
 
 .SH NAME
 vorbiscomment \- List or edit comments in Ogg Vorbis files
 
 .SH SYNOPSIS
 .B vorbiscomment
-.RB [ -l ]
+.B [-l]
+.RB [ -R ]
+.RB [ -e ]
 .I file.ogg
 .br
 .B vorbiscomment
 .B -a
 .B [ -c commentfile | -t \*(lqname=value\*(rq ]
 .RB [ -q ]
+.RB [ -R ]
+.RB [ -e ]
 .I in.ogg
 .I [out.ogg]
 .br
@@ -22,6 +26,8 @@
 .B -w
 .B [ -c commentfile | -t \*(lqname=value\*(rq ]
 .RB [ -q ]
+.RB [ -R ]
+.RB [ -e ]
 .I in.ogg
 .I [out.ogg]
 
@@ -46,6 +52,12 @@
 Replace comments with the new set given either on the command line with -t or from a file with -c. If neither -c nor -t is given, the new set will be read from the standard input.
 .IP "-R, --raw"
 Read and write comments in UTF-8, rather than converting to the user's character set.
+.IP "-e, --escapes"
+Quote/unquote newlines and backslashes in the comments. This ensures every comment is exactly one line in the output (or input), making it possible to filter and round-trip them. Without it, you can only write multi-line comments by using -t and you can't reliably distinguish them from multiple one-line comments.
+
+Supported escapes are C-style "\en", "\er", "\e\e" and "\e0". A backslash followed by anything else is an error.
+
+Note: currently, anything after the first "\e0" is thrown away while writing.  This is a bug -- the Vorbis format can safely store null characters, but most other tools wouldn't handle them anyway.
 .IP "-V, --version"
 Display the version of vorbiscomment.