[xiph-commits] r15596 - in trunk/vorbis-tools: . vorbiscomment
ivo at svn.xiph.org
ivo at svn.xiph.org
Tue Dec 30 17:37:11 PST 2008
Author: ivo
Date: 2008-12-30 17:37:11 -0800 (Tue, 30 Dec 2008)
New Revision: 15596
Modified:
trunk/vorbis-tools/CHANGES
trunk/vorbis-tools/vorbiscomment/vcomment.c
trunk/vorbis-tools/vorbiscomment/vorbiscomment.1
Log:
Support for round-tripping multi-line comments in vorbiscomment. Re-worked patch by Beni Cherniavsky. Also, thanks to Eugene Kotlyarov. Closes #273
Modified: trunk/vorbis-tools/CHANGES
===================================================================
--- trunk/vorbis-tools/CHANGES 2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/CHANGES 2008-12-31 01:37:11 UTC (rev 15596)
@@ -6,7 +6,7 @@
* Added new en_GB.po, eo.po, pl.po, sk.po and vi.po translation files
* Added AC_USE_SYSTEM_EXTENSIONS to configure.ac; no more configure warnings
* Corrected SUBLANG values in intl/localename.c (#1415)
- * Modify -v to -V on oggenc and oggdec for consistency (#1112)
+ * Change -v to -V on oggenc and oggdec for consistency (#1112)
* Fix for utf8_decode in Windows; improves behavior in vorbiscomment (#268)
* Updated gettextize to version 0.17
* ogg123: backported fix from libfishsound to patch the Speex decoder (#1347)
@@ -35,6 +35,7 @@
* vorbiscomment: correct memory allocation (#472)
* vorbiscomment: validate raw UTF-8 sent to vorbiscomment (#268)
* vorbiscomment: fix segfault when using --tag (#1439)
+ * vorbiscomment: round-trip multi-line comments (#273)
vorbis-tools 1.2.0 -- 2008-02-21
Modified: trunk/vorbis-tools/vorbiscomment/vcomment.c
===================================================================
--- trunk/vorbis-tools/vorbiscomment/vcomment.c 2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/vorbiscomment/vcomment.c 2008-12-31 01:37:11 UTC (rev 15596)
@@ -41,6 +41,7 @@
{"version", 0, 0, 'V'},
{"commentfile",1,0,'c'},
{"raw", 0,0,'R'},
+ {"escapes",0,0,'e'},
{NULL,0,0,0}
};
@@ -49,6 +50,7 @@
/* mode and flags */
int mode;
int raw;
+ int escapes;
/* file names and handles */
char *infilename, *outfilename;
@@ -68,9 +70,12 @@
/* prototypes */
void usage(void);
-void print_comments(FILE *out, vorbis_comment *vc, int raw);
-int add_comment(char *line, vorbis_comment *vc, int raw);
+void print_comments(FILE *out, vorbis_comment *vc, int raw, int escapes);
+int add_comment(char *line, vorbis_comment *vc, int raw, int escapes);
+char *escape(const char *from, int fromsize);
+char *unescape(const char *from, int *tosize);
+
param_t *new_param(void);
void free_param(param_t *param);
void parse_options(int argc, char *argv[], param_t *param);
@@ -197,7 +202,7 @@
/* extract and display the comments */
vc = vcedit_comments(state);
- print_comments(param->com, vc, param->raw);
+ print_comments(param->com, vc, param->raw, param->escapes);
/* done */
vcedit_clear(state);
@@ -231,7 +236,8 @@
for(i=0; i < param->commentcount; i++)
{
- if(add_comment(param->comments[i], vc, param->raw) < 0)
+ if (add_comment(param->comments[i], vc,
+ param->raw, param->escapes) < 0)
fprintf(stderr, _("Bad comment: \"%s\"\n"), param->comments[i]);
}
@@ -242,7 +248,7 @@
while ((comment = read_line (param->com)))
{
- if (add_comment (comment, vc, param->raw) < 0)
+ if (add_comment(comment, vc, param->raw, param->escapes) < 0)
{
fprintf (stderr, _("bad comment: \"%s\"\n"),
comment);
@@ -285,18 +291,28 @@
***********/
-void print_comments(FILE *out, vorbis_comment *vc, int raw)
+void print_comments(FILE *out, vorbis_comment *vc, int raw, int escapes)
{
int i;
- char *decoded_value;
+ char *escaped_value, *decoded_value;
for (i = 0; i < vc->comments; i++) {
- if (!raw && utf8_decode(vc->user_comments[i], &decoded_value) >= 0) {
- fprintf(out, "%s\n", decoded_value);
+ if (escapes) {
+ escaped_value = escape(vc->user_comments[i], vc->comment_lengths[i]);
+ } else {
+ escaped_value = vc->user_comments[i];
+ }
+
+ if (!raw && utf8_decode(escaped_value, &decoded_value) >= 0) {
+ fprintf(out, "%s\n", decoded_value);
free(decoded_value);
} else {
- fprintf(out, "%s\n", vc->user_comments[i]);
+ fprintf(out, "%s\n", escaped_value);
}
+
+ if (escapes) {
+ free(escaped_value);
+ }
}
}
@@ -304,16 +320,17 @@
Take a line of the form "TAG=value string", parse it, convert the
value to UTF-8, and add it to the
- vorbis_comment structure. Error checking is performed.
+ vorbis_comment structure. Error checking is performed (return 0 if OK, negative on error).
Note that this assumes a null-terminated string, which may cause
problems with > 8-bit character sets!
***********/
-int add_comment(char *line, vorbis_comment *vc, int raw)
+int add_comment(char *line, vorbis_comment *vc, int raw, int escapes)
{
- char *mark, *value, *utf8_value;
+ char *mark, *value, *utf8_value, *unescaped_value;
+ int unescaped_len;
/* strip any terminal newline */
{
@@ -339,28 +356,158 @@
*mark = '\0';
value++;
- if(raw) {
- if (!utf8_validate(value))
- fprintf(stderr, _("'%s' is not valid UTF-8, cannot add\n"), line);
- else
- vorbis_comment_add_tag(vc, line, value);
- return 0;
- }
- /* convert the value from the native charset to UTF-8 */
- else if (utf8_encode(value, &utf8_value) >= 0) {
-
- /* append the comment and return */
- vorbis_comment_add_tag(vc, line, utf8_value);
- free(utf8_value);
- return 0;
+ if (raw) {
+ if (!utf8_validate(value)) {
+ fprintf(stderr, _("'%s' is not valid UTF-8, cannot add\n"), line);
+ return -1;
+ }
+ utf8_value = value;
} else {
- fprintf(stderr, _("Couldn't convert comment to UTF-8, "
- "cannot add\n"));
- return -1;
+ /* convert the value from the native charset to UTF-8 */
+ if (utf8_encode(value, &utf8_value) < 0) {
+ fprintf(stderr,
+ _("Couldn't convert comment to UTF-8, cannot add\n"));
+ return -1;
+ }
}
+
+ if (escapes) {
+ unescaped_value = unescape(utf8_value, &unescaped_len);
+ /*
+ NOTE: unescaped_len remains unused; to write comments with embedded
+ \0's one would need to access the vc struct directly -- see
+ vorbis_comment_add() in vorbis/lib/info.c for details, but use mem*
+ instead of str*...
+ */
+ if(unescaped_value == NULL) {
+ fprintf(stderr,
+ _("Couldn't un-escape comment, cannot add\n"));
+ if (!raw)
+ free(utf8_value);
+ return -1;
+ }
+ } else {
+ unescaped_value = utf8_value;
+ }
+
+ /* append the comment and return */
+ vorbis_comment_add_tag(vc, line, unescaped_value);
+ if (escapes)
+ free(unescaped_value);
+ if (!raw)
+ free(utf8_value);
+ return 0;
}
+/*** Escaping routines. ***/
+
+/**********
+
+ Convert raw comment content to a safely escaped single-line 0-terminated
+ string. The raw comment can contain null bytes and thus requires an
+ explicit size argument. The size argument doesn't include a trailing '\0'
+ (the vorbis bitstream doesn't use one).
+
+ Returns the address of a newly allocated string - caller is responsible to
+ free it.
+
+***********/
+
+char *escape(const char *from, int fromsize)
+{
+ /* worst-case allocation, will be trimmed when done */
+ char *to = malloc(fromsize * 2 + 1);
+
+ char *s;
+ for (s = to; fromsize > 0; fromsize--, from++) {
+ switch (*from) {
+ case '\n':
+ *s++ = '\\';
+ *s++ = 'n';
+ break;
+ case '\r':
+ *s++ = '\\';
+ *s++ = 'r';
+ break;
+ case '\0':
+ *s++ = '\\';
+ *s++ = '0';
+ break;
+ case '\\':
+ *s++ = '\\';
+ *s++ = '\\';
+ break;
+ default:
+ /* normal character */
+ *s++ = *from;
+ break;
+ }
+ }
+
+ *s++ = '\0';
+ to = realloc(to, s - to); /* free unused space */
+ return to;
+}
+
+/**********
+
+ Convert a safely escaped 0-terminated string to raw comment content. The
+ result can contain null bytes, so the result's length is written into
+ *tosize. This size doesn't include a trailing '\0' (the vorbis bitstream
+ doesn't use one) but we do append it for convenience since
+ vorbis_comment_add[_tag]() has a null-terminated interface.
+
+ Returns the address of a newly allocated string - caller is responsible to
+ free it. Returns NULL in case of error (if the input is mal-formed).
+
+***********/
+
+char *unescape(const char *from, int *tosize)
+{
+ /* worst-case allocation, will be trimmed when done */
+ char *to = malloc(strlen(from) + 1);
+
+ char *s;
+ for (s = to; *from != '\0'; ) {
+ if (*from == '\\') {
+ from++;
+ switch (*from++) {
+ case 'n':
+ *s++ = '\n';
+ break;
+ case 'r':
+ *s++ = '\r';
+ break;
+ case '0':
+ *s++ = '\0';
+ break;
+ case '\\':
+ *s++ = '\\';
+ break;
+ case '\0':
+ /* A backslash as the last character of the string is an error. */
+ /* FALL-THROUGH */
+ default:
+ /* We consider any unrecognized escape as an error. This is
+ good in general and reserves them for future expansion. */
+ free(to);
+ return NULL;
+ }
+ } else {
+ /* normal character */
+ *s++ = *from++;
+ }
+ }
+
+ *tosize = s - to; /* excluding '\0' */
+
+ *s++ = '\0';
+ to = realloc(to, s - to); /* free unused space */
+ return to;
+}
+
+
/*** ui-specific routines ***/
/**********
@@ -383,9 +530,9 @@
printf ("\n");
printf (_("Usage: \n"
- " vorbiscomment [-Vh]\n"
- " vorbiscomment [-lR] file\n"
- " vorbiscomment [-R] [-c file] [-t tag] <-a|-w> inputfile [outputfile]\n"));
+ " vorbiscomment [-Vh]\n"
+ " vorbiscomment [-lRe] inputfile\n"
+ " vorbiscomment <-a|-w> [-Re] [-c file] [-t tag] inputfile [outputfile]\n"));
printf ("\n");
printf (_("Listing options\n"));
@@ -404,6 +551,7 @@
" When listing, write comments to the specified file.\n"
" When editing, read comments from the specified file.\n"));
printf (_(" -R, --raw Read and write comments in UTF-8\n"));
+ printf (_(" -e, --escapes Use \\n-style escapes to allow multiline comments.\n"));
printf ("\n");
printf (_(" -h, --help Display this help\n"));
@@ -428,8 +576,10 @@
printf ("\n");
printf (_("NOTE: Raw mode (--raw, -R) will read and write comments in UTF-8 rather than\n"
- "converting to the user's character set, which is useful in scripts. However,\n"
- "this is not sufficient for general round-tripping of comments in all cases.\n"));
+ "converting to the user's character set, which is useful in scripts. However,\n"
+ "this is not sufficient for general round-tripping of comments in all cases,\n"
+ "since comments can contain newlines. To handle that, use escaping (-e,\n"
+ "--escape).\n"));
}
void free_param(param_t *param) {
@@ -451,6 +601,7 @@
/* mode and flags */
param->mode = MODE_LIST;
param->raw = 0;
+ param->escapes = 0;
/* filenames */
param->infilename = NULL;
@@ -485,7 +636,7 @@
setlocale(LC_ALL, "");
- while ((ret = getopt_long(argc, argv, "alwhqVc:t:R",
+ while ((ret = getopt_long(argc, argv, "alwhqVc:t:Re",
long_options, &option_index)) != -1) {
switch (ret) {
case 0:
@@ -498,6 +649,9 @@
case 'R':
param->raw = 1;
break;
+ case 'e':
+ param->escapes = 1;
+ break;
case 'w':
param->mode = MODE_WRITE;
break;
Modified: trunk/vorbis-tools/vorbiscomment/vorbiscomment.1
===================================================================
--- trunk/vorbis-tools/vorbiscomment/vorbiscomment.1 2008-12-30 18:28:03 UTC (rev 15595)
+++ trunk/vorbis-tools/vorbiscomment/vorbiscomment.1 2008-12-31 01:37:11 UTC (rev 15596)
@@ -1,20 +1,24 @@
.\" Process this file with
.\" groff -man -Tascii vorbiscomment.1
.\"
-.TH VORBISCOMMENT 1 "November 19, 2007" "Xiph.Org Foundation" "Ogg Vorbis Tools"
+.TH VORBISCOMMENT 1 "December 30, 2008" "Xiph.Org Foundation" "Ogg Vorbis Tools"
.SH NAME
vorbiscomment \- List or edit comments in Ogg Vorbis files
.SH SYNOPSIS
.B vorbiscomment
-.RB [ -l ]
+.B [-l]
+.RB [ -R ]
+.RB [ -e ]
.I file.ogg
.br
.B vorbiscomment
.B -a
.B [ -c commentfile | -t \*(lqname=value\*(rq ]
.RB [ -q ]
+.RB [ -R ]
+.RB [ -e ]
.I in.ogg
.I [out.ogg]
.br
@@ -22,6 +26,8 @@
.B -w
.B [ -c commentfile | -t \*(lqname=value\*(rq ]
.RB [ -q ]
+.RB [ -R ]
+.RB [ -e ]
.I in.ogg
.I [out.ogg]
@@ -46,6 +52,12 @@
Replace comments with the new set given either on the command line with -t or from a file with -c. If neither -c nor -t is given, the new set will be read from the standard input.
.IP "-R, --raw"
Read and write comments in UTF-8, rather than converting to the user's character set.
+.IP "-e, --escapes"
+Quote/unquote newlines and backslashes in the comments. This ensures every comment is exactly one line in the output (or input), allowing you to filter and round-trip them. Without it, you can only write multi-line comments by using -t and you can't reliably distinguish them from multiple one-line comments.
+
+Supported escapes are C-style "\en", "\er", "\e\e" and "\e0". A backslash followed by anything else is an error.
+
+Note: currently, anything after the first "\e0" is thrown away while writing. This is a bug -- the Vorbis format can safely store null characters, but most other tools wouldn't handle them anyway.
.IP "-V, --version"
Display the version of vorbiscomment.
More information about the commits
mailing list