From 4b2ce9a7dbed38bb7e56268beeeb46b5123ce362 Mon Sep 17 00:00:00 2001 From: nicm Date: Thu, 17 Apr 2014 15:37:55 +0000 Subject: [PATCH] Add some UTF-8 utility functions and use them to prevent the width limit on formats from splitting UTF-8 characters improperly. --- usr.bin/tmux/format.c | 15 +++--- usr.bin/tmux/tmux.h | 21 +++++--- usr.bin/tmux/utf8.c | 121 +++++++++++++++++++++++++++++++++++++++++- 3 files changed, 142 insertions(+), 15 deletions(-) diff --git a/usr.bin/tmux/format.c b/usr.bin/tmux/format.c index c05bef42f3c..55e0c47f07a 100644 --- a/usr.bin/tmux/format.c +++ b/usr.bin/tmux/format.c @@ -1,4 +1,4 @@ -/* $OpenBSD: format.c,v 1.43 2014/04/17 07:36:45 nicm Exp $ */ +/* $OpenBSD: format.c,v 1.44 2014/04/17 15:37:55 nicm Exp $ */ /* * Copyright (c) 2011 Nicholas Marriott @@ -194,10 +194,10 @@ int format_replace(struct format_tree *ft, const char *key, size_t keylen, char **buf, size_t *len, size_t *off) { - char *copy, *copy0, *endptr, *ptr, *saved; + char *copy, *copy0, *endptr, *ptr, *saved, *trimmed; const char *value; size_t valuelen; - u_long limit = ULONG_MAX; + u_long limit = 0; /* Make a copy of the key. */ copy0 = copy = xmalloc(keylen + 1); @@ -256,11 +256,14 @@ format_replace(struct format_tree *ft, const char *key, size_t keylen, value = ""; saved = NULL; } - valuelen = strlen(value); /* Truncate the value if needed. */ - if (valuelen > limit) - valuelen = limit; + if (limit != 0) { + value = trimmed = utf8_trimcstr(value, limit); + free(saved); + saved = trimmed; + } + valuelen = strlen(value); /* Expand the buffer and copy in the value. */ while (*len - *off < valuelen + 1) { diff --git a/usr.bin/tmux/tmux.h b/usr.bin/tmux/tmux.h index 84105c71cf5..c40bee1b26d 100644 --- a/usr.bin/tmux/tmux.h +++ b/usr.bin/tmux/tmux.h @@ -1,4 +1,4 @@ -/* $OpenBSD: tmux.h,v 1.455 2014/04/17 13:02:59 nicm Exp $ */ +/* $OpenBSD: tmux.h,v 1.456 2014/04/17 15:37:55 nicm Exp $ */ /* * Copyright (c) 2007 Nicholas Marriott @@ -2306,7 +2306,7 @@ struct winlink *session_new(struct session *, const char *, const char *, struct winlink *session_attach( struct session *, struct window *, int, char **); int session_detach(struct session *, struct winlink *); -struct winlink* session_has(struct session *, struct window *); +struct winlink *session_has(struct session *, struct window *); int session_next(struct session *, int); int session_previous(struct session *, int); int session_select(struct session *, int); @@ -2322,12 +2322,17 @@ void session_group_synchronize1(struct session *, struct session *); void session_renumber_windows(struct session *); /* utf8.c */ -void utf8_build(void); -int utf8_open(struct utf8_data *, u_char); -int utf8_append(struct utf8_data *, u_char); -u_int utf8_combine(const struct utf8_data *); -u_int utf8_split2(u_int, u_char *); -int utf8_strvis(char *, const char *, size_t, int); +void utf8_build(void); +void utf8_set(struct utf8_data *, u_char); +int utf8_open(struct utf8_data *, u_char); +int utf8_append(struct utf8_data *, u_char); +u_int utf8_combine(const struct utf8_data *); +u_int utf8_split2(u_int, u_char *); +int utf8_strvis(char *, const char *, size_t, int); +struct utf8_data *utf8_fromcstr(const char *); +char *utf8_tocstr(struct utf8_data *); +u_int utf8_cstrwidth(const char *); +char *utf8_trimcstr(const char *, u_int); /* procname.c */ char *get_proc_name(int, char *); diff --git a/usr.bin/tmux/utf8.c b/usr.bin/tmux/utf8.c index 85300f7ab90..1e6332e1816 100644 --- a/usr.bin/tmux/utf8.c +++ b/usr.bin/tmux/utf8.c @@ -1,4 +1,4 @@ -/* $OpenBSD: utf8.c,v 1.10 2014/04/17 14:45:49 nicm Exp $ */ +/* $OpenBSD: utf8.c,v 1.11 2014/04/17 15:37:55 nicm Exp $ */ /* * Copyright (c) 2008 Nicholas Marriott @@ -18,6 +18,7 @@ #include +#include #include #include @@ -199,6 +200,16 @@ int utf8_overlap(struct utf8_width_entry *, struct utf8_width_entry *); u_int utf8_combine(const struct utf8_data *); u_int utf8_width(const struct utf8_data *); +/* Set a single character. */ +void +utf8_set(struct utf8_data *utf8data, u_char ch) +{ + *utf8data->data = ch; + utf8data->size = 1; + + utf8data->width = 1; +} + /* * Open UTF-8 sequence. * @@ -392,3 +403,111 @@ utf8_strvis(char *dst, const char *src, size_t len, int flag) *dst = '\0'; return (dst - start); } + +/* + * Convert a string into a buffer of UTF-8 characters. Terminated by size == 0. + * Caller frees. + */ +struct utf8_data * +utf8_fromcstr(const char *src) +{ + struct utf8_data *dst; + size_t n; + int more; + + dst = NULL; + + n = 0; + while (*src != '\0') { + dst = xrealloc(dst, n + 1, sizeof *dst); + if (utf8_open(&dst[n], *src)) { + more = 1; + while (*++src != '\0' && more) + more = utf8_append(&dst[n], *src); + if (!more) { + n++; + continue; + } + src -= dst[n].have; + } + utf8_set(&dst[n], *src); + src++; + + n++; + } + + dst = xrealloc(dst, n + 1, sizeof *dst); + dst[n].size = 0; + return (dst); +} + +/* Convert from a buffer of UTF-8 characters into a string. Caller frees. */ +char * +utf8_tocstr(struct utf8_data *src) +{ + char *dst; + size_t n; + + dst = NULL; + + n = 0; + for(; src->size != 0; src++) { + dst = xrealloc(dst, n + src->size, 1); + memcpy(dst + n, src->data, src->size); + n += src->size; + } + + dst = xrealloc(dst, n + 1, 1); + dst[n] = '\0'; + return (dst); +} + +/* Get width of UTF-8 string. */ +u_int +utf8_cstrwidth(const char *s) +{ + struct utf8_data tmp; + u_int width; + int more; + + width = 0; + while (*s != '\0') { + if (utf8_open(&tmp, *s)) { + more = 1; + while (*++s != '\0' && more) + more = utf8_append(&tmp, *s); + if (!more) { + width += tmp.width; + continue; + } + s -= tmp.have; + } + width++; + s++; + } + return (width); +} + +/* Trim UTF-8 string to width. Caller frees. */ +char * +utf8_trimcstr(const char *s, u_int width) +{ + struct utf8_data *tmp, *next; + char *out; + u_int at; + + tmp = utf8_fromcstr(s); + + at = 0; + for (next = tmp; next->size != 0; next++) { + if (at + next->width > width) { + next->size = 0; + break; + } + at += next->width; + } + + out = utf8_tocstr(tmp); + free(tmp); + return (out); +} -- 2.20.1