smsutil: Fix receiving UTF-16 encoded messages

The spec supports UCS2, but in reality UTF-16 is used, which supports
4-byte characters, which could be split into different message
fragments. Accumulate the entire UTF-16 message before converting to
UTF8.

Author: Martin Jones <martin.jones@jolla.com>
This commit is contained in:
Slava Monich 2016-11-18 14:17:19 +02:00 committed by Denis Kenzior
parent eeaabafdee
commit 52c2c401b4
1 changed files with 27 additions and 4 deletions

View File

@ -2218,6 +2218,7 @@ char *sms_decode_text(GSList *sms_list)
const struct sms *sms; const struct sms *sms;
int guess_size = g_slist_length(sms_list); int guess_size = g_slist_length(sms_list);
char *utf8; char *utf8;
GByteArray *utf16 = 0;
if (guess_size == 1) if (guess_size == 1)
guess_size = 160; guess_size = 160;
@ -2289,8 +2290,12 @@ char *sms_decode_text(GSList *sms_list)
NULL, NULL, 0, NULL, NULL, 0,
locking_shift, locking_shift,
single_shift); single_shift);
if (converted) {
g_string_append(str, converted);
g_free(converted);
}
} else { } else {
const gchar *from = (const gchar *) (ud + taken); const guint8 *from = ud + taken;
/* /*
* According to the spec: A UCS2 character shall not be * According to the spec: A UCS2 character shall not be
* split in the middle; if the length of the User Data * split in the middle; if the length of the User Data
@ -2300,15 +2305,33 @@ char *sms_decode_text(GSList *sms_list)
gssize num_ucs2_chars = (udl_in_bytes - taken) >> 1; gssize num_ucs2_chars = (udl_in_bytes - taken) >> 1;
num_ucs2_chars = num_ucs2_chars << 1; num_ucs2_chars = num_ucs2_chars << 1;
converted = g_convert(from, num_ucs2_chars, /*
"UTF-8//TRANSLIT", "UCS-2BE", * In theory SMS supports encoding using UCS2 which
NULL, NULL, NULL); * is 16-bit, however in the real world messages
* are encoded in UTF-16 which can be 4 bytes and
* a multiple fragment message can split a 4-byte
* character in the middle. So accumulate the
* entire message before converting to UTF-8.
*/
if (!utf16)
utf16 = g_byte_array_new();
g_byte_array_append(utf16, from, num_ucs2_chars);
} }
}
if (utf16) {
char *converted = g_convert_with_fallback((const gchar *)
utf16->data, utf16->len,
"UTF-8//TRANSLIT", "UTF-16BE",
NULL, NULL, NULL, NULL);
if (converted) { if (converted) {
g_string_append(str, converted); g_string_append(str, converted);
g_free(converted); g_free(converted);
} }
g_byte_array_free(utf16, TRUE);
} }
utf8 = g_string_free(str, FALSE); utf8 = g_string_free(str, FALSE);