Add CBS Text converter utility

This commit is contained in:
Denis Kenzior 2009-07-02 12:46:26 -05:00
parent ac56263a57
commit e391d8d581
2 changed files with 151 additions and 0 deletions

View File

@ -2696,3 +2696,152 @@ static gboolean iso639_2_from_language(enum cbs_language lang, char *iso639)
return FALSE;
}
char *cbs_decode_text(GSList *cbs_list, char *iso639_lang)
{
GSList *l;
const struct cbs *cbs = cbs_list->data;
enum sms_charset charset;
enum cbs_language lang;
gboolean iso639;
int bufsize = 0;
unsigned char *buf;
char *utf8;
/* CBS can only come from the network, so we're much less lenient
* on what we support. Namely we require the same charset to be
* used across all pages.
*/
for (l = cbs_list; l; l = l->next) {
enum sms_charset curch;
gboolean curiso;
cbs = l->data;
if (!cbs_dcs_decode(cbs->dcs, NULL, NULL,
&curch, NULL, &lang, &curiso))
return NULL;
if (l == cbs_list) {
iso639 = curiso;
charset = curch;
}
if (curch != charset)
return NULL;
if (curiso != iso639)
return NULL;
if (curch == SMS_CHARSET_8BIT)
return NULL;
if (curch == SMS_CHARSET_7BIT) {
/* CBS can have up to 93 chars in 7Bit */
bufsize += 93;
if (iso639)
bufsize -= 3;
} else {
bufsize += 82;
if (iso639)
bufsize -= 2;
}
}
if (lang) {
cbs = cbs_list->data;
if (iso639) {
struct sms_udh_iter iter;
int taken = 0;
if (sms_udh_iter_init_from_cbs(cbs, &iter))
taken = sms_udh_iter_get_udh_length(&iter) + 1;
unpack_7bit_own_buf(cbs->ud + taken, 82 - taken,
taken, FALSE, 2,
NULL, 0, iso639_lang);
iso639_lang[2] = '\0';
} else
iso639_2_from_language(lang, iso639_lang);
}
buf = g_new(unsigned char, bufsize);
bufsize = 0;
for (l = cbs_list; l; l = l->next) {
const guint8 *ud;
struct sms_udh_iter iter;
int taken = 0;
cbs = l->data;
ud = cbs->ud;
if (sms_udh_iter_init_from_cbs(cbs, &iter))
taken = sms_udh_iter_get_udh_length(&iter) + 1;
if (charset == SMS_CHARSET_7BIT) {
unsigned char unpacked[93];
long written;
int max_chars = sms_text_capacity_gsm(93, taken);
int i;
unpack_7bit_own_buf(ud + taken, 82 - taken,
taken, FALSE, max_chars,
&written, 0, unpacked);
i = iso639 ? 3 : 0;
/* CR is a padding character, which means we can
* safely discard everything afterwards
*/
for (; i < written; i++, bufsize++) {
if (unpacked[i] == '\r')
break;
buf[bufsize] = unpacked[i];
}
/* It isn't clear whether extension sequences
* (2 septets) must be wholly present in the page
* and not broken over multiple pages. The behavior
* is probably the same as SMS, but we don't make
* the check here since the specification isn't clear
*/
} else {
int num_ucs2_chars = (82 - taken) >> 1;
int i = taken;
int max_offset = taken + num_ucs2_chars * 2;
/* It is completely unclear how UCS2 chars are handled
* especially across pages or when the UDH is present.
* For now do the best we can
*/
if (iso639) {
i += 2;
num_ucs2_chars -= 1;
}
while (i < max_offset) {
if (ud[i] == 0x00 && ud[i] == '\r')
break;
buf[bufsize] = ud[i];
buf[bufsize + 1] = ud[i+1];
bufsize += 2;
i += 2;
}
}
}
if (charset == SMS_CHARSET_7BIT)
utf8 = convert_gsm_to_utf8(buf, bufsize, NULL, NULL, 0);
else
utf8 = g_convert(buf, bufsize, "UTF-8//TRANSLIT", "UCS-2BE",
NULL, NULL, NULL);
return utf8;
}

View File

@ -451,3 +451,5 @@ gboolean cbs_decode(const unsigned char *pdu, int len, struct cbs *out);
gboolean cbs_encode(const struct cbs *cbs, int *len, unsigned char *pdu);
gboolean cbs_extract_app_port(const struct cbs *cbs, int *dst, int *src,
gboolean *is_8bit);
char *cbs_decode_text(GSList *cbs_list, char *iso639_lang);