Kannel: Open Source WAP and SMS gateway  svn-r5335
wsutf8.h File Reference

Go to the source code of this file.

Data Structures

struct  WsUtf8StringRec
 

Typedefs

typedef struct WsUtf8StringRec WsUtf8String
 

Functions

WsUtf8String * ws_utf8_alloc (void)
 
void ws_utf8_free (WsUtf8String *string)
 
int ws_utf8_append_char (WsUtf8String *string, unsigned long ch)
 
int ws_utf8_verify (const unsigned char *data, size_t len, size_t *strlen_return)
 
int ws_utf8_set_data (WsUtf8String *string, const unsigned char *data, size_t len)
 
int ws_utf8_get_char (const WsUtf8String *string, unsigned long *ch_return, size_t *posp)
 
unsigned char * ws_utf8_to_latin1 (const WsUtf8String *string, unsigned char unknown_char, size_t *len_return)
 
void ws_utf8_free_data (unsigned char *data)
 

Typedef Documentation

◆ WsUtf8String

typedef struct WsUtf8StringRec WsUtf8String

Definition at line 90 of file wsutf8.h.

Function Documentation

◆ ws_utf8_alloc()

WsUtf8String* ws_utf8_alloc ( void  )

Definition at line 182 of file wsutf8.c.

References ws_calloc().

Referenced by ws_bc_encode(), and ws_yy_lex().

183 {
184  return ws_calloc(1, sizeof(WsUtf8String));
185 }
void * ws_calloc(size_t num, size_t size)
Definition: wsalloc.c:83

◆ ws_utf8_append_char()

int ws_utf8_append_char ( WsUtf8String *  string,
unsigned long  ch 
)

Definition at line 198 of file wsutf8.c.

References WsUtf8StringRec::data, WsUtf8StringRec::len, utf8_hibits, ws_fatal(), ws_realloc(), WS_UTF8_CONT_DATA_MASK, WS_UTF8_ENC_C_BITS, and WS_UTF8_ENC_TYPE.

Referenced by ws_yy_lex().

199 {
200  unsigned char *d;
201  unsigned int num_bytes = WS_UTF8_ENC_TYPE(ch);
202  unsigned int len, i;
203 
204  if (num_bytes == 0)
205  ws_fatal("ws_utf8_append_char(): 0x%lx is not a valid UTF-8 character",
206  ch);
207 
208  d = ws_realloc(string->data, string->len + num_bytes);
209  if (d == NULL)
210  return 0;
211 
212  len = string->len;
213 
214  /* Encode the continuation bytes (n > 1). */
215  for (i = num_bytes - 1; i > 0; i--) {
216  d[len + i] = WS_UTF8_ENC_C_BITS;
217  d[len + i] |= ch & WS_UTF8_CONT_DATA_MASK;
218  ch >>= 6;
219  }
220 
221  /* And continue the first byte. */
222  d[len] = utf8_hibits[num_bytes];
223  d[len] |= ch;
224 
225  string->data = d;
226  string->len += num_bytes;
227  string->num_chars++;
228 
229  return 1;
230 }
void ws_fatal(char *fmt,...)
Definition: wserror.c:91
size_t len
Definition: wsutf8.h:81
unsigned char * data
Definition: wsutf8.h:84
static unsigned char utf8_hibits[7]
Definition: wsutf8.c:87
void * ws_realloc(void *ptr, size_t size)
Definition: wsalloc.c:89
#define WS_UTF8_ENC_C_BITS
Definition: wsutf8.c:99
#define WS_UTF8_CONT_DATA_MASK
Definition: wsutf8.c:102
#define WS_UTF8_ENC_TYPE(ch)
Definition: wsutf8.c:108

◆ ws_utf8_free()

void ws_utf8_free ( WsUtf8String *  string)

Definition at line 188 of file wsutf8.c.

References WsUtf8StringRec::data, and ws_free().

Referenced by ws_bc_encode(), and ws_yy_lex().

189 {
190  if (string == NULL)
191  return;
192 
193  ws_free(string->data);
194  ws_free(string);
195 }
void ws_free(void *ptr)
Definition: wsalloc.c:139
unsigned char * data
Definition: wsutf8.h:84

◆ ws_utf8_free_data()

void ws_utf8_free_data ( unsigned char *  data)

Definition at line 368 of file wsutf8.c.

References ws_free().

Referenced by pragma_meta(), and ws_bc_encode().

369 {
370  if (data)
371  ws_free(data);
372 }
void ws_free(void *ptr)
Definition: wsalloc.c:139

◆ ws_utf8_get_char()

int ws_utf8_get_char ( const WsUtf8String *  string,
unsigned long *  ch_return,
size_t *  posp 
)

Definition at line 293 of file wsutf8.c.

References WsUtf8StringRec::len, utf8_hidata_masks, WS_UTF8_CONT_DATA_MASK, and WS_UTF8_DEC_TYPE.

Referenced by main(), and ws_utf8_to_latin1().

295 {
296  size_t pos = *posp;
297  unsigned int num_bytes, i;
298  unsigned char *data;
299  unsigned long ch;
300 
301  if (pos < 0 || pos >= string->len)
302  /* Index out range. */
303  return 0;
304 
305  data = string->data + pos;
306 
307  num_bytes = WS_UTF8_DEC_TYPE(*data);
308  if (num_bytes == 0)
309  /* Invalid position. */
310  return 0;
311 
312  if (pos + num_bytes > string->len)
313  /* Truncated data. */
314  return 0;
315 
316  /* Get the first byte. */
317  ch = data[0] & utf8_hidata_masks[num_bytes];
318 
319  /* Add the continuation bytes. */
320  for (i = 1; i < num_bytes; i++) {
321  ch <<= 6;
322  ch |= data[i] & WS_UTF8_CONT_DATA_MASK;
323  }
324 
325  *ch_return = ch;
326  *posp = pos + num_bytes;
327 
328  return 1;
329 }
size_t len
Definition: wsutf8.h:81
#define WS_UTF8_DEC_TYPE(b)
Definition: wsutf8.c:161
static unsigned char utf8_hidata_masks[7]
Definition: wsutf8.c:142
#define WS_UTF8_CONT_DATA_MASK
Definition: wsutf8.c:102

◆ ws_utf8_set_data()

int ws_utf8_set_data ( WsUtf8String *  string,
const unsigned char *  data,
size_t  len 
)

Definition at line 266 of file wsutf8.c.

References WsUtf8StringRec::data, ws_free(), ws_memdup(), and ws_utf8_verify().

Referenced by ws_bc_encode().

268 {
269  size_t num_chars;
270 
271  if (!ws_utf8_verify(data, len, &num_chars))
272  /* Malformed data. */
273  return 0;
274 
275  /* Init `string' to empty. */
276  ws_free(string->data);
277  string->data = NULL;
278  string->len = 0;
279  string->num_chars = 0;
280 
281  /* Set the new data. */
282  string->data = ws_memdup(data, len);
283  if (string->data == NULL)
284  return 0;
285 
286  string->len = len;
287  string->num_chars = num_chars;
288 
289  return 1;
290 }
void ws_free(void *ptr)
Definition: wsalloc.c:139
unsigned char * data
Definition: wsutf8.h:84
void * ws_memdup(const void *ptr, size_t size)
Definition: wsalloc.c:105
int ws_utf8_verify(const unsigned char *data, size_t len, size_t *strlen_return)
Definition: wsutf8.c:233

◆ ws_utf8_to_latin1()

unsigned char* ws_utf8_to_latin1 ( const WsUtf8String *  string,
unsigned char  unknown_char,
size_t *  len_return 
)

Definition at line 332 of file wsutf8.c.

References WsUtf8StringRec::num_chars, ws_fatal(), ws_malloc(), and ws_utf8_get_char().

Referenced by pragma_meta(), and ws_bc_encode().

335 {
336  unsigned char *cstr;
337  size_t i;
338  size_t pos = 0;
339 
340  if (string == NULL)
341  return NULL;
342 
343  cstr = ws_malloc(string->num_chars + 1);
344  if (cstr == NULL)
345  return NULL;
346 
347  for (i = 0; i < string->num_chars; i++) {
348  unsigned long ch;
349 
350  if (!ws_utf8_get_char(string, &ch, &pos))
351  ws_fatal("ws_utf8_to_latin1_cstr(): internal inconsistency");
352 
353  if (ch > 0xff)
354  cstr[i] = unknown_char;
355  else
356  cstr[i] = (unsigned char) ch;
357  }
358 
359  cstr[i] = '\0';
360 
361  if (len_return)
362  *len_return = string->num_chars;
363 
364  return cstr;
365 }
void ws_fatal(char *fmt,...)
Definition: wserror.c:91
int ws_utf8_get_char(const WsUtf8String *string, unsigned long *ch_return, size_t *posp)
Definition: wsutf8.c:293
size_t num_chars
Definition: wsutf8.h:87
void * ws_malloc(size_t size)
Definition: wsalloc.c:77

◆ ws_utf8_verify()

int ws_utf8_verify ( const unsigned char *  data,
size_t  len,
size_t *  strlen_return 
)

Definition at line 233 of file wsutf8.c.

References WS_UTF8_DEC_C_P, and WS_UTF8_DEC_TYPE.

Referenced by ws_bc_decode(), and ws_utf8_set_data().

235 {
236  unsigned int num_bytes, i;
237  size_t strlen = 0;
238 
239  while (len > 0) {
240  num_bytes = WS_UTF8_DEC_TYPE(*data);
241  if (num_bytes == 0)
242  /* Not a valid beginning. */
243  return 0;
244 
245  if (len < num_bytes)
246  /* The data is truncated. */
247  return 0;
248 
249  for (i = 1; i < num_bytes; i++)
250  if (!WS_UTF8_DEC_C_P(data[i]))
251  /* Not a valid continuation byte. */
252  return 0;
253 
254  len -= num_bytes;
255  data += num_bytes;
256  strlen++;
257  }
258 
259  if (strlen_return)
260  *strlen_return = strlen;
261 
262  return 1;
263 }
#define WS_UTF8_DEC_TYPE(b)
Definition: wsutf8.c:161
#define WS_UTF8_DEC_C_P(b)
Definition: wsutf8.c:178
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.