Kannel: Open Source WAP and SMS gateway  svn-r5335
charset.c
Go to the documentation of this file.
1 /* ====================================================================
2  * The Kannel Software License, Version 1.0
3  *
4  * Copyright (c) 2001-2018 Kannel Group
5  * Copyright (c) 1998-2001 WapIT Ltd.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  * notice, this list of conditions and the following disclaimer.
14  *
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in
17  * the documentation and/or other materials provided with the
18  * distribution.
19  *
20  * 3. The end-user documentation included with the redistribution,
21  * if any, must include the following acknowledgment:
22  * "This product includes software developed by the
23  * Kannel Group (http://www.kannel.org/)."
24  * Alternately, this acknowledgment may appear in the software itself,
25  * if and wherever such third-party acknowledgments normally appear.
26  *
27  * 4. The names "Kannel" and "Kannel Group" must not be used to
28  * endorse or promote products derived from this software without
29  * prior written permission. For written permission, please
30  * contact org@kannel.org.
31  *
32  * 5. Products derived from this software may not be called "Kannel",
33  * nor may "Kannel" appear in their name, without prior written
34  * permission of the Kannel Group.
35  *
36  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
37  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
38  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
39  * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS
40  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
41  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
42  * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
43  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
44  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
45  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
46  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
47  * ====================================================================
48  *
49  * This software consists of voluntary contributions made by many
50  * individuals on behalf of the Kannel Group. For more information on
51  * the Kannel Group, please see <http://www.kannel.org/>.
52  *
53  * Portions of this software are based upon software originally written at
54  * WapIT Ltd., Helsinki, Finland for the Kannel project.
55  */
56 
57 /*
58  * gwlib/charset.c - character set conversions
59  *
60  * This file implements the character set conversions declared in charset.h.
61  *
62  * Richard Braakman
63  */
64 
65 #include "gwlib/gwlib.h"
66 
67 #if HAVE_ICONV
68 #include <errno.h>
69 #include <iconv.h>
70 #endif
71 
72 /* Code used for non-representable characters */
73 #define NRP '?'
74 
75 #include "gwlib/latin1_to_gsm.h"
76 
77 
78 /* This is the extension table defined in GSM 03.38. It is the mapping
79  * used for the character after a GSM 27 (Escape) character. Characters
80  * not in the table are decoded by charset_gsm_to_latin1() through the
81  * plain gsm_to_latin1 table. The euro sign (escaped 'e', 101) is mapped
82  * to 128 -- presumably its Windows-1252 position; TODO confirm. */
83 static const struct {
84  int gsmesc;
85  int latin1;
86 } gsm_esctolatin1[] = {
87  { 10, 12 }, /* ASCII page break */
88  { 20, '^' },
89  { 40, '{' },
90  { 41, '}' },
91  { 47, '\\' },
92  { 60, '[' },
93  { 61, '~' },
94  { 62, ']' },
95  { 64, '|' },
96  { 101, 128 }, /* escaped 'e' (euro sign); 128 is outside ISO-Latin-1 proper */
97  { -1, -1 } /* end marker: lookups stop at the first negative gsmesc */
98 };
99 
100 
/* Extension table for the character that follows a GSM 27 (Escape)
 * character, giving the Unicode code point it stands for. Unlike the
 * Latin-1 variant this table can express the euro sign directly. */
104 static const struct {
105  int gsmesc;
106  int unichar;
107 } gsm_esctouni[] = {
108  { 10, 12 }, /* ASCII page break */
109  { 20, '^' },
110  { 40, '{' },
111  { 41, '}' },
112  { 47, '\\' },
113  { 60, '[' },
114  { 61, '~' },
115  { 62, ']' },
116  { 64, '|' },
117  { 'e', 0x20AC }, /* euro symbol */
118  { -1, -1 } /* end marker: lookups stop at the first negative gsmesc */
119 };
120 
121 
122 /* Map GSM default alphabet characters to ISO-Latin-1 characters.
123  * The greek characters at positions 16 and 18 through 26 are not
124  * mappable. They are mapped to '?' characters.
125  * The escape character, at position 27, is mapped to a space,
126  * though normally the function that indexes into this table will
127  * treat it specially. */
128 static const unsigned char gsm_to_latin1[128] = {
129  '@', 0xa3, '$', 0xa5, 0xe8, 0xe9, 0xf9, 0xec, /* 0 - 7 */
130  0xf2, 0xc7, 10, 0xd8, 0xf8, 13, 0xc5, 0xe5, /* 8 - 15 */
131  '?', '_', '?', '?', '?', '?', '?', '?', /* 16 - 23 */
132  '?', '?', '?', ' ', 0xc6, 0xe6, 0xdf, 0xc9, /* 24 - 31 */
133  ' ', '!', '"', '#', 0xa4, '%', '&', '\'', /* 32 - 39 */
134  '(', ')', '*', '+', ',', '-', '.', '/', /* 40 - 47 */
135  '0', '1', '2', '3', '4', '5', '6', '7', /* 48 - 55 */
136  '8', '9', ':', ';', '<', '=', '>', '?', /* 56 - 63 */
137  0xa1, 'A', 'B', 'C', 'D', 'E', 'F', 'G', /* 64 - 71 */
138  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 72 - 79 */
139  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', /* 80 - 87 */
140  'X', 'Y', 'Z', 0xc4, 0xd6, 0xd1, 0xdc, 0xa7, /* 88 - 95 */
141  0xbf, 'a', 'b', 'c', 'd', 'e', 'f', 'g', /* 96 - 103 */
142  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', /* 104 - 111 */
143  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', /* 112 - 119 */
144  'x', 'y', 'z', 0xe4, 0xf6, 0xf1, 0xfc, 0xe0 /* 120 - 127 */
145 };
146 
/* Map GSM default alphabet characters (index 0 - 127) to Unicode code
 * points. The greek capitals at positions 16 and 18 through 26 are
 * representable here. The escape character, at position 27, maps to NRP;
 * charset_gsm_to_utf8() handles escape sequences before falling back to
 * this entry. */
153 static const int gsm_to_unicode[128] = {
154  '@', 0xA3, '$', 0xA5, 0xE8, 0xE9, 0xF9, 0xEC, /* 0 - 7 */
155  0xF2, 0xC7, 10, 0xd8, 0xF8, 13, 0xC5, 0xE5, /* 8 - 15 */
156  0x394, '_', 0x3A6, 0x393, 0x39B, 0x3A9, 0x3A0, 0x3A8, /* 16 - 23 */
157  0x3A3, 0x398, 0x39E, NRP, 0xC6, 0xE6, 0xDF, 0xC9, /* 24 - 31 */
158  ' ', '!', '"', '#', 0xA4, '%', '&', '\'', /* 32 - 39 */
159  '(', ')', '*', '+', ',', '-', '.', '/', /* 40 - 47 */
160  '0', '1', '2', '3', '4', '5', '6', '7', /* 48 - 55 */
161  '8', '9', ':', ';', '<', '=', '>', '?', /* 56 - 63 */
162  0xA1, 'A', 'B', 'C', 'D', 'E', 'F', 'G', /* 64 - 71 */
163  'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', /* 72 - 79 */
164  'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', /* 80 - 87 */
165  'X', 'Y', 'Z', 0xC4, 0xD6, 0xD1, 0xDC, 0xA7, /* 88 - 95 */
166  0xBF, 'a', 'b', 'c', 'd', 'e', 'f', 'g', /* 96 - 103 */
167  'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', /* 104 - 111 */
168  'p', 'q', 'r', 's', 't', 'u', 'v', 'w', /* 112 - 119 */
169  'x', 'y', 'z', 0xE4, 0xF6, 0xF1, 0xFC, 0xE0 /* 120 - 127 */
170 };
171 
172 /*
173  * Register aliases for Windows character sets so that libxml/libiconv can
174  * recognise them.
175  */
176 
177 struct alias_t {
178  char *real; /* encoding name, passed first to xmlAddEncodingAlias() */
179  char *alias; /* additional name to register for that encoding */
180 };
181 
182 typedef struct alias_t alias_t;
183 
/* Alias entries as { real, alias } pairs: each CPxxxx encoding gets a
 * WIN-xxxx and a WINDOWS-xxxx alias. */
185  { "CP1250", "WIN-1250" },
186  { "CP1250", "WINDOWS-1250" },
187  { "CP1251", "WIN-1251" },
188  { "CP1251", "WINDOWS-1251" },
189  { "CP1252", "WIN-1252" },
190  { "CP1252", "WINDOWS-1252" },
191  { "CP1253", "WIN-1253" },
192  { "CP1253", "WINDOWS-1253" },
193  { "CP1254", "WIN-1254" },
194  { "CP1254", "WINDOWS-1254" },
195  { "CP1257", "WIN-1257" },
196  { "CP1257", "WINDOWS-1257" },
197  { NULL } /* end marker: iteration stops at the first NULL `real` */
198 };
199 
201 {
202  int i;
203 
204  for (i = 0; chars_aliases[i].real != NULL; i++) {
205  xmlAddEncodingAlias(chars_aliases[i].real,chars_aliases[i].alias);
206  /*debug("encoding",0,"Add encoding for %s",chars_aliases[i].alias);*/
207  }
208 }
209 
/*
 * Counterpart of charset_init(): ask libxml to drop its registered
 * encoding aliases.
 */
void charset_shutdown()
{
    xmlCleanupEncodingAliases();
}
214 
221 {
222  long pos, len;
223  Octstr *newostr;
224 
225  if (ostr == NULL)
226  return;
227 
228  newostr = octstr_create("");
229  len = octstr_len(ostr);
230 
231  for (pos = 0; pos < len; pos++) {
232  int c, i;
233 
234  c = octstr_get_char(ostr, pos);
235  if (c > 127) {
236  warning(0, "Could not convert GSM (0x%02x) to Unicode.", c);
237  continue;
238  }
239 
240  if(c == 27 && pos + 1 < len) {
241  c = octstr_get_char(ostr, ++pos);
242  for (i = 0; gsm_esctouni[i].gsmesc >= 0; i++) {
243  if (gsm_esctouni[i].gsmesc == c)
244  break;
245  }
246  if (gsm_esctouni[i].gsmesc == c) {
247  /* found a value for escaped char */
248  c = gsm_esctouni[i].unichar;
249  } else {
250  /* nothing found, look esc in our table */
251  c = gsm_to_unicode[27];
252  pos--;
253  }
254  } else if (c < 128) {
255  c = gsm_to_unicode[c];
256  }
257  /* unicode to utf-8 */
258  if(c < 128) {
259  /* 0-127 are ASCII chars that need no conversion */
260  octstr_append_char(newostr, c);
261  } else {
262  /* test if it can be converterd into a two byte char */
263  if(c < 0x0800) {
264  octstr_append_char(newostr, ((c >> 6) | 0xC0) & 0xFF); /* add 110xxxxx */
265  octstr_append_char(newostr, (c & 0x3F) | 0x80); /* add 10xxxxxx */
266  } else {
267  /* else we encode with 3 bytes. This only happens in case of euro symbol */
268  octstr_append_char(newostr, ((c >> 12) | 0xE0) & 0xFF); /* add 1110xxxx */
269  octstr_append_char(newostr, (((c >> 6) & 0x3F) | 0x80) & 0xFF); /* add 10xxxxxx */
270  octstr_append_char(newostr, ((c & 0x3F) | 0x80) & 0xFF); /* add 10xxxxxx */
271  }
272  /* There are no 4 bytes encoded characters in GSM charset */
273  }
274  }
275 
276  octstr_truncate(ostr, 0);
277  octstr_append(ostr, newostr);
278  octstr_destroy(newostr);
279 }
280 
289 {
290  long pos, len;
291  int val1, val2;
292  Octstr *newostr;
293 
294  if (ostr == NULL)
295  return;
296 
297  newostr = octstr_create("");
298  len = octstr_len(ostr);
299 
300  for (pos = 0; pos < len; pos++) {
301  val1 = octstr_get_char(ostr, pos);
302 
303  /* check range */
304  if (val1 < 0 || val1 > 255) {
305  warning(0, "Char (0x%02x) in UTF-8 string not in the range (0, 255). Skipped.", val1);
306  continue;
307  }
308 
309  /* Convert UTF-8 to unicode code */
310 
311  /* test if two byte utf8 char */
312  if ((val1 & 0xE0) == 0xC0) {
313  /* test if incomplete utf char */
314  if(pos + 1 < len) {
315  val2 = octstr_get_char(ostr, ++pos);
316  val1 = (((val1 & ~0xC0) << 6) | (val2 & 0x3F));
317  } else {
318  /* incomplete, ignore it */
319  warning(0, "Incomplete UTF-8 char discovered, skipped. 1");
320  pos += 1;
321  continue;
322  }
323  } else if ((val1 & 0xF0) == 0xE0) { /* test for three byte utf8 char */
324  if(pos + 2 < len) {
325  val2 = octstr_get_char(ostr, ++pos);
326  val1 = (((val1 & ~0xE0) << 6) | (val2 & 0x3F));
327  val2 = octstr_get_char(ostr, ++pos);
328  val1 = (val1 << 6) | (val2 & 0x3F);
329  } else {
330  /* incomplete, ignore it */
331  warning(0, "Incomplete UTF-8 char discovered, skipped. 2");
332  pos += 2;
333  continue;
334  }
335  }
336 
337  /* test Latin code page 1 char */
338  if(val1 <= 255) {
339  val1 = latin1_to_gsm[val1];
340  /* needs to be escaped ? */
341  if(val1 < 0) {
342  octstr_append_char(newostr, 27);
343  val1 *= -1;
344  }
345  } else {
346  /* Its not a Latin1 char, test for allowed GSM chars */
347  switch(val1) {
348  case 0x394:
349  val1 = 0x10; /* GREEK CAPITAL LETTER DELTA */
350  break;
351  case 0x3A6:
352  val1 = 0x12; /* GREEK CAPITAL LETTER PHI */
353  break;
354  case 0x393:
355  val1 = 0x13; /* GREEK CAPITAL LETTER GAMMA */
356  break;
357  case 0x39B:
358  val1 = 0x14; /* GREEK CAPITAL LETTER LAMBDA */
359  break;
360  case 0x3A9:
361  val1 = 0x15; /* GREEK CAPITAL LETTER OMEGA */
362  break;
363  case 0x3A0:
364  val1 = 0x16; /* GREEK CAPITAL LETTER PI */
365  break;
366  case 0x3A8:
367  val1 = 0x17; /* GREEK CAPITAL LETTER PSI */
368  break;
369  case 0x3A3:
370  val1 = 0x18; /* GREEK CAPITAL LETTER SIGMA */
371  break;
372  case 0x398:
373  val1 = 0x19; /* GREEK CAPITAL LETTER THETA */
374  break;
375  case 0x39E:
376  val1 = 0x1A; /* GREEK CAPITAL LETTER XI */
377  break;
378  case 0x20AC:
379  val1 = 'e'; /* EURO SIGN */
380  octstr_append_char(newostr, 27);
381  break;
382  default: val1 = NRP; /* character cannot be represented in GSM 03.38 */
383  }
384  }
385  octstr_append_char(newostr, val1);
386  }
387 
388  octstr_truncate(ostr, 0);
389  octstr_append(ostr, newostr);
390  octstr_destroy(newostr);
391 }
392 
393 
395 {
396  long pos, len;
397 
398  len = octstr_len(ostr);
399  for (pos = 0; pos < len; pos++) {
400  int c, new, i;
401 
402  c = octstr_get_char(ostr, pos);
403  if (c == 27 && pos + 1 < len) {
404  /* GSM escape code. Delete it, then process the next
405  * character specially. */
406  octstr_delete(ostr, pos, 1);
407  len--;
408  c = octstr_get_char(ostr, pos);
409  for (i = 0; gsm_esctolatin1[i].gsmesc >= 0; i++) {
410  if (gsm_esctolatin1[i].gsmesc == c)
411  break;
412  }
413  if (gsm_esctolatin1[i].gsmesc == c)
414  new = gsm_esctolatin1[i].latin1;
415  else if (c < 128)
416  new = gsm_to_latin1[c];
417  else
418  continue;
419  } else if (c < 128) {
420  new = gsm_to_latin1[c];
421  } else {
422  continue;
423  }
424  if (new != c)
425  octstr_set_char(ostr, pos, new);
426  }
427 }
428 
429 
431 {
432  long pos, len;
433  int c, new;
434  unsigned char esc = 27;
435 
436  len = octstr_len(ostr);
437  for (pos = 0; pos < len; pos++) {
438  c = octstr_get_char(ostr, pos);
439  gw_assert(c >= 0);
440  gw_assert(c <= 256);
441  new = latin1_to_gsm[c];
442  if (new < 0) {
443  /* Escaped GSM code */
444  octstr_insert_data(ostr, pos, (char*) &esc, 1);
445  pos++;
446  len++;
447  new = -new;
448  }
449  if (new != c)
450  octstr_set_char(ostr, pos, new);
451  }
452 }
453 
454 
455 /*
456  * This function is a wrapper arround charset_latin1_to_gsm()
457  * which implements the mapping of a NRCs (national reprentation codes)
458  * ISO 21 German.
459  */
461 {
462  long pos, len;
463  int c, new;
464 
465  len = octstr_len(ostr);
466 
467  for (pos = 0; pos < len; pos++) {
468  c = octstr_get_char(ostr, pos);
469  switch (c) {
470  /* GSM value; NRC value */
471  case 0x5b: new = 0x5b; break; /* Ä */
472  case 0x5c: new = 0x5c; break; /* Ö */
473  case 0x5e: new = 0x5d; break; /* Ü */
474  case 0x7b: new = 0x7b; break; /* ä */
475  case 0x7c: new = 0x7c; break; /* ö */
476  case 0x7e: new = 0x7d; break; /* ü */
477  case 0x1e: new = 0x7e; break; /* ß */
478  case 0x5f: new = 0x5e; break; /* § */
479  default: new = c;
480  }
481  if (new != c)
482  octstr_set_char(ostr, pos, new);
483  }
484 }
485 
487 {
488  long pos, len;
489  int c, new;
490 
491  len = octstr_len(ostr);
492 
493  for (pos = 0; pos < len; pos++) {
494  c = octstr_get_char(ostr, pos);
495  switch (c) {
496  /* NRC value; GSM value */
497  case 0x5b: new = 0x5b; break; /* Ä */
498  case 0x5c: new = 0x5c; break; /* Ö */
499  case 0x5d: new = 0x5e; break; /* Ü */
500  case 0x7b: new = 0x7b; break; /* ä */
501  case 0x7c: new = 0x7c; break; /* ö */
502  case 0x7d: new = 0x7e; break; /* ü */
503  case 0x7e: new = 0x1e; break; /* ß */
504  case 0x5e: new = 0x5f; break; /* § */
505  default: new = c;
506  }
507  if (new != c)
508  octstr_set_char(ostr, pos, new);
509  }
510 }
511 
513 {
514  if (octstr_len(gsm) > max) {
515  /* If the last GSM character was an escaped character,
516  * then chop off the escape as well as the character. */
517  if (octstr_get_char(gsm, max - 1) == 27)
518  octstr_truncate(gsm, max - 1);
519  else
520  octstr_truncate(gsm, max);
521  return 1;
522  }
523  return 0;
524 }
525 
526 int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from)
527 {
528  int ret;
529  xmlCharEncodingHandlerPtr handler = NULL;
530  xmlBufferPtr frombuffer = NULL;
531  xmlBufferPtr tobuffer = NULL;
532 
533  if (octstr_compare(charset_from, octstr_imm("UTF-8")) == 0) {
534  *to = octstr_duplicate(from);
535  return 0;
536  }
537 
538  handler = xmlFindCharEncodingHandler(octstr_get_cstr(charset_from));
539  if (handler == NULL)
540  return -2;
541 
542  /* Build the libxml buffers for the transcoding. */
543  tobuffer = xmlBufferCreate();
544  frombuffer = xmlBufferCreate();
545  xmlBufferAdd(frombuffer, (unsigned char*)octstr_get_cstr(from), octstr_len(from));
546 
547  ret = xmlCharEncInFunc(handler, tobuffer, frombuffer);
548 
549  *to = octstr_create_from_data((char*)tobuffer->content, tobuffer->use);
550 
551  /* Memory cleanup. */
552  xmlBufferFree(tobuffer);
553  xmlBufferFree(frombuffer);
554 
555  return ret;
556 }
557 
558 int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to)
559 {
560  int ret;
561  xmlCharEncodingHandlerPtr handler = NULL;
562  xmlBufferPtr frombuffer = NULL;
563  xmlBufferPtr tobuffer = NULL;
564 
565  handler = xmlFindCharEncodingHandler(octstr_get_cstr(charset_to));
566  if (handler == NULL)
567  return -2;
568 
569  /* Build the libxml buffers for the transcoding. */
570  tobuffer = xmlBufferCreate();
571  frombuffer = xmlBufferCreate();
572  xmlBufferAdd(frombuffer, (unsigned char*)octstr_get_cstr(utf8), octstr_len(utf8));
573 
574  ret = xmlCharEncOutFunc(handler, tobuffer, frombuffer);
575  if (ret < -2)
576  /* Libxml seems to be here a little uncertain what would be the
577  * return code -3, so let's make it -1. Ugly thing, indeed. --tuo */
578  ret = -1;
579 
580  *to = octstr_create_from_data((char*)tobuffer->content, tobuffer->use);
581 
582  /* Memory cleanup. */
583  xmlBufferFree(tobuffer);
584  xmlBufferFree(frombuffer);
585 
586  return ret;
587 }
588 
589 int charset_convert(Octstr* string, char* charset_from, char* charset_to)
590 {
591 #if HAVE_ICONV
592  char *from_buf, *to_buf, *pointer;
593  size_t inbytesleft, outbytesleft, ret;
594  iconv_t cd;
595 
596  if (!charset_from || !charset_to || !string) /* sanity check */
597  return -1;
598 
599  if (octstr_len(string) < 1 || strcasecmp(charset_from, charset_to) == 0)
600  return 0; /* we are done, nothing to convert */
601 
602  cd = iconv_open(charset_to, charset_from);
603  /* Did I succeed in getting a conversion descriptor ? */
604  if (cd == (iconv_t)(-1)) {
605  /* I guess not */
606  error(0,"Failed to convert string from <%s> to <%s> - probably broken type names.",
607  charset_from, charset_to);
608  return -1;
609  }
610 
611  from_buf = octstr_get_cstr(string);
612  inbytesleft = octstr_len(string);
613  /* allocate max sized buffer, assuming target encoding may be 4 byte unicode */
614  outbytesleft = inbytesleft * 4;
615  pointer = to_buf = gw_malloc(outbytesleft);
616 
617  do {
618  ret = iconv(cd, (ICONV_CONST char**) &from_buf, &inbytesleft, &pointer, &outbytesleft);
619  if(ret == -1) {
620  long tmp;
621  /* the conversion failed somewhere */
622  switch(errno) {
623  case E2BIG: /* no space in output buffer */
624  debug("charset", 0, "outbuf to small, realloc.");
625  tmp = pointer - to_buf;
626  to_buf = gw_realloc(to_buf, tmp + inbytesleft * 4);
627  outbytesleft += inbytesleft * 4;
628  pointer = to_buf + tmp;
629  ret = 0;
630  break;
631  case EILSEQ: /* invalid multibyte sequence */
632  case EINVAL: /* incomplete multibyte sequence */
633  warning(0, "Invalid/Incomplete multibyte sequence at position %d, skeep it.",
634  (int)(from_buf - octstr_get_cstr(string)));
635  /* skeep char and try next */
636  if (outbytesleft == 0) {
637  /* buffer to small */
638  tmp = pointer - to_buf;
639  to_buf = gw_realloc(to_buf, tmp + inbytesleft * 4);
640  outbytesleft += inbytesleft * 4;
641  pointer = to_buf + tmp;
642  }
643  pointer[0] = from_buf[0];
644  pointer++;
645  from_buf++;
646  inbytesleft--;
647  outbytesleft--;
648  ret = 0;
649  break;
650  }
651  }
652  } while(inbytesleft && ret == 0); /* stop if error occurs and not handled above */
653 
654  iconv_close(cd);
655 
656  if (ret != -1) {
657  /* conversion succeeded */
658  octstr_truncate(string, 0);
659  octstr_append_data(string, to_buf, pointer - to_buf);
660  if (ret)
661  debug("charset", 0, "charset_convert did %ld non-reversible conversions", (long) ret);
662  ret = 0;
663  } else
664  error(errno,"Failed to convert string from <%s> to <%s>.", charset_from, charset_to);
665 
666  if (errno == EILSEQ) {
667  debug("charset_convert", 0, "Found an invalid multibyte sequence at position <%d>",
668  (int)(from_buf - octstr_get_cstr(string)));
669  }
670  gw_free(to_buf);
671  return ret;
672 #endif
673  /* no convertion done due to not having iconv */
674  return -1;
675 }
static const struct @55 gsm_esctouni[]
void error(int err, const char *fmt,...)
Definition: log.c:648
void charset_shutdown()
Definition: charset.c:210
void octstr_append_data(Octstr *ostr, const char *data, long len)
Definition: octstr.c:1497
gw_assert(wtls_machine->packet_to_send !=NULL)
void octstr_append(Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:1504
unsigned char gsm
Definition: smsc_cimd2.c:1025
static const struct @54 gsm_esctolatin1[]
int charset_to_utf8(Octstr *from, Octstr **to, Octstr *charset_from)
Definition: charset.c:526
void octstr_append_char(Octstr *ostr, int ch)
Definition: octstr.c:1517
void charset_utf8_to_gsm(Octstr *ostr)
Definition: charset.c:288
int gsmesc
Definition: charset.c:84
void charset_nrc_iso_21_german_to_gsm(Octstr *ostr)
Definition: charset.c:486
void octstr_insert_data(Octstr *ostr, long pos, const char *data, long len)
Definition: octstr.c:1461
#define octstr_get_cstr(ostr)
Definition: octstr.h:233
static const unsigned char gsm_to_latin1[128]
Definition: charset.c:128
static Octstr * from
Definition: mtbatch.c:95
alias_t chars_aliases[]
Definition: charset.c:184
static const int latin1_to_gsm[256]
Definition: latin1_to_gsm.h:4
Octstr * octstr_imm(const char *cstr)
Definition: octstr.c:283
char * real
Definition: charset.c:178
#define NRP
Definition: charset.c:73
void octstr_delete(Octstr *ostr1, long pos, long len)
Definition: octstr.c:1527
void charset_init()
Definition: charset.c:200
char * alias
Definition: charset.c:179
static const int gsm_to_unicode[128]
Definition: charset.c:153
int latin1
Definition: charset.c:85
#define octstr_duplicate(ostr)
Definition: octstr.h:187
void warning(int err, const char *fmt,...)
Definition: log.c:660
void octstr_destroy(Octstr *ostr)
Definition: octstr.c:324
#define octstr_create(cstr)
Definition: octstr.h:125
void charset_gsm_to_latin1(Octstr *ostr)
Definition: charset.c:394
long octstr_len(const Octstr *ostr)
Definition: octstr.c:342
Definition: octstr.c:118
void charset_latin1_to_gsm(Octstr *ostr)
Definition: charset.c:430
int charset_from_utf8(Octstr *utf8, Octstr **to, Octstr *charset_to)
Definition: charset.c:558
void debug(const char *place, int err, const char *fmt,...)
Definition: log.c:726
void octstr_truncate(Octstr *ostr, int new_len)
Definition: octstr.c:1327
void charset_gsm_to_nrc_iso_21_german(Octstr *ostr)
Definition: charset.c:460
int unichar
Definition: charset.c:106
int octstr_get_char(const Octstr *ostr, long pos)
Definition: octstr.c:406
SMPP_PDU *(* handler)(ESME *, SMPP_PDU *)
Definition: drive_smpp.c:213
void octstr_set_char(Octstr *ostr, long pos, int ch)
Definition: octstr.c:415
#define octstr_create_from_data(data, len)
Definition: octstr.h:134
int charset_gsm_truncate(Octstr *gsm, long max)
Definition: charset.c:512
int charset_convert(Octstr *string, char *charset_from, char *charset_to)
Definition: charset.c:589
int octstr_compare(const Octstr *ostr1, const Octstr *ostr2)
Definition: octstr.c:871
void charset_gsm_to_utf8(Octstr *ostr)
Definition: charset.c:220
See file LICENSE for details about the license agreement for using, modifying, copying or deriving work from this software.