libunistring is old, slow, messy code that fails to compile on new systems.
Remove its sole user and replace it with the inline UTF-8 decoder
implementation from
http://bjoern.hoehrmann.de/utf-8/decoder/dfa/.
This improves performance and portability, as libunistring is no longer needed.
---
@Peter, Hugh: Would it be ok for me to push this?
modules/websocket/Makefile | 2 +-
modules/websocket/README | 1 -
modules/websocket/doc/websocket_admin.xml | 3 --
modules/websocket/utf8_decode.h | 52 +++++++++++++++++++++++++++++++
modules/websocket/ws_frame.c | 4 +--
5 files changed, 55 insertions(+), 7 deletions(-)
create mode 100644 modules/websocket/utf8_decode.h
diff --git a/modules/websocket/Makefile b/modules/websocket/Makefile
index bb7c809..c686a82 100644
--- a/modules/websocket/Makefile
+++ b/modules/websocket/Makefile
@@ -27,7 +27,7 @@ else
# E.g.: make TLS_HOOKS=1 TLS_EXTRA_LIBS="-lz -lkrb5"
endif
-LIBS+= $(TLS_EXTRA_LIBS) -lunistring
+LIBS+= $(TLS_EXTRA_LIBS)
# Static linking, if you'd like to use TLS and WEBSOCKET at the same time
#
diff --git a/modules/websocket/README b/modules/websocket/README
index 49d8693..bdba3e4 100644
--- a/modules/websocket/README
+++ b/modules/websocket/README
@@ -316,7 +316,6 @@ onreply_route[WS_REPLY] {
The following libraries must be installed before running Kamailio with
this module loaded:
* OpenSSL.
- * GNU libunistring.
4. Parameters
diff --git a/modules/websocket/doc/websocket_admin.xml b/modules/websocket/doc/websocket_admin.xml
index fa7d300..e40dc09 100644
--- a/modules/websocket/doc/websocket_admin.xml
+++ b/modules/websocket/doc/websocket_admin.xml
@@ -262,9 +262,6 @@ onreply_route[WS_REPLY] {
<listitem>
<para><emphasis>OpenSSL</emphasis>.</para>
</listitem>
- <listitem>
- <para><emphasis>GNU libunistring</emphasis>.</para>
- </listitem>
</itemizedlist>
</para>
</section>
diff --git a/modules/websocket/utf8_decode.h b/modules/websocket/utf8_decode.h
new file mode 100644
index 0000000..b274fe7
--- /dev/null
+++ b/modules/websocket/utf8_decode.h
@@ -0,0 +1,52 @@
+#include <stdint.h>
+#include <stddef.h>
+
+// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern(a)hoehrmann.de>
+// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
+
+#define UTF8_ACCEPT 0
+#define UTF8_REJECT 12
+
+static const uint8_t utf8d[] = { /* lookup tables that drive decode() below */
+ // Indices 0..255: map each byte value to a character class. The classes
+ // keep the transition table small and also serve as shift counts for bitmasks.
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
+
+ // Indices 256 and up: the transition table, read as utf8d[256 + state + class].
+ // States are stored as multiples of 12 (one row per state; UTF8_REJECT is 12).
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12,
+};
+/* Advance the UTF-8 DFA by one input byte: updates *state and *codep and returns the new *state. */
+static inline uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte)
+{
+ uint32_t type = utf8d[byte]; /* character class; byte indexes the first 256 entries of utf8d */
+
+ *codep = (*state != UTF8_ACCEPT) ?
+ (byte & 0x3fu) | (*codep << 6) : /* mid-sequence: shift in the low 6 bits of this byte */
+ (0xff >> type) & (byte); /* sequence start: keep only the low (8 - type) bits */
+
+ *state = utf8d[256 + *state + type]; /* transition lookup: current state plus character class */
+ return *state; /* compare against UTF8_ACCEPT / UTF8_REJECT */
+}
+/* Returns 1 if the len bytes at s leave the DFA in UTF8_ACCEPT (true for len == 0 as well), else 0. */
+static inline int IsUTF8(uint8_t* s, size_t len)
+{
+ uint32_t codepoint, state = 0; /* 0 is UTF8_ACCEPT; codepoint is filled in by decode() but not read here */
+
+ while (len--) /* no early exit: the UTF8_REJECT row of utf8d only leads back to UTF8_REJECT */
+ decode(&state, &codepoint, *s++);
+
+ return state == UTF8_ACCEPT; /* any other final state: input was rejected or ended mid-sequence */
+}
+
diff --git a/modules/websocket/ws_frame.c b/modules/websocket/ws_frame.c
index a3a4cef..3562437 100644
--- a/modules/websocket/ws_frame.c
+++ b/modules/websocket/ws_frame.c
@@ -22,7 +22,7 @@
*/
#include <limits.h>
-#include <unistr.h>
+#include "utf8_decode.h"
#include "../../events.h"
#include "../../receive.h"
#include "../../stats.h"
@@ -695,7 +695,7 @@ int ws_frame_transmit(void *data)
frame.fin = 1;
/* Can't be sure whether this message is UTF-8 or not so check to see
if it "might" be UTF-8 and send as binary if it definitely isn't */
- frame.opcode = (u8_check((uint8_t *) wsev->buf, wsev->len) == NULL) ?
+ frame.opcode = IsUTF8((uint8_t *) wsev->buf, wsev->len) ?
OPCODE_TEXT_FRAME : OPCODE_BINARY_FRAME;
frame.payload_len = wsev->len;
frame.payload_data = wsev->buf;
--
1.8.5.1