Content-type: text/html; charset=UTF-8
If len is set to SIZE_MAX (stdint.h is already included by grapheme.h), the string str is interpreted as NUL-terminated, and processing stops when a NUL byte is encountered.
For non-UTF-8 input data, grapheme_next_word_break(3) can be used instead.
/* cc (-static) -o example example.c -lgrapheme */
#include <grapheme.h>
#include <stdint.h>
#include <stdio.h>

/*
 * Example: split a UTF-8 string into words and print each word together
 * with its length in bytes. The string is walked twice: first as a
 * NUL-terminated string, then as a buffer limited to a fixed byte count.
 */
int
main(void)
{
	/* UTF-8 encoded input; the literal is read-only, hence const */
	const char *s = "T\xC3\xABst \xF0\x9F\x91\xA8\xE2\x80\x8D\xF0"
	                "\x9F\x91\xA9\xE2\x80\x8D\xF0\x9F\x91\xA6 \xF0"
	                "\x9F\x87\xBA\xF0\x9F\x87\xB8 \xE0\xA4\xA8\xE0"
	                "\xA5\x80 \xE0\xAE\xA8\xE0\xAE\xBF!";
	size_t ret, len, off;

	printf("Input: \"%s\"\n", s);

	/* print each word with byte-length */
	printf("words in NUL-delimited input:\n");
	for (off = 0; s[off] != '\0'; off += ret) {
		/* SIZE_MAX as length: treat the input as NUL-terminated */
		ret = grapheme_next_word_break_utf8(s + off, SIZE_MAX);

		/* "%.*s" takes an int precision, so ret is narrowed */
		printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off);
	}
	printf("\n");

	/*
	 * do the same, but this time string is length-delimited;
	 * 17 bytes end in the middle of the first emoji sequence
	 * (6 bytes of "T\xC3\xABst " plus 4 + 3 + 4 bytes), so the
	 * remainder of the string is never looked at
	 */
	len = 17;
	printf("words in input delimited to %zu bytes:\n", len);
	for (off = 0; off < len; off += ret) {
		/* only the len - off bytes not yet consumed are passed on */
		ret = grapheme_next_word_break_utf8(s + off, len - off);
		printf("%2zu bytes | %.*s\n", ret, (int)ret, s + off);
	}

	return 0;
}