Skip to content

Commit 2938967

Browse files
committed
Fixed infinite loop on text containing invalid UTF-8
1 parent aa23ad0 commit 2938967

File tree

1 file changed

+7
-1
lines changed

1 file changed

+7
-1
lines changed

peglib.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,13 @@ inline size_t codepoint_length(const char *s8, size_t l) {
97 97

98 98
inline size_t codepoint_count(const char *s8, size_t l) {
99 99
size_t count = 0;
100-
for (size_t i = 0; i < l; i += codepoint_length(s8 + i, l - i)) {
100+
for (size_t i = 0; i < l;) {
101+
auto len = codepoint_length(s8 + i, l - i);
102+
if (len == 0) {
103+
// Invalid UTF-8 byte, treat as single byte to avoid infinite loop
104+
len = 1;
105+
}
106+
i += len;
101 107
count++;
102 108
}
103 109
return count;

0 commit comments

Comments
 (0)