14#include "ruby/internal/config.h"
24#include "debug_counter.h"
28#include "internal/array.h"
29#include "internal/compar.h"
30#include "internal/compilers.h"
31#include "internal/encoding.h"
32#include "internal/error.h"
33#include "internal/gc.h"
34#include "internal/numeric.h"
35#include "internal/object.h"
36#include "internal/proc.h"
37#include "internal/re.h"
38#include "internal/sanitizers.h"
39#include "internal/string.h"
40#include "internal/transcode.h"
45#include "ruby_assert.h"
49#if defined HAVE_CRYPT_R
50# if defined HAVE_CRYPT_H
53#elif !defined HAVE_CRYPT
54# include "missing/crypt.h"
55# define HAVE_CRYPT_R 1
58#define BEG(no) (regs->beg[(no)])
59#define END(no) (regs->end[(no)])
62#undef rb_usascii_str_new
66#undef rb_usascii_str_new_cstr
67#undef rb_utf8_str_new_cstr
68#undef rb_enc_str_new_cstr
69#undef rb_external_str_new_cstr
70#undef rb_locale_str_new_cstr
71#undef rb_str_dup_frozen
72#undef rb_str_buf_new_cstr
126#define RUBY_MAX_CHAR_LEN 16
127#define STR_PRECOMPUTED_HASH FL_USER4
128#define STR_SHARED_ROOT FL_USER5
129#define STR_BORROWED FL_USER6
130#define STR_TMPLOCK FL_USER7
131#define STR_NOFREE FL_USER18
132#define STR_FAKESTR FL_USER19
134#define STR_SET_NOEMBED(str) do {\
135 FL_SET((str), STR_NOEMBED);\
136 FL_UNSET((str), STR_SHARED | STR_SHARED_ROOT | STR_BORROWED);\
138#define STR_SET_EMBED(str) FL_UNSET((str), STR_NOEMBED | STR_SHARED | STR_NOFREE)
140#define STR_SET_LEN(str, n) do { \
141 RSTRING(str)->len = (n); \
145str_encindex_fastpath(
int encindex)
149 case ENCINDEX_ASCII_8BIT:
151 case ENCINDEX_US_ASCII:
159str_enc_fastpath(
VALUE str)
164#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
165#define TERM_FILL(ptr, termlen) do {\
166 char *const term_fill_ptr = (ptr);\
167 const int term_fill_len = (termlen);\
168 *term_fill_ptr = '\0';\
169 if (UNLIKELY(term_fill_len > 1))\
170 memset(term_fill_ptr, 0, term_fill_len);\
173#define RESIZE_CAPA(str,capacity) do {\
174 const int termlen = TERM_LEN(str);\
175 RESIZE_CAPA_TERM(str,capacity,termlen);\
177#define RESIZE_CAPA_TERM(str,capacity,termlen) do {\
178 if (STR_EMBED_P(str)) {\
179 if (str_embed_capa(str) < capacity + termlen) {\
180 char *const tmp = ALLOC_N(char, (size_t)(capacity) + (termlen));\
181 const long tlen = RSTRING_LEN(str);\
182 memcpy(tmp, RSTRING_PTR(str), tlen);\
183 RSTRING(str)->as.heap.ptr = tmp;\
184 RSTRING(str)->len = tlen;\
185 STR_SET_NOEMBED(str);\
186 RSTRING(str)->as.heap.aux.capa = (capacity);\
190 RUBY_ASSERT(!FL_TEST((str), STR_SHARED)); \
191 SIZED_REALLOC_N(RSTRING(str)->as.heap.ptr, char, \
192 (size_t)(capacity) + (termlen), STR_HEAP_SIZE(str)); \
193 RSTRING(str)->as.heap.aux.capa = (capacity);\
197#define STR_SET_SHARED(str, shared_str) do { \
198 if (!FL_TEST(str, STR_FAKESTR)) { \
199 RUBY_ASSERT(RSTRING_PTR(shared_str) <= RSTRING_PTR(str)); \
200 RUBY_ASSERT(RSTRING_PTR(str) <= RSTRING_PTR(shared_str) + RSTRING_LEN(shared_str)); \
201 RB_OBJ_WRITE((str), &RSTRING(str)->as.heap.aux.shared, (shared_str)); \
202 FL_SET((str), STR_SHARED); \
203 FL_SET((shared_str), STR_SHARED_ROOT); \
204 if (RBASIC_CLASS((shared_str)) == 0) \
205 FL_SET_RAW((shared_str), STR_BORROWED); \
209#define STR_HEAP_PTR(str) (RSTRING(str)->as.heap.ptr)
210#define STR_HEAP_SIZE(str) ((size_t)RSTRING(str)->as.heap.aux.capa + TERM_LEN(str))
213#define STR_ENC_GET(str) get_encoding(str)
215#if !defined SHARABLE_MIDDLE_SUBSTRING
216# define SHARABLE_MIDDLE_SUBSTRING 0
218#if !SHARABLE_MIDDLE_SUBSTRING
219#define SHARABLE_SUBSTRING_P(beg, len, end) ((beg) + (len) == (end))
221#define SHARABLE_SUBSTRING_P(beg, len, end) 1
226str_embed_capa(
VALUE str)
228 return rb_gc_obj_slot_size(str) - offsetof(
struct RString, as.
embed.ary);
232rb_str_reembeddable_p(
VALUE str)
234 return !
FL_TEST(str, STR_NOFREE|STR_SHARED_ROOT|STR_SHARED);
238rb_str_embed_size(
long capa)
244rb_str_size_as_embedded(
VALUE str)
247 if (STR_EMBED_P(str)) {
248 real_size = rb_str_embed_size(
RSTRING(str)->
len) + TERM_LEN(str);
252 else if (rb_str_reembeddable_p(str)) {
253 real_size = rb_str_embed_size(
RSTRING(str)->as.heap.aux.capa) + TERM_LEN(str);
256 real_size =
sizeof(
struct RString);
260 real_size +=
sizeof(st_index_t);
267STR_EMBEDDABLE_P(
long len,
long termlen)
269 return rb_gc_size_allocatable_p(rb_str_embed_size(
len + termlen));
274static VALUE str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding);
275static VALUE str_new_static(
VALUE klass,
const char *ptr,
long len,
int encindex);
277static void str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen);
278static inline void str_modifiable(
VALUE str);
283str_make_independent(
VALUE str)
286 int termlen = TERM_LEN(str);
287 str_make_independent_expand((str),
len, 0L, termlen);
290static inline int str_dependent_p(
VALUE str);
293rb_str_make_independent(
VALUE str)
295 if (str_dependent_p(str)) {
296 str_make_independent(str);
301rb_str_make_embedded(
VALUE str)
306 char *buf =
RSTRING(str)->as.heap.ptr;
310 STR_SET_LEN(str,
len);
317 TERM_FILL(
RSTRING(str)->
as.embed.ary +
len, TERM_LEN(str));
321rb_debug_rstring_null_ptr(
const char *func)
323 fprintf(stderr,
"%s is returning NULL!! "
324 "SIGSEGV is highly expected to follow immediately.\n"
325 "If you could reproduce, attach your debugger here, "
326 "and look at the passed string.\n",
331static VALUE sym_ascii, sym_turkic, sym_lithuanian, sym_fold;
334get_encoding(
VALUE str)
340mustnot_broken(
VALUE str)
342 if (is_broken_string(str)) {
343 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(STR_ENC_GET(str)));
348mustnot_wchar(
VALUE str)
350 rb_encoding *enc = STR_ENC_GET(str);
351 if (rb_enc_mbminlen(enc) > 1) {
352 rb_raise(rb_eArgError,
"wide char encoding: %s", rb_enc_name(enc));
358static VALUE register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash);
360#if SIZEOF_LONG == SIZEOF_VOIDP
361#define PRECOMPUTED_FAKESTR_HASH 1
365#ifdef PRECOMPUTED_FAKESTR_HASH
367fstring_hash(
VALUE str)
371 return (st_index_t)
RSTRING(str)->as.heap.aux.capa;
378#define fstring_hash rb_str_hash
381const struct st_hash_type rb_fstring_hash_type = {
386#define BARE_STRING_P(str) (!FL_ANY_RAW(str, FL_EXIVAR) && RBASIC_CLASS(str) == rb_cString)
388static inline st_index_t
389str_do_hash(
VALUE str)
391 st_index_t h =
rb_memhash((
const void *)RSTRING_PTR(str), RSTRING_LEN(str));
393 if (e && !is_ascii_string(str)) {
400str_store_precomputed_hash(
VALUE str, st_index_t hash)
406 size_t used_bytes = (RSTRING_LEN(str) + TERM_LEN(str));
407 size_t free_bytes = str_embed_capa(str) - used_bytes;
411 memcpy(
RSTRING_END(str) + TERM_LEN(str), &hash,
sizeof(hash));
413 FL_SET(str, STR_PRECOMPUTED_HASH);
421 bool force_precompute_hash;
425fstr_update_callback(st_data_t *key, st_data_t *value, st_data_t data,
int existing)
434 if (rb_objspace_garbage_object_p(str)) {
453 long len = RSTRING_LEN(str);
454 long capa =
len +
sizeof(st_index_t);
455 int term_len = TERM_LEN(str);
457 if (arg->force_precompute_hash && STR_EMBEDDABLE_P(
capa, term_len)) {
459 memcpy(RSTRING_PTR(new_str), RSTRING_PTR(str),
len);
460 STR_SET_LEN(new_str, RSTRING_LEN(str));
462 rb_enc_copy(new_str, str);
463 str_store_precomputed_hash(new_str, fstring_hash(str));
467 rb_enc_copy(new_str, str);
468#ifdef PRECOMPUTED_FAKESTR_HASH
469 if (
rb_str_capacity(new_str) >= RSTRING_LEN(str) + term_len +
sizeof(st_index_t)) {
470 str_store_precomputed_hash(new_str, (st_index_t)
RSTRING(str)->as.heap.aux.capa);
484 if (!
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
487 if (STR_SHARED_P(str)) {
489 str_make_independent(str);
492 if (!BARE_STRING_P(str)) {
498 RBASIC(str)->flags |= RSTRING_FSTR;
500 *key = *value = arg->fstr = str;
513 if (
FL_TEST(str, RSTRING_FSTR))
516 bare = BARE_STRING_P(str);
518 if (STR_EMBED_P(str)) {
523 if (
FL_TEST_RAW(str, STR_SHARED_ROOT | STR_SHARED) == STR_SHARED_ROOT) {
530 rb_str_resize(str, RSTRING_LEN(str));
532 fstr = register_fstring(str,
false,
false);
535 str_replace_shared_without_enc(str, fstr);
543register_fstring(
VALUE str,
bool copy,
bool force_precompute_hash)
547 .force_precompute_hash = force_precompute_hash
550#if SIZEOF_VOIDP == SIZEOF_LONG
554 RSTRING(str)->as.heap.aux.capa = (long)str_do_hash(str);
560 st_table *frozen_strings = rb_vm_fstring_table();
563 st_update(frozen_strings, (st_data_t)str, fstr_update_callback, (st_data_t)&args);
564 }
while (UNDEF_P(args.fstr));
577setup_fake_str(
struct RString *fake_str,
const char *name,
long len,
int encidx)
592 return (
VALUE)fake_str;
599rb_setup_fake_str(
struct RString *fake_str,
const char *name,
long len, rb_encoding *enc)
601 return setup_fake_str(fake_str, name,
len, rb_enc_to_index(enc));
610rb_fstring_new(
const char *ptr,
long len)
612 struct RString fake_str = {RBASIC_INIT};
613 return register_fstring(setup_fake_str(&fake_str, ptr,
len, ENCINDEX_US_ASCII),
false,
false);
617rb_fstring_enc_new(
const char *ptr,
long len, rb_encoding *enc)
619 struct RString fake_str = {RBASIC_INIT};
620 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
false,
false);
624rb_fstring_cstr(
const char *ptr)
626 return rb_fstring_new(ptr, strlen(ptr));
630fstring_set_class_i(st_data_t key, st_data_t val, st_data_t arg)
640 const char *aptr, *bptr;
643 return (alen != blen ||
645 memcmp(aptr, bptr, alen) != 0);
649single_byte_optimizable(
VALUE str)
653 case ENCINDEX_ASCII_8BIT:
654 case ENCINDEX_US_ASCII:
676static inline const char *
677search_nonascii(
const char *p,
const char *e)
681#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)
682# if SIZEOF_UINTPTR_T == 8
683# define NONASCII_MASK UINT64_C(0x8080808080808080)
684# elif SIZEOF_UINTPTR_T == 4
685# define NONASCII_MASK UINT32_C(0x80808080)
687# error "don't know what to do."
690# if SIZEOF_UINTPTR_T == 8
691# define NONASCII_MASK ((uintptr_t)0x80808080UL << 32 | (uintptr_t)0x80808080UL)
692# elif SIZEOF_UINTPTR_T == 4
693# define NONASCII_MASK 0x80808080UL
695# error "don't know what to do."
699 if (UNALIGNED_WORD_ACCESS || e - p >= SIZEOF_VOIDP) {
700#if !UNALIGNED_WORD_ACCESS
701 if ((uintptr_t)p % SIZEOF_VOIDP) {
702 int l = SIZEOF_VOIDP - (uintptr_t)p % SIZEOF_VOIDP;
707 case 7:
if (p[-7]&0x80)
return p-7;
708 case 6:
if (p[-6]&0x80)
return p-6;
709 case 5:
if (p[-5]&0x80)
return p-5;
710 case 4:
if (p[-4]&0x80)
return p-4;
712 case 3:
if (p[-3]&0x80)
return p-3;
713 case 2:
if (p[-2]&0x80)
return p-2;
714 case 1:
if (p[-1]&0x80)
return p-1;
719#if defined(HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED) &&! UNALIGNED_WORD_ACCESS
720#define aligned_ptr(value) \
721 __builtin_assume_aligned((value), sizeof(uintptr_t))
723#define aligned_ptr(value) (value)
726 t = (e - (SIZEOF_VOIDP-1));
728 for (;s < t; s +=
sizeof(uintptr_t)) {
730 memcpy(&word, s,
sizeof(word));
731 if (word & NONASCII_MASK) {
732#ifdef WORDS_BIGENDIAN
733 return (
const char *)s + (nlz_intptr(word&NONASCII_MASK)>>3);
735 return (
const char *)s + (ntz_intptr(word&NONASCII_MASK)>>3);
745 case 7:
if (e[-7]&0x80)
return e-7;
746 case 6:
if (e[-6]&0x80)
return e-6;
747 case 5:
if (e[-5]&0x80)
return e-5;
748 case 4:
if (e[-4]&0x80)
return e-4;
750 case 3:
if (e[-3]&0x80)
return e-3;
751 case 2:
if (e[-2]&0x80)
return e-2;
752 case 1:
if (e[-1]&0x80)
return e-1;
758coderange_scan(
const char *p,
long len, rb_encoding *enc)
760 const char *e = p +
len;
764 p = search_nonascii(p, e);
768 if (rb_enc_asciicompat(enc)) {
769 p = search_nonascii(p, e);
772 int ret = rb_enc_precise_mbclen(p, e, enc);
776 p = search_nonascii(p, e);
782 int ret = rb_enc_precise_mbclen(p, e, enc);
801 p = search_nonascii(p, e);
805 else if (rb_enc_asciicompat(enc)) {
806 p = search_nonascii(p, e);
812 int ret = rb_enc_precise_mbclen(p, e, enc);
819 p = search_nonascii(p, e);
825 int ret = rb_enc_precise_mbclen(p, e, enc);
850 rb_enc_set_index(str1, rb_enc_get_index(str2));
858rb_enc_cr_str_copy_for_substr(
VALUE dest,
VALUE src)
863 str_enc_copy(dest, src);
865 if (!rb_enc_asciicompat(STR_ENC_GET(src)))
876 if (!rb_enc_asciicompat(STR_ENC_GET(src)) ||
888rb_enc_cr_str_exact_copy(
VALUE dest,
VALUE src)
890 str_enc_copy(dest, src);
895enc_coderange_scan(
VALUE str, rb_encoding *enc)
901rb_enc_str_coderange_scan(
VALUE str, rb_encoding *enc)
903 return enc_coderange_scan(str, enc);
912 cr = enc_coderange_scan(str, get_encoding(str));
919rb_enc_str_asciicompat(
VALUE str)
922 return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
930 return rb_enc_str_asciicompat(str) && is_ascii_string(str);
939str_mod_check(
VALUE s,
const char *p,
long len)
947str_capacity(
VALUE str,
const int termlen)
949 if (STR_EMBED_P(str)) {
950 return str_embed_capa(str) - termlen;
952 else if (
FL_ANY_RAW(str, STR_SHARED|STR_NOFREE)) {
956 return RSTRING(str)->as.heap.aux.capa;
963 return str_capacity(str, TERM_LEN(str));
967must_not_null(
const char *ptr)
970 rb_raise(rb_eArgError,
"NULL pointer given");
977 size_t size = rb_str_embed_size(
capa);
981 NEWOBJ_OF(str,
struct RString, klass,
988str_alloc_heap(
VALUE klass)
990 NEWOBJ_OF(str,
struct RString, klass,
997empty_str_alloc(
VALUE klass)
999 RUBY_DTRACE_CREATE_HOOK(STRING, 0);
1000 VALUE str = str_alloc_embed(klass, 0);
1001 memset(
RSTRING(str)->
as.embed.ary, 0, str_embed_capa(str));
1007str_enc_new(
VALUE klass,
const char *ptr,
long len, rb_encoding *enc)
1012 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1019 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1021 int termlen = rb_enc_mbminlen(enc);
1023 if (STR_EMBEDDABLE_P(
len, termlen)) {
1024 str = str_alloc_embed(klass,
len + termlen);
1030 str = str_alloc_heap(klass);
1036 rb_xmalloc_mul_add_mul(
sizeof(
char),
len,
sizeof(
char), termlen);
1039 rb_enc_raw_set(str, enc);
1045 STR_SET_LEN(str,
len);
1051str_new(
VALUE klass,
const char *ptr,
long len)
1075rb_enc_str_new(
const char *ptr,
long len, rb_encoding *enc)
1088 __msan_unpoison_string(ptr);
1108 if (rb_enc_mbminlen(enc) != 1) {
1109 rb_raise(rb_eArgError,
"wchar encoding given");
1111 return rb_enc_str_new(ptr, strlen(ptr), enc);
1115str_new_static(
VALUE klass,
const char *ptr,
long len,
int encindex)
1120 rb_raise(rb_eArgError,
"negative string size (or size too big)");
1124 str = str_enc_new(klass, ptr,
len, rb_enc_from_index(encindex));
1127 RUBY_DTRACE_CREATE_HOOK(STRING,
len);
1128 str = str_alloc_heap(klass);
1130 RSTRING(str)->as.heap.ptr = (
char *)ptr;
1132 RBASIC(str)->flags |= STR_NOFREE;
1133 rb_enc_associate_index(str, encindex);
1147 return str_new_static(
rb_cString, ptr,
len, ENCINDEX_US_ASCII);
1153 return str_new_static(
rb_cString, ptr,
len, ENCINDEX_UTF_8);
1159 return str_new_static(
rb_cString, ptr,
len, rb_enc_to_index(enc));
1162static VALUE str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1163 rb_encoding *from, rb_encoding *to,
1164 int ecflags,
VALUE ecopts);
1167is_enc_ascii_string(
VALUE str, rb_encoding *enc)
1169 int encidx = rb_enc_to_index(enc);
1170 if (rb_enc_get_index(str) == encidx)
1171 return is_ascii_string(str);
1182 if (!to)
return str;
1183 if (!from) from = rb_enc_get(str);
1184 if (from == to)
return str;
1185 if ((rb_enc_asciicompat(to) && is_enc_ascii_string(str, from)) ||
1186 rb_is_ascii8bit_enc(to)) {
1187 if (STR_ENC_GET(str) != to) {
1189 rb_enc_associate(str, to);
1196 from, to, ecflags, ecopts);
1197 if (
NIL_P(newstr)) {
1205rb_str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1206 rb_encoding *from,
int ecflags,
VALUE ecopts)
1211 if (ofs < -olen || olen < ofs)
1213 if (ofs < 0) ofs += olen;
1215 STR_SET_LEN(newstr, ofs);
1219 rb_str_modify(newstr);
1220 return str_cat_conv_enc_opts(newstr, ofs, ptr,
len, from,
1226rb_str_initialize(
VALUE str,
const char *ptr,
long len, rb_encoding *enc)
1228 STR_SET_LEN(str, 0);
1229 rb_enc_associate(str, enc);
1235str_cat_conv_enc_opts(
VALUE newstr,
long ofs,
const char *ptr,
long len,
1236 rb_encoding *from, rb_encoding *to,
1237 int ecflags,
VALUE ecopts)
1242 VALUE econv_wrapper;
1243 const unsigned char *start, *sp;
1244 unsigned char *dest, *dp;
1245 size_t converted_output = (size_t)ofs;
1250 RBASIC_CLEAR_CLASS(econv_wrapper);
1252 if (!ec)
return Qnil;
1255 sp = (
unsigned char*)ptr;
1257 while ((dest = (
unsigned char*)
RSTRING_PTR(newstr)),
1258 (dp = dest + converted_output),
1262 size_t converted_input = sp - start;
1263 size_t rest =
len - converted_input;
1264 converted_output = dp - dest;
1266 if (converted_input && converted_output &&
1267 rest < (LONG_MAX / converted_output)) {
1268 rest = (rest * converted_output) / converted_input;
1273 olen += rest < 2 ? 2 : rest;
1274 rb_str_resize(newstr, olen);
1283 rb_enc_associate(newstr, to);
1302 const int eidx = rb_enc_to_index(eenc);
1305 return rb_enc_str_new(ptr,
len, eenc);
1315 if (!ienc || eenc == ienc) {
1316 return rb_enc_str_new(ptr,
len, eenc);
1322 (rb_enc_asciicompat(eenc) && !search_nonascii(ptr, ptr +
len))) {
1323 return rb_enc_str_new(ptr,
len, ienc);
1326 str = rb_enc_str_new(NULL, 0, ienc);
1329 if (
NIL_P(rb_str_cat_conv_enc_opts(str, 0, ptr,
len, eenc, 0,
Qnil))) {
1330 rb_str_initialize(str, ptr,
len, eenc);
1336rb_external_str_with_enc(
VALUE str, rb_encoding *eenc)
1338 int eidx = rb_enc_to_index(eenc);
1340 !is_ascii_string(str)) {
1344 rb_enc_associate_index(str, eidx);
1403str_replace_shared_without_enc(
VALUE str2,
VALUE str)
1405 const int termlen = TERM_LEN(str);
1410 if (str_embed_capa(str2) >=
len + termlen) {
1411 char *ptr2 =
RSTRING(str2)->as.embed.ary;
1412 STR_SET_EMBED(str2);
1414 TERM_FILL(ptr2+
len, termlen);
1418 if (STR_SHARED_P(str)) {
1419 root =
RSTRING(str)->as.heap.aux.shared;
1428 if (!STR_EMBED_P(str2) && !
FL_TEST_RAW(str2, STR_SHARED|STR_NOFREE)) {
1430 rb_fatal(
"about to free a possible shared root");
1432 char *ptr2 = STR_HEAP_PTR(str2);
1434 ruby_sized_xfree(ptr2, STR_HEAP_SIZE(str2));
1437 FL_SET(str2, STR_NOEMBED);
1438 RSTRING(str2)->as.heap.ptr = ptr;
1439 STR_SET_SHARED(str2, root);
1442 STR_SET_LEN(str2,
len);
1450 str_replace_shared_without_enc(str2, str);
1451 rb_enc_cr_str_exact_copy(str2, str);
1458 return str_replace_shared(str_alloc_heap(klass), str);
1475rb_str_new_frozen_String(
VALUE orig)
1482rb_str_tmp_frozen_acquire(
VALUE orig)
1485 return str_new_frozen_buffer(0, orig, FALSE);
1489rb_str_tmp_frozen_no_embed_acquire(
VALUE orig)
1491 if (
OBJ_FROZEN_RAW(orig) && !STR_EMBED_P(orig) && !rb_str_reembeddable_p(orig))
return orig;
1492 if (STR_SHARED_P(orig) && !STR_EMBED_P(
RSTRING(orig)->
as.heap.aux.shared))
return rb_str_tmp_frozen_acquire(orig);
1494 VALUE str = str_alloc_heap(0);
1497 FL_SET(str, STR_SHARED_ROOT);
1499 size_t capa = str_capacity(orig, TERM_LEN(orig));
1505 if (STR_EMBED_P(orig) ||
FL_TEST_RAW(orig, STR_SHARED | STR_SHARED_ROOT | RSTRING_FSTR)) {
1506 RSTRING(str)->as.heap.ptr = rb_xmalloc_mul_add_mul(
sizeof(
char),
capa,
sizeof(
char), TERM_LEN(orig));
1513 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1514 RBASIC(orig)->flags &= ~STR_NOFREE;
1515 STR_SET_SHARED(orig, str);
1525rb_str_tmp_frozen_release(
VALUE orig,
VALUE tmp)
1530 if (STR_EMBED_P(tmp)) {
1543 RSTRING(orig)->as.heap.aux.capa =
RSTRING(tmp)->as.heap.aux.capa;
1544 RBASIC(orig)->flags |=
RBASIC(tmp)->flags & STR_NOFREE;
1549 STR_SET_LEN(tmp, 0);
1557 return str_new_frozen_buffer(klass, orig, TRUE);
1566 VALUE str = str_alloc_heap(klass);
1569 RSTRING(str)->as.heap.aux.capa =
RSTRING(orig)->as.heap.aux.capa;
1570 RBASIC(str)->flags |=
RBASIC(orig)->flags & STR_NOFREE;
1571 RBASIC(orig)->flags &= ~STR_NOFREE;
1572 STR_SET_SHARED(orig, str);
1579str_new_frozen_buffer(
VALUE klass,
VALUE orig,
int copy_encoding)
1584 rb_encoding *enc = copy_encoding ? STR_ENC_GET(orig) : rb_ascii8bit_encoding();
1585 int termlen = copy_encoding ? TERM_LEN(orig) : 1;
1587 if (STR_EMBED_P(orig) || STR_EMBEDDABLE_P(
len, termlen)) {
1601 if ((ofs > 0) || (rest > 0) ||
1604 str = str_new_shared(klass,
shared);
1606 RSTRING(str)->as.heap.ptr += ofs;
1607 STR_SET_LEN(str,
RSTRING_LEN(str) - (ofs + rest));
1615 else if (STR_EMBEDDABLE_P(
RSTRING_LEN(orig), TERM_LEN(orig))) {
1616 str = str_alloc_embed(klass,
RSTRING_LEN(orig) + TERM_LEN(orig));
1624 str = heap_str_make_shared(klass, orig);
1628 if (copy_encoding) rb_enc_cr_str_exact_copy(str, orig);
1640str_new_empty_String(
VALUE str)
1643 rb_enc_copy(v, str);
1647#define STR_BUF_MIN_SIZE 63
1652 if (STR_EMBEDDABLE_P(
capa, 1)) {
1660 RSTRING(str)->as.heap.ptr[0] =
'\0';
1669 long len = strlen(ptr);
1680 return str_new(0, 0,
len);
1686 if (STR_EMBED_P(str)) {
1687 RB_DEBUG_COUNTER_INC(obj_str_embed);
1689 else if (
FL_TEST(str, STR_SHARED | STR_NOFREE)) {
1690 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_SHARED));
1691 (void)RB_DEBUG_COUNTER_INC_IF(obj_str_shared,
FL_TEST(str, STR_NOFREE));
1694 RB_DEBUG_COUNTER_INC(obj_str_ptr);
1695 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
1700rb_str_memsize(
VALUE str)
1702 if (
FL_TEST(str, STR_NOEMBED|STR_SHARED|STR_NOFREE) == STR_NOEMBED) {
1703 return STR_HEAP_SIZE(str);
1713 return rb_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
1716static inline void str_discard(
VALUE str);
1717static void str_shared_replace(
VALUE str,
VALUE str2);
1722 if (str != str2) str_shared_replace(str, str2);
1733 enc = STR_ENC_GET(str2);
1736 termlen = rb_enc_mbminlen(enc);
1740 if (str_embed_capa(str) >=
RSTRING_LEN(str2) + termlen) {
1743 rb_enc_associate(str, enc);
1747 if (STR_EMBED_P(str2)) {
1752 char *new_ptr =
ALLOC_N(
char,
len + termlen);
1753 memcpy(new_ptr,
RSTRING(str2)->
as.embed.ary,
len + termlen);
1754 RSTRING(str2)->as.heap.ptr = new_ptr;
1755 STR_SET_LEN(str2,
len);
1757 STR_SET_NOEMBED(str2);
1760 STR_SET_NOEMBED(str);
1764 if (
FL_TEST(str2, STR_SHARED)) {
1766 STR_SET_SHARED(str,
shared);
1769 RSTRING(str)->as.heap.aux.capa =
RSTRING(str2)->as.heap.aux.capa;
1773 STR_SET_EMBED(str2);
1775 STR_SET_LEN(str2, 0);
1776 rb_enc_associate(str, enc);
1790 return rb_obj_as_string_result(str, obj);
1807 if (STR_SHARED_P(str2)) {
1810 STR_SET_NOEMBED(str);
1811 STR_SET_LEN(str,
len);
1813 STR_SET_SHARED(str,
shared);
1814 rb_enc_cr_str_exact_copy(str, str2);
1817 str_replace_shared(str, str2);
1826 size_t size = rb_str_embed_size(
capa);
1830 NEWOBJ_OF(str,
struct RString, klass,
1839 NEWOBJ_OF(str,
struct RString, klass,
1850 encidx = rb_enc_get_index(str);
1854 if (encidx) rb_enc_associate_index(dup, encidx);
1870 return str_duplicate_setup_encoding(str, dup, flags);
1879 root =
RSTRING(str)->as.heap.aux.shared;
1881 else if (UNLIKELY(!(flags &
FL_FREEZE))) {
1882 root = str = str_new_frozen(klass, str);
1889 FL_SET(root, STR_SHARED_ROOT);
1891 flags |= RSTRING_NOEMBED | STR_SHARED;
1894 return str_duplicate_setup_encoding(str, dup, flags);
1900 if (STR_EMBED_P(str)) {
1901 return str_duplicate_setup_embed(klass, str, dup);
1904 return str_duplicate_setup_heap(klass, str, dup);
1912 if (STR_EMBED_P(str)) {
1913 dup = str_alloc_embed(klass,
RSTRING_LEN(str) + TERM_LEN(str));
1916 dup = str_alloc_heap(klass);
1919 return str_duplicate_setup(klass, str, dup);
1930rb_str_dup_m(
VALUE str)
1932 if (LIKELY(BARE_STRING_P(str))) {
1943 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1950 RUBY_DTRACE_CREATE_HOOK(STRING,
RSTRING_LEN(str));
1954 new_str = ec_str_alloc_embed(ec, klass,
RSTRING_LEN(str) + TERM_LEN(str));
1955 str_duplicate_setup_embed(klass, str, new_str);
1958 new_str = ec_str_alloc_heap(ec, klass);
1959 str_duplicate_setup_heap(klass, str, new_str);
1968rb_str_with_debug_created_info(
VALUE str,
VALUE path,
int line)
1970 VALUE debug_info = rb_ary_new_from_args(2, path,
INT2FIX(line));
1972 rb_ivar_set(str, id_debug_created_info, rb_ary_freeze(debug_info));
1989 static ID keyword_ids[2];
1990 VALUE orig, opt, venc, vcapa;
1992 rb_encoding *enc = 0;
1995 if (!keyword_ids[0]) {
1996 keyword_ids[0] = rb_id_encoding();
1997 CONST_ID(keyword_ids[1],
"capacity");
2005 if (!UNDEF_P(venc) && !
NIL_P(venc)) {
2006 enc = rb_to_encoding(venc);
2008 if (!UNDEF_P(vcapa) && !
NIL_P(vcapa)) {
2011 int termlen = enc ? rb_enc_mbminlen(enc) : 1;
2013 if (
capa < STR_BUF_MIN_SIZE) {
2014 capa = STR_BUF_MIN_SIZE;
2022 if (orig == str) n = 0;
2024 str_modifiable(str);
2025 if (STR_EMBED_P(str) ||
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2027 const size_t size = (size_t)
capa + termlen;
2029 const size_t osize =
RSTRING_LEN(str) + TERM_LEN(str);
2030 char *new_ptr =
ALLOC_N(
char, size);
2031 if (STR_EMBED_P(str))
RUBY_ASSERT((
long)osize <= str_embed_capa(str));
2032 memcpy(new_ptr, old_ptr, osize < size ? osize : size);
2034 RSTRING(str)->as.heap.ptr = new_ptr;
2036 else if (STR_HEAP_SIZE(str) != (
size_t)
capa + termlen) {
2037 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
2038 (
size_t)
capa + termlen, STR_HEAP_SIZE(str));
2040 STR_SET_LEN(str,
len);
2043 memcpy(
RSTRING(str)->
as.heap.ptr, RSTRING_PTR(orig),
len);
2044 rb_enc_cr_str_exact_copy(str, orig);
2046 FL_SET(str, STR_NOEMBED);
2053 rb_enc_associate(str, enc);
2065rb_str_s_new(
int argc,
VALUE *argv,
VALUE klass)
2071 static ID keyword_ids[2];
2074 rb_encoding *enc = NULL;
2081 keyword_ids[0] = rb_id_encoding();
2082 CONST_ID(keyword_ids[1],
"capacity");
2084 encoding = kwargs[0];
2085 capacity = kwargs[1];
2094 if (UNDEF_P(encoding)) {
2096 encoding = rb_obj_encoding(orig);
2100 if (!UNDEF_P(encoding)) {
2101 enc = rb_to_encoding(encoding);
2105 if (UNDEF_P(capacity)) {
2107 VALUE empty_str = str_new(klass,
"", 0);
2109 rb_enc_associate(empty_str, enc);
2113 VALUE copy = str_duplicate(klass, orig);
2114 rb_enc_associate(copy, enc);
2127 if (orig_capa >
capa) {
2132 VALUE str = str_enc_new(klass, NULL,
capa, enc);
2133 STR_SET_LEN(str, 0);
2144#define is_utf8_lead_byte(c) (((c)&0xC0) != 0x80)
2159static inline uintptr_t
2160count_utf8_lead_bytes_with_word(
const uintptr_t *s)
2165 d = (d>>6) | (~d>>7);
2166 d &= NONASCII_MASK >> 7;
2169#if defined(HAVE_BUILTIN___BUILTIN_POPCOUNT) && defined(__POPCNT__)
2171 return rb_popcount_intptr(d);
2175# if SIZEOF_VOIDP == 8
2184enc_strlen(
const char *p,
const char *e, rb_encoding *enc,
int cr)
2190 long diff = (long)(e - p);
2191 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2196 if ((
int)
sizeof(uintptr_t) * 2 < e - p) {
2197 const uintptr_t *s, *t;
2198 const uintptr_t lowbits =
sizeof(uintptr_t) - 1;
2199 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2200 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2201 while (p < (
const char *)s) {
2202 if (is_utf8_lead_byte(*p))
len++;
2206 len += count_utf8_lead_bytes_with_word(s);
2209 p = (
const char *)s;
2212 if (is_utf8_lead_byte(*p))
len++;
2218 else if (rb_enc_asciicompat(enc)) {
2223 q = search_nonascii(p, e);
2229 p += rb_enc_fast_mbclen(p, e, enc);
2236 q = search_nonascii(p, e);
2242 p += rb_enc_mbclen(p, e, enc);
2249 for (c=0; p<e; c++) {
2250 p += rb_enc_mbclen(p, e, enc);
2265rb_enc_strlen_cr(
const char *p,
const char *e, rb_encoding *enc,
int *cr)
2273 long diff = (long)(e - p);
2274 return diff / rb_enc_mbminlen(enc) + !!(diff % rb_enc_mbminlen(enc));
2276 else if (rb_enc_asciicompat(enc)) {
2280 q = search_nonascii(p, e);
2288 ret = rb_enc_precise_mbclen(p, e, enc);
2303 for (c=0; p<e; c++) {
2304 ret = rb_enc_precise_mbclen(p, e, enc);
2311 if (p + rb_enc_mbminlen(enc) <= e)
2312 p += rb_enc_mbminlen(enc);
2323str_strlen(
VALUE str, rb_encoding *enc)
2328 if (single_byte_optimizable(str))
return RSTRING_LEN(str);
2329 if (!enc) enc = STR_ENC_GET(str);
2335 long n = rb_enc_strlen_cr(p, e, enc, &cr);
2340 return enc_strlen(p, e, enc, cr);
2347 return str_strlen(str, NULL);
2361 return LONG2NUM(str_strlen(str, NULL));
2373rb_str_bytesize(
VALUE str)
2391rb_str_empty(
VALUE str)
2411 char *ptr1, *ptr2, *ptr3;
2416 enc = rb_enc_check_str(str1, str2);
2419 termlen = rb_enc_mbminlen(enc);
2420 if (len1 > LONG_MAX - len2) {
2421 rb_raise(rb_eArgError,
"string size too big");
2423 str3 = str_enc_new(
rb_cString, 0, len1+len2, enc);
2425 memcpy(ptr3, ptr1, len1);
2426 memcpy(ptr3+len1, ptr2, len2);
2427 TERM_FILL(&ptr3[len1+len2], termlen);
2443 MAYBE_UNUSED(
char) *ptr1, *ptr2;
2446 int enc1 = rb_enc_get_index(str1);
2447 int enc2 = rb_enc_get_index(str2);
2452 else if (enc2 < 0) {
2455 else if (enc1 != enc2) {
2458 else if (len1 > LONG_MAX - len2) {
2491 rb_enc_copy(str2, str);
2496 rb_raise(rb_eArgError,
"negative argument");
2499 if (STR_EMBEDDABLE_P(
len, 1)) {
2508 STR_SET_LEN(str2,
len);
2509 rb_enc_copy(str2, str);
2513 rb_raise(rb_eArgError,
"argument too big");
2517 termlen = TERM_LEN(str);
2523 while (n <=
len/2) {
2524 memcpy(ptr2 + n, ptr2, n);
2527 memcpy(ptr2 + n, ptr2,
len-n);
2529 STR_SET_LEN(str2,
len);
2530 TERM_FILL(&ptr2[
len], termlen);
2531 rb_enc_cr_str_copy_for_substr(str2, str);
2557 VALUE tmp = rb_check_array_type(arg);
2568rb_check_lockedtmp(
VALUE str)
2570 if (
FL_TEST(str, STR_TMPLOCK)) {
2577#define STR_UNMODIFIABLE_MASK (FL_FREEZE | STR_TMPLOCK | STR_CHILLED)
2579str_modifiable(
VALUE str)
2581 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_UNMODIFIABLE_MASK))) {
2582 if (CHILLED_STRING_P(str)) {
2583 CHILLED_STRING_MUTATED(str);
2585 rb_check_lockedtmp(str);
2586 rb_check_frozen(str);
2591str_dependent_p(
VALUE str)
2593 if (STR_EMBED_P(str) || !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2603#define STR_DEPENDANT_MASK (STR_UNMODIFIABLE_MASK | STR_SHARED | STR_NOFREE)
2605str_independent(
VALUE str)
2607 if (RB_UNLIKELY(
FL_ANY_RAW(str, STR_DEPENDANT_MASK))) {
2608 str_modifiable(str);
2609 return !str_dependent_p(str);
2615str_make_independent_expand(
VALUE str,
long len,
long expand,
const int termlen)
2623 if (!STR_EMBED_P(str) && str_embed_capa(str) >=
capa + termlen) {
2624 ptr =
RSTRING(str)->as.heap.ptr;
2628 STR_SET_LEN(str,
len);
2635 memcpy(ptr, oldptr,
len);
2637 if (
FL_TEST_RAW(str, STR_NOEMBED|STR_NOFREE|STR_SHARED) == STR_NOEMBED) {
2640 STR_SET_NOEMBED(str);
2641 FL_UNSET(str, STR_SHARED|STR_NOFREE);
2642 TERM_FILL(ptr +
len, termlen);
2643 RSTRING(str)->as.heap.ptr = ptr;
2644 STR_SET_LEN(str,
len);
2651 if (!str_independent(str))
2652 str_make_independent(str);
2659 int termlen = TERM_LEN(str);
2663 rb_raise(rb_eArgError,
"negative expanding string size");
2665 if (expand >= LONG_MAX -
len) {
2666 rb_raise(rb_eArgError,
"string size too big");
2669 if (!str_independent(str)) {
2670 str_make_independent_expand(str,
len, expand, termlen);
2672 else if (expand > 0) {
2673 RESIZE_CAPA_TERM(str,
len + expand, termlen);
2680str_modify_keep_cr(
VALUE str)
2682 if (!str_independent(str))
2683 str_make_independent(str);
2690str_discard(
VALUE str)
2692 str_modifiable(str);
2693 if (!STR_EMBED_P(str) && !
FL_TEST(str, STR_SHARED|STR_NOFREE)) {
2694 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
2695 RSTRING(str)->as.heap.ptr = 0;
2696 STR_SET_LEN(str, 0);
2703 int encindex = rb_enc_get_index(str);
2705 if (RB_UNLIKELY(encindex == -1)) {
2709 if (RB_LIKELY(str_encindex_fastpath(encindex))) {
2713 rb_encoding *enc = rb_enc_from_index(encindex);
2714 if (!rb_enc_asciicompat(enc)) {
2738zero_filled(
const char *s,
int n)
2740 for (; n > 0; --n) {
2747str_null_char(
const char *s,
long len,
const int minlen, rb_encoding *enc)
2749 const char *e = s +
len;
2751 for (; s + minlen <= e; s += rb_enc_mbclen(s, e, enc)) {
2752 if (zero_filled(s, minlen))
return s;
2758str_fill_term(
VALUE str,
char *s,
long len,
int termlen)
2763 if (str_dependent_p(str)) {
2764 if (!zero_filled(s +
len, termlen))
2765 str_make_independent_expand(str,
len, 0L, termlen);
2768 TERM_FILL(s +
len, termlen);
2775rb_str_change_terminator_length(
VALUE str,
const int oldtermlen,
const int termlen)
2777 long capa = str_capacity(str, oldtermlen) + oldtermlen;
2782 rb_check_lockedtmp(str);
2783 str_make_independent_expand(str,
len, 0L, termlen);
2785 else if (str_dependent_p(str)) {
2786 if (termlen > oldtermlen)
2787 str_make_independent_expand(str,
len, 0L, termlen);
2790 if (!STR_EMBED_P(str)) {
2795 if (termlen > oldtermlen) {
2804str_null_check(
VALUE str,
int *w)
2808 rb_encoding *enc = rb_enc_get(str);
2809 const int minlen = rb_enc_mbminlen(enc);
2813 if (str_null_char(s,
len, minlen, enc)) {
2816 return str_fill_term(str, s,
len, minlen);
2819 if (!s || memchr(s, 0,
len)) {
2823 s = str_fill_term(str, s,
len, minlen);
2829rb_str_to_cstr(
VALUE str)
2832 return str_null_check(str, &w);
2840 char *s = str_null_check(str, &w);
2843 rb_raise(rb_eArgError,
"string contains null char");
2845 rb_raise(rb_eArgError,
"string contains null byte");
2851rb_str_fill_terminator(
VALUE str,
const int newminlen)
2855 return str_fill_term(str, s,
len, newminlen);
2861 str = rb_check_convert_type_with_id(str,
T_STRING,
"String", idTo_str);
2885str_nth_len(
const char *p,
const char *e,
long *nthp, rb_encoding *enc)
2894 else if (rb_enc_asciicompat(enc)) {
2895 const char *p2, *e2;
2898 while (p < e && 0 < nth) {
2905 p2 = search_nonascii(p, e2);
2914 n = rb_enc_mbclen(p, e, enc);
2925 while (p < e && nth--) {
2926 p += rb_enc_mbclen(p, e, enc);
2935rb_enc_nth(
const char *p,
const char *e,
long nth, rb_encoding *enc)
2937 return str_nth_len(p, e, &nth, enc);
2941str_nth(
const char *p,
const char *e,
long nth, rb_encoding *enc,
int singlebyte)
2946 p = str_nth_len(p, e, &nth, enc);
2955str_offset(
const char *p,
const char *e,
long nth, rb_encoding *enc,
int singlebyte)
2957 const char *pp = str_nth(p, e, nth, enc, singlebyte);
2958 if (!pp)
return e - p;
2966 STR_ENC_GET(str), single_byte_optimizable(str));
2971str_utf8_nth(
const char *p,
const char *e,
long *nthp)
2974 if ((
int)SIZEOF_VOIDP * 2 < e - p && (
int)SIZEOF_VOIDP * 2 < nth) {
2975 const uintptr_t *s, *t;
2976 const uintptr_t lowbits = SIZEOF_VOIDP - 1;
2977 s = (
const uintptr_t*)(~lowbits & ((uintptr_t)p + lowbits));
2978 t = (
const uintptr_t*)(~lowbits & (uintptr_t)e);
2979 while (p < (
const char *)s) {
2980 if (is_utf8_lead_byte(*p)) nth--;
2984 nth -= count_utf8_lead_bytes_with_word(s);
2986 }
while (s < t && (
int)SIZEOF_VOIDP <= nth);
2990 if (is_utf8_lead_byte(*p)) {
2991 if (nth == 0)
break;
3001str_utf8_offset(
const char *p,
const char *e,
long nth)
3003 const char *pp = str_utf8_nth(p, e, &nth);
3012 if (single_byte_optimizable(str) || pos < 0)
3016 return enc_strlen(p, p + pos, STR_ENC_GET(str),
ENC_CODERANGE(str));
3021str_subseq(
VALUE str,
long beg,
long len)
3029 const int termlen = TERM_LEN(str);
3037 if (str_embed_capa(str2) >=
len + termlen) {
3038 char *ptr2 =
RSTRING(str2)->as.embed.ary;
3039 STR_SET_EMBED(str2);
3041 TERM_FILL(ptr2+
len, termlen);
3043 STR_SET_LEN(str2,
len);
3047 str_replace_shared(str2, str);
3050 RSTRING(str2)->as.heap.ptr += beg;
3052 STR_SET_LEN(str2,
len);
3062 VALUE str2 = str_subseq(str, beg,
len);
3063 rb_enc_cr_str_copy_for_substr(str2, str);
3073 rb_encoding *enc = STR_ENC_GET(str);
3076 if (
len < 0)
return 0;
3077 if (beg < 0 && -beg < 0)
return 0;
3081 if (single_byte_optimizable(str)) {
3082 if (beg > blen)
return 0;
3085 if (beg < 0)
return 0;
3087 if (
len > blen - beg)
3089 if (
len < 0)
return 0;
3094 if (
len > -beg)
len = -beg;
3098 while (beg-- >
len && (e = rb_enc_prev_char(s, e, e, enc)) != 0);
3101 while (
len-- > 0 && (p = rb_enc_prev_char(s, p, e, enc)) != 0);
3107 slen = str_strlen(str, enc);
3109 if (beg < 0)
return 0;
3111 if (
len == 0)
goto end;
3114 else if (beg > 0 && beg > blen) {
3118 if (beg > str_strlen(str, enc))
return 0;
3124 p = str_utf8_nth(s, e, &beg);
3125 if (beg > 0)
return 0;
3126 len = str_utf8_offset(p, e,
len);
3132 p = s + beg * char_sz;
3136 else if (
len * char_sz > e - p)
3141 else if ((p = str_nth_len(s, e, &beg, enc)) == e) {
3142 if (beg > 0)
return 0;
3146 len = str_offset(p, e,
len, enc, 0);
3154static VALUE str_substr(
VALUE str,
long beg,
long len,
int empty);
3159 return str_substr(str, beg,
len, TRUE);
3169str_substr(
VALUE str,
long beg,
long len,
int empty)
3173 if (!p)
return Qnil;
3174 if (!
len && !empty)
return Qnil;
3178 VALUE str2 = str_subseq(str, beg,
len);
3179 rb_enc_cr_str_copy_for_substr(str2, str);
3187 if (CHILLED_STRING_P(str)) {
3208 if (
OBJ_FROZEN(str) || CHILLED_STRING_P(str)) {
3238str_uminus(
VALUE str)
3243 return rb_fstring(str);
3247#define rb_str_dup_frozen rb_str_new_frozen
3252 if (
FL_TEST(str, STR_TMPLOCK)) {
3255 FL_SET(str, STR_TMPLOCK);
3262 if (!
FL_TEST(str, STR_TMPLOCK)) {
3280 const int termlen = TERM_LEN(str);
3282 str_modifiable(str);
3283 if (STR_SHARED_P(str)) {
3286 if (
len > (
capa = (
long)str_capacity(str, termlen)) ||
len < 0) {
3287 rb_bug(
"probable buffer overflow: %ld for %ld",
len,
capa);
3303 rb_encoding *enc = rb_enc_get(str);
3320 STR_SET_LEN(str,
len);
3328 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3331 int independent = str_independent(str);
3333 const int termlen = TERM_LEN(str);
3335 if (slen >
len || (termlen != 1 && slen <
len)) {
3341 if (STR_EMBED_P(str)) {
3342 if (
len == slen)
return str;
3343 if (str_embed_capa(str) >=
len + termlen) {
3344 STR_SET_LEN(str,
len);
3348 str_make_independent_expand(str, slen,
len - slen, termlen);
3350 else if (str_embed_capa(str) >=
len + termlen) {
3351 char *ptr = STR_HEAP_PTR(str);
3353 if (slen >
len) slen =
len;
3356 STR_SET_LEN(str,
len);
3357 if (independent) ruby_xfree(ptr);
3360 else if (!independent) {
3361 if (
len == slen)
return str;
3362 str_make_independent_expand(str, slen,
len - slen, termlen);
3366 SIZED_REALLOC_N(
RSTRING(str)->
as.heap.ptr,
char,
3367 (
size_t)
len + termlen, STR_HEAP_SIZE(str));
3370 else if (
len == slen)
return str;
3371 STR_SET_LEN(str,
len);
3378str_ensure_available_capa(
VALUE str,
long len)
3380 str_modify_keep_cr(str);
3382 const int termlen = TERM_LEN(str);
3385 if (RB_UNLIKELY(olen > LONG_MAX -
len)) {
3386 rb_raise(rb_eArgError,
"string sizes too big");
3389 long total = olen +
len;
3390 long capa = str_capacity(str, termlen);
3393 if (total >= LONG_MAX / 2) {
3396 while (total >
capa) {
3399 RESIZE_CAPA_TERM(str,
capa, termlen);
3404str_buf_cat4(
VALUE str,
const char *ptr,
long len,
bool keep_cr)
3407 str_modify_keep_cr(str);
3412 if (
len == 0)
return 0;
3414 long total, olen,
off = -1;
3416 const int termlen = TERM_LEN(str);
3419 if (ptr >= sptr && ptr <= sptr + olen) {
3423 long capa = str_capacity(str, termlen);
3425 if (olen > LONG_MAX -
len) {
3426 rb_raise(rb_eArgError,
"string sizes too big");
3430 if (total >= LONG_MAX / 2) {
3433 while (total >
capa) {
3436 RESIZE_CAPA_TERM(str,
capa, termlen);
3442 memcpy(sptr + olen, ptr,
len);
3443 STR_SET_LEN(str, total);
3444 TERM_FILL(sptr + total, termlen);
3449#define str_buf_cat(str, ptr, len) str_buf_cat4((str), (ptr), len, false)
3450#define str_buf_cat2(str, ptr) str_buf_cat4((str), (ptr), rb_strlen_lit(ptr), false)
3455 if (
len == 0)
return str;
3457 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3459 return str_buf_cat(str, ptr,
len);
3470rb_str_buf_cat_byte(
VALUE str,
unsigned char byte)
3475 if (UNLIKELY(!str_independent(str))) {
3476 str_make_independent(str);
3479 long string_length = -1;
3480 const int null_terminator_length = 1;
3485 if (UNLIKELY(string_length > LONG_MAX - 1)) {
3486 rb_raise(rb_eArgError,
"string sizes too big");
3489 long string_capacity = str_capacity(str, null_terminator_length);
3495 if (LIKELY(string_capacity >= string_length + 1)) {
3497 sptr[string_length] = byte;
3498 STR_SET_LEN(str, string_length + 1);
3499 TERM_FILL(sptr + string_length + 1, null_terminator_length);
3503 str_buf_cat(str, (
char *)&
byte, 1);
3511 if (ISASCII(
byte)) {
3519 rb_enc_associate_index(str, ENCINDEX_ASCII_8BIT);
3530rb_enc_cr_str_buf_cat(
VALUE str,
const char *ptr,
long len,
3531 int ptr_encindex,
int ptr_cr,
int *ptr_cr_ret)
3536 rb_encoding *str_enc, *ptr_enc;
3540 if (str_encindex == ptr_encindex) {
3542 ptr_cr = coderange_scan(ptr,
len, rb_enc_from_index(ptr_encindex));
3546 str_enc = rb_enc_from_index(str_encindex);
3547 ptr_enc = rb_enc_from_index(ptr_encindex);
3548 if (!rb_enc_asciicompat(str_enc) || !rb_enc_asciicompat(ptr_enc)) {
3554 rb_str_change_terminator_length(str, rb_enc_mbminlen(str_enc), rb_enc_mbminlen(ptr_enc));
3560 ptr_cr = coderange_scan(ptr,
len, ptr_enc);
3569 *ptr_cr_ret = ptr_cr;
3571 if (str_encindex != ptr_encindex &&
3574 str_enc = rb_enc_from_index(str_encindex);
3575 ptr_enc = rb_enc_from_index(ptr_encindex);
3580 res_encindex = str_encindex;
3585 res_encindex = str_encindex;
3589 res_encindex = ptr_encindex;
3594 res_encindex = str_encindex;
3601 res_encindex = str_encindex;
3607 rb_raise(rb_eArgError,
"negative string size (or size too big)");
3609 str_buf_cat(str, ptr,
len);
3615 rb_enc_inspect_name(str_enc), rb_enc_inspect_name(ptr_enc));
3622 return rb_enc_cr_str_buf_cat(str, ptr,
len,
3631 rb_encoding *enc = rb_enc_from_index(encindex);
3632 if (rb_enc_asciicompat(enc)) {
3633 return rb_enc_cr_str_buf_cat(str, ptr, strlen(ptr),
3639 unsigned int c = (
unsigned char)*ptr;
3640 int len = rb_enc_codelen(c, enc);
3641 rb_enc_mbcput(c, buf, enc);
3642 rb_enc_cr_str_buf_cat(str, buf,
len,
3655 if (str_enc_fastpath(str)) {
3692rb_str_concat_literals(
size_t num,
const VALUE *strary)
3696 unsigned long len = 1;
3703 str_enc_copy_direct(str, strary[0]);
3705 for (i = s; i < num; ++i) {
3706 const VALUE v = strary[i];
3710 if (encidx != ENCINDEX_US_ASCII) {
3712 rb_enc_set_index(str, encidx);
3737rb_str_concat_multi(
int argc,
VALUE *argv,
VALUE str)
3739 str_modifiable(str);
3744 else if (argc > 1) {
3747 rb_enc_copy(arg_str, str);
3748 for (i = 0; i < argc; i++) {
3781rb_str_append_as_bytes(
int argc,
VALUE *argv,
VALUE str)
3783 long needed_capacity = 0;
3787 for (
int index = 0; index < argc; index++) {
3788 VALUE obj = argv[index];
3801 "wrong argument type %"PRIsVALUE
" (expected String or Integer)",
3808 str_ensure_available_capa(str, needed_capacity);
3811 for (
int index = 0; index < argc; index++) {
3812 VALUE obj = argv[index];
3817 argv[index] = obj = rb_int_and(obj,
INT2FIX(0xff));
3818 char byte = (char)(
NUM2INT(obj) & 0xFF);
3827 memcpy(sptr, ptr,
len);
3832 rb_bug(
"append_as_bytes arguments should have been validated");
3836 STR_SET_LEN(str,
RSTRING_LEN(str) + needed_capacity);
3837 TERM_FILL(sptr, TERM_LEN(str));
3842 for (
int index = 0; index < argc; index++) {
3843 VALUE obj = argv[index];
3860 rb_bug(
"append_as_bytes arguments should have been validated");
3930 rb_encoding *enc = STR_ENC_GET(str1);
3934 if (rb_num_to_uint(str2, &code) == 0) {
3947 encidx = rb_ascii8bit_appendable_encoding_index(enc, code);
3950 rb_str_buf_cat_byte(str1, (
unsigned char)code);
3958 switch (
len = rb_enc_codelen(code, enc)) {
3959 case ONIGERR_INVALID_CODE_POINT_VALUE:
3960 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3962 case ONIGERR_TOO_BIG_WIDE_CHAR_VALUE:
3968 rb_enc_mbcput(code, buf, enc);
3969 if (rb_enc_precise_mbclen(buf, buf +
len + 1, enc) !=
len) {
3970 rb_raise(
rb_eRangeError,
"invalid codepoint 0x%X in %s", code, rb_enc_name(enc));
3972 rb_str_resize(str1, pos+
len);
3986rb_ascii8bit_appendable_encoding_index(rb_encoding *enc,
unsigned int code)
3988 int encidx = rb_enc_to_index(enc);
3990 if (encidx == ENCINDEX_ASCII_8BIT || encidx == ENCINDEX_US_ASCII) {
3995 if (encidx == ENCINDEX_US_ASCII && code > 127) {
3996 return ENCINDEX_ASCII_8BIT;
4019rb_str_prepend_multi(
int argc,
VALUE *argv,
VALUE str)
4021 str_modifiable(str);
4026 else if (argc > 1) {
4029 rb_enc_copy(arg_str, str);
4030 for (i = 0; i < argc; i++) {
4043 st_index_t precomputed_hash;
4044 memcpy(&precomputed_hash,
RSTRING_END(str) + TERM_LEN(str),
sizeof(precomputed_hash));
4046 RUBY_ASSERT(precomputed_hash == str_do_hash(str));
4047 return precomputed_hash;
4050 return str_do_hash(str);
4057 const char *ptr1, *ptr2;
4060 return (len1 != len2 ||
4062 memcmp(ptr1, ptr2, len1) != 0);
4076rb_str_hash_m(
VALUE str)
4082#define lesser(a,b) (((a)>(b))?(b):(a))
4094 if (idx1 == idx2)
return TRUE;
4099 if (rb_enc_asciicompat(rb_enc_from_index(idx2)))
4103 if (rb_enc_asciicompat(rb_enc_from_index(idx1)))
4113 const char *ptr1, *ptr2;
4116 if (str1 == str2)
return 0;
4119 if (ptr1 == ptr2 || (retval = memcmp(ptr1, ptr2, lesser(len1, len2))) == 0) {
4128 if (len1 > len2)
return 1;
4131 if (retval > 0)
return 1;
4158 if (str1 == str2)
return Qtrue;
4165 return rb_str_eql_internal(str1, str2);
4189 if (str1 == str2)
return Qtrue;
4191 return rb_str_eql_internal(str1, str2);
4222 return rb_invcmp(str1, str2);
4264 return str_casecmp(str1, s);
4272 const char *p1, *p1end, *p2, *p2end;
4274 enc = rb_enc_compatible(str1, str2);
4281 if (single_byte_optimizable(str1) && single_byte_optimizable(str2)) {
4282 while (p1 < p1end && p2 < p2end) {
4284 unsigned int c1 =
TOLOWER(*p1 & 0xff);
4285 unsigned int c2 =
TOLOWER(*p2 & 0xff);
4287 return INT2FIX(c1 < c2 ? -1 : 1);
4294 while (p1 < p1end && p2 < p2end) {
4295 int l1, c1 = rb_enc_ascget(p1, p1end, &l1, enc);
4296 int l2, c2 = rb_enc_ascget(p2, p2end, &l2, enc);
4298 if (0 <= c1 && 0 <= c2) {
4302 return INT2FIX(c1 < c2 ? -1 : 1);
4306 l1 = rb_enc_mbclen(p1, p1end, enc);
4307 l2 = rb_enc_mbclen(p2, p2end, enc);
4308 len = l1 < l2 ? l1 : l2;
4309 r = memcmp(p1, p2,
len);
4311 return INT2FIX(r < 0 ? -1 : 1);
4313 return INT2FIX(l1 < l2 ? -1 : 1);
4354 return str_casecmp_p(str1, s);
4361 VALUE folded_str1, folded_str2;
4362 VALUE fold_opt = sym_fold;
4364 enc = rb_enc_compatible(str1, str2);
4369 folded_str1 = rb_str_downcase(1, &fold_opt, str1);
4370 folded_str2 = rb_str_downcase(1, &fold_opt, str2);
4372 return rb_str_eql(folded_str1, folded_str2);
4376strseq_core(
const char *str_ptr,
const char *str_ptr_end,
long str_len,
4377 const char *sub_ptr,
long sub_len,
long offset, rb_encoding *enc)
4379 const char *search_start = str_ptr;
4380 long pos, search_len = str_len - offset;
4384 pos =
rb_memsearch(sub_ptr, sub_len, search_start, search_len, enc);
4385 if (pos < 0)
return pos;
4387 if (t == search_start + pos)
break;
4388 search_len -= t - search_start;
4389 if (search_len <= 0)
return -1;
4390 offset += t - search_start;
4393 return pos + offset;
4397#define rb_str_index(str, sub, offset) rb_strseq_index(str, sub, offset, 0)
4398#define rb_str_byteindex(str, sub, offset) rb_strseq_index(str, sub, offset, 1)
4401rb_strseq_index(
VALUE str,
VALUE sub,
long offset,
int in_byte)
4403 const char *str_ptr, *str_ptr_end, *sub_ptr;
4404 long str_len, sub_len;
4407 enc = rb_enc_check(str, sub);
4408 if (is_broken_string(sub))
return -1;
4416 if (str_len < sub_len)
return -1;
4419 long str_len_char, sub_len_char;
4420 int single_byte = single_byte_optimizable(str);
4421 str_len_char = (in_byte || single_byte) ? str_len : str_strlen(str, enc);
4422 sub_len_char = in_byte ? sub_len : str_strlen(sub, enc);
4424 offset += str_len_char;
4425 if (offset < 0)
return -1;
4427 if (str_len_char - offset < sub_len_char)
return -1;
4428 if (!in_byte) offset = str_offset(str_ptr, str_ptr_end, offset, enc, single_byte);
4431 if (sub_len == 0)
return offset;
4434 return strseq_core(str_ptr, str_ptr_end, str_len, sub_ptr, sub_len, offset, enc);
4448rb_str_index_m(
int argc,
VALUE *argv,
VALUE str)
4452 rb_encoding *enc = STR_ENC_GET(str);
4455 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4456 long slen = str_strlen(str, enc);
4458 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4471 enc, single_byte_optimizable(str));
4482 pos = rb_str_index(str, sub, pos);
4496str_ensure_byte_pos(
VALUE str,
long pos)
4498 if (!single_byte_optimizable(str)) {
4499 const char *s = RSTRING_PTR(str);
4501 const char *p = s + pos;
4502 if (!at_char_boundary(s, p, e, rb_enc_get(str))) {
4504 "offset %ld does not land on character boundary", pos);
4551rb_str_byteindex_m(
int argc,
VALUE *argv,
VALUE str)
4557 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4558 long slen = RSTRING_LEN(str);
4560 if (pos < 0 ? (pos += slen) < 0 : pos > slen) {
4571 str_ensure_byte_pos(str, pos);
4583 pos = rb_str_byteindex(str, sub, pos);
4584 if (pos >= 0)
return LONG2NUM(pos);
4591memrchr(
const char *search_str,
int chr,
long search_len)
4593 const char *ptr = search_str + search_len;
4594 while (ptr > search_str) {
4595 if ((
unsigned char)*(--ptr) == chr)
return (
void *)ptr;
4603str_rindex(
VALUE str,
VALUE sub,
const char *s, rb_encoding *enc)
4605 char *hit, *adjusted;
4607 long slen, searchlen;
4610 sbeg = RSTRING_PTR(str);
4611 slen = RSTRING_LEN(sub);
4612 if (slen == 0)
return s - sbeg;
4614 t = RSTRING_PTR(sub);
4616 searchlen = s - sbeg + 1;
4618 if (memcmp(s, t, slen) == 0) {
4623 hit = memrchr(sbeg, c, searchlen);
4626 if (hit != adjusted) {
4627 searchlen = adjusted - sbeg;
4630 if (memcmp(hit, t, slen) == 0)
4632 searchlen = adjusted - sbeg;
4633 }
while (searchlen > 0);
4647 enc = rb_enc_check(str, sub);
4648 if (is_broken_string(sub))
return -1;
4649 singlebyte = single_byte_optimizable(str);
4650 len = singlebyte ? RSTRING_LEN(str) : str_strlen(str, enc);
4651 slen = str_strlen(sub, enc);
4654 if (
len < slen)
return -1;
4655 if (
len - pos < slen) pos =
len - slen;
4656 if (
len == 0)
return pos;
4658 sbeg = RSTRING_PTR(str);
4661 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4667 s = str_nth(sbeg,
RSTRING_END(str), pos, enc, singlebyte);
4668 return str_rindex(str, sub, s, enc);
4729rb_str_rindex_m(
int argc,
VALUE *argv,
VALUE str)
4733 rb_encoding *enc = STR_ENC_GET(str);
4734 long pos,
len = str_strlen(str, enc);
4736 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4738 if (pos < 0 && (pos +=
len) < 0) {
4744 if (pos >
len) pos =
len;
4752 pos = str_offset(RSTRING_PTR(str),
RSTRING_END(str), pos,
4753 enc, single_byte_optimizable(str));
4764 pos = rb_str_rindex(str, sub, pos);
4774rb_str_byterindex(
VALUE str,
VALUE sub,
long pos)
4780 enc = rb_enc_check(str, sub);
4781 if (is_broken_string(sub))
return -1;
4782 len = RSTRING_LEN(str);
4783 slen = RSTRING_LEN(sub);
4786 if (
len < slen)
return -1;
4787 if (
len - pos < slen) pos =
len - slen;
4788 if (
len == 0)
return pos;
4790 sbeg = RSTRING_PTR(str);
4793 if (memcmp(sbeg, RSTRING_PTR(sub), RSTRING_LEN(sub)) == 0)
4800 return str_rindex(str, sub, s, enc);
4865rb_str_byterindex_m(
int argc,
VALUE *argv,
VALUE str)
4869 long pos,
len = RSTRING_LEN(str);
4871 if (
rb_scan_args(argc, argv,
"11", &sub, &initpos) == 2) {
4873 if (pos < 0 && (pos +=
len) < 0) {
4879 if (pos >
len) pos =
len;
4885 str_ensure_byte_pos(str, pos);
4897 pos = rb_str_byterindex(str, sub, pos);
4898 if (pos >= 0)
return LONG2NUM(pos);
4934 switch (OBJ_BUILTIN_TYPE(y)) {
4986rb_str_match_m(
int argc,
VALUE *argv,
VALUE str)
4993 result = rb_funcallv(get_pat(re), rb_intern(
"match"), argc, argv);
5025rb_str_match_m_p(
int argc,
VALUE *argv,
VALUE str)
5029 re = get_pat(argv[0]);
5030 return rb_reg_match_p(re, str, argc > 1 ?
NUM2LONG(argv[1]) : 0);
5039static enum neighbor_char
5040enc_succ_char(
char *p,
long len, rb_encoding *enc)
5045 if (rb_enc_mbminlen(enc) > 1) {
5047 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5049 return NEIGHBOR_NOT_CHAR;
5051 c = rb_enc_mbc_to_codepoint(p, p +
len, enc) + 1;
5053 if (!l)
return NEIGHBOR_NOT_CHAR;
5054 if (l !=
len)
return NEIGHBOR_WRAPPED;
5055 rb_enc_mbcput(c, p, enc);
5056 r = rb_enc_precise_mbclen(p, p +
len, enc);
5058 return NEIGHBOR_NOT_CHAR;
5060 return NEIGHBOR_FOUND;
5063 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0xff; i--)
5066 return NEIGHBOR_WRAPPED;
5067 ++((
unsigned char*)p)[i];
5068 l = rb_enc_precise_mbclen(p, p+
len, enc);
5072 return NEIGHBOR_FOUND;
5075 memset(p+l, 0xff,
len-l);
5081 for (len2 =
len-1; 0 < len2; len2--) {
5082 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5086 memset(p+len2+1, 0xff,
len-(len2+1));
5091static enum neighbor_char
5092enc_pred_char(
char *p,
long len, rb_encoding *enc)
5096 if (rb_enc_mbminlen(enc) > 1) {
5098 int r = rb_enc_precise_mbclen(p, p +
len, enc), c;
5100 return NEIGHBOR_NOT_CHAR;
5102 c = rb_enc_mbc_to_codepoint(p, p +
len, enc);
5103 if (!c)
return NEIGHBOR_NOT_CHAR;
5106 if (!l)
return NEIGHBOR_NOT_CHAR;
5107 if (l !=
len)
return NEIGHBOR_WRAPPED;
5108 rb_enc_mbcput(c, p, enc);
5109 r = rb_enc_precise_mbclen(p, p +
len, enc);
5111 return NEIGHBOR_NOT_CHAR;
5113 return NEIGHBOR_FOUND;
5116 for (i =
len-1; 0 <= i && (
unsigned char)p[i] == 0; i--)
5119 return NEIGHBOR_WRAPPED;
5120 --((
unsigned char*)p)[i];
5121 l = rb_enc_precise_mbclen(p, p+
len, enc);
5125 return NEIGHBOR_FOUND;
5128 memset(p+l, 0,
len-l);
5134 for (len2 =
len-1; 0 < len2; len2--) {
5135 l2 = rb_enc_precise_mbclen(p, p+len2, enc);
5139 memset(p+len2+1, 0,
len-(len2+1));
5153static enum neighbor_char
5154enc_succ_alnum_char(
char *p,
long len, rb_encoding *enc,
char *carry)
5156 enum neighbor_char ret;
5160 char save[ONIGENC_CODE_TO_MBC_MAXLEN];
5164 const int max_gaps = 1;
5166 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5168 ctype = ONIGENC_CTYPE_DIGIT;
5170 ctype = ONIGENC_CTYPE_ALPHA;
5172 return NEIGHBOR_NOT_CHAR;
5175 for (
try = 0;
try <= max_gaps; ++
try) {
5176 ret = enc_succ_char(p,
len, enc);
5177 if (ret == NEIGHBOR_FOUND) {
5178 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5180 return NEIGHBOR_FOUND;
5187 ret = enc_pred_char(p,
len, enc);
5188 if (ret == NEIGHBOR_FOUND) {
5189 c = rb_enc_mbc_to_codepoint(p, p+
len, enc);
5202 return NEIGHBOR_NOT_CHAR;
5205 if (ctype != ONIGENC_CTYPE_DIGIT) {
5207 return NEIGHBOR_WRAPPED;
5211 enc_succ_char(carry,
len, enc);
5212 return NEIGHBOR_WRAPPED;
5280 str =
rb_str_new(RSTRING_PTR(orig), RSTRING_LEN(orig));
5281 rb_enc_cr_str_copy_for_substr(str, orig);
5282 return str_succ(str);
5289 char *sbeg, *s, *e, *last_alnum = 0;
5290 int found_alnum = 0;
5292 char carry[ONIGENC_CODE_TO_MBC_MAXLEN] =
"\1";
5293 long carry_pos = 0, carry_len = 1;
5294 enum neighbor_char neighbor = NEIGHBOR_FOUND;
5296 slen = RSTRING_LEN(str);
5297 if (slen == 0)
return str;
5299 enc = STR_ENC_GET(str);
5300 sbeg = RSTRING_PTR(str);
5301 s = e = sbeg + slen;
5303 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5304 if (neighbor == NEIGHBOR_NOT_CHAR && last_alnum) {
5310 l = rb_enc_precise_mbclen(s, e, enc);
5311 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5312 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5313 neighbor = enc_succ_alnum_char(s, l, enc, carry);
5315 case NEIGHBOR_NOT_CHAR:
5317 case NEIGHBOR_FOUND:
5319 case NEIGHBOR_WRAPPED:
5324 carry_pos = s - sbeg;
5329 while ((s = rb_enc_prev_char(sbeg, s, e, enc)) != 0) {
5330 enum neighbor_char neighbor;
5331 char tmp[ONIGENC_CODE_TO_MBC_MAXLEN];
5332 l = rb_enc_precise_mbclen(s, e, enc);
5333 if (!ONIGENC_MBCLEN_CHARFOUND_P(l))
continue;
5334 l = ONIGENC_MBCLEN_CHARFOUND_LEN(l);
5336 neighbor = enc_succ_char(tmp, l, enc);
5338 case NEIGHBOR_FOUND:
5342 case NEIGHBOR_WRAPPED:
5345 case NEIGHBOR_NOT_CHAR:
5348 if (rb_enc_precise_mbclen(s, s+l, enc) != l) {
5350 enc_succ_char(s, l, enc);
5352 if (!rb_enc_asciicompat(enc)) {
5353 MEMCPY(carry, s,
char, l);
5356 carry_pos = s - sbeg;
5360 RESIZE_CAPA(str, slen + carry_len);
5361 sbeg = RSTRING_PTR(str);
5362 s = sbeg + carry_pos;
5363 memmove(s + carry_len, s, slen - carry_pos);
5364 memmove(s, carry, carry_len);
5366 STR_SET_LEN(str, slen);
5367 TERM_FILL(&sbeg[slen], rb_enc_mbminlen(enc));
5381rb_str_succ_bang(
VALUE str)
5389all_digits_p(
const char *s,
long len)
5443 VALUE end, exclusive;
5447 return rb_str_upto_each(beg, end,
RTEST(exclusive), str_upto_i,
Qnil);
5453 VALUE current, after_end;
5460 enc = rb_enc_check(beg, end);
5461 ascii = (is_ascii_string(beg) && is_ascii_string(end));
5463 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1 && ascii) {
5464 char c = RSTRING_PTR(beg)[0];
5465 char e = RSTRING_PTR(end)[0];
5467 if (c > e || (excl && c == e))
return beg;
5469 VALUE str = rb_enc_str_new(&c, 1, enc);
5471 if ((*each)(str, arg))
break;
5472 if (!excl && c == e)
break;
5474 if (excl && c == e)
break;
5479 if (ascii &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
ISDIGIT(RSTRING_PTR(end)[0]) &&
5480 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg)) &&
5481 all_digits_p(RSTRING_PTR(end), RSTRING_LEN(end))) {
5486 b = rb_str_to_inum(beg, 10, FALSE);
5487 e = rb_str_to_inum(end, 10, FALSE);
5494 if (excl && bi == ei)
break;
5495 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5500 ID op = excl ?
'<' : idLE;
5501 VALUE args[2], fmt = rb_fstring_lit(
"%.*d");
5506 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5507 b = rb_funcallv(b, succ, 0, 0);
5514 if (n > 0 || (excl && n == 0))
return beg;
5516 after_end = rb_funcallv(end, succ, 0, 0);
5521 next = rb_funcallv(current, succ, 0, 0);
5522 if ((*each)(current, arg))
break;
5523 if (
NIL_P(next))
break;
5527 if (RSTRING_LEN(current) > RSTRING_LEN(end) || RSTRING_LEN(current) == 0)
5542 if (is_ascii_string(beg) &&
ISDIGIT(RSTRING_PTR(beg)[0]) &&
5543 all_digits_p(RSTRING_PTR(beg), RSTRING_LEN(beg))) {
5544 VALUE b, args[2], fmt = rb_fstring_lit(
"%.*d");
5546 b = rb_str_to_inum(beg, 10, FALSE);
5552 if ((*each)(
rb_enc_sprintf(usascii,
"%.*ld", width, bi), arg))
break;
5560 if ((*each)(
rb_str_format(numberof(args), args, fmt), arg))
break;
5561 b = rb_funcallv(b, succ, 0, 0);
5567 VALUE next = rb_funcallv(current, succ, 0, 0);
5568 if ((*each)(current, arg))
break;
5571 if (RSTRING_LEN(current) == 0)
5582 if (!
rb_equal(str, *argp))
return 0;
5596 if (rb_enc_asciicompat(STR_ENC_GET(beg)) &&
5597 rb_enc_asciicompat(STR_ENC_GET(end)) &&
5598 rb_enc_asciicompat(STR_ENC_GET(val))) {
5599 const char *bp = RSTRING_PTR(beg);
5600 const char *ep = RSTRING_PTR(end);
5601 const char *vp = RSTRING_PTR(val);
5602 if (RSTRING_LEN(beg) == 1 && RSTRING_LEN(end) == 1) {
5603 if (RSTRING_LEN(val) == 0 || RSTRING_LEN(val) > 1)
5610 if (ISASCII(b) && ISASCII(e) && ISASCII(v)) {
5611 if (b <= v && v < e)
return Qtrue;
5612 return RBOOL(!
RTEST(exclusive) && v == e);
5619 all_digits_p(bp, RSTRING_LEN(beg)) &&
5620 all_digits_p(ep, RSTRING_LEN(end))) {
5625 rb_str_upto_each(beg, end,
RTEST(exclusive), include_range_i, (
VALUE)&val);
5627 return RBOOL(
NIL_P(val));
5650 return rb_str_subpat(str, indx,
INT2FIX(0));
5653 if (rb_str_index(str, indx, 0) != -1)
5659 long beg,
len = str_strlen(str, NULL);
5671 return str_substr(str, idx, 1, FALSE);
5690rb_str_aref_m(
int argc,
VALUE *argv,
VALUE str)
5694 return rb_str_subpat(str, argv[0], argv[1]);
5697 return rb_str_substr_two_fixnums(str, argv[0], argv[1], TRUE);
5701 return rb_str_aref(str, argv[0]);
5707 char *ptr = RSTRING_PTR(str);
5708 long olen = RSTRING_LEN(str), nlen;
5710 str_modifiable(str);
5711 if (
len > olen)
len = olen;
5713 if (str_embed_capa(str) >= nlen + TERM_LEN(str)) {
5715 int fl = (int)(
RBASIC(str)->flags & (STR_NOEMBED|STR_SHARED|STR_NOFREE));
5717 ptr =
RSTRING(str)->as.embed.ary;
5718 memmove(ptr, oldptr +
len, nlen);
5719 if (fl == STR_NOEMBED)
xfree(oldptr);
5722 if (!STR_SHARED_P(str)) {
5724 rb_enc_cr_str_exact_copy(shared, str);
5729 STR_SET_LEN(str, nlen);
5731 if (!SHARABLE_MIDDLE_SUBSTRING) {
5732 TERM_FILL(ptr + nlen, TERM_LEN(str));
5739rb_str_update_1(
VALUE str,
long beg,
long len,
VALUE val,
long vbeg,
long vlen)
5745 if (beg == 0 && vlen == 0) {
5750 str_modify_keep_cr(str);
5754 RESIZE_CAPA(str, slen + vlen -
len);
5755 sptr = RSTRING_PTR(str);
5764 memmove(sptr + beg + vlen,
5766 slen - (beg +
len));
5768 if (vlen < beg &&
len < 0) {
5772 memmove(sptr + beg, RSTRING_PTR(val) + vbeg, vlen);
5775 STR_SET_LEN(str, slen);
5776 TERM_FILL(&sptr[slen], TERM_LEN(str));
5783 rb_str_update_1(str, beg,
len, val, 0, RSTRING_LEN(val));
5792 int singlebyte = single_byte_optimizable(str);
5798 enc = rb_enc_check(str, val);
5799 slen = str_strlen(str, enc);
5801 if ((slen < beg) || ((beg < 0) && (beg + slen < 0))) {
5810 if (
len > slen - beg) {
5813 p = str_nth(RSTRING_PTR(str),
RSTRING_END(str), beg, enc, singlebyte);
5818 beg = p - RSTRING_PTR(str);
5820 rb_str_update_0(str, beg,
len, val);
5821 rb_enc_associate(str, enc);
5832 long start, end,
len;
5842 if ((nth >= regs->num_regs) || ((nth < 0) && (-nth >= regs->num_regs))) {
5846 nth += regs->num_regs;
5856 enc = rb_enc_check_str(str, val);
5857 rb_str_update_0(str, start,
len, val);
5858 rb_enc_associate(str, enc);
5866 switch (
TYPE(indx)) {
5868 rb_str_subpat_set(str, indx,
INT2FIX(0), val);
5872 beg = rb_str_index(str, indx, 0);
5926rb_str_aset_m(
int argc,
VALUE *argv,
VALUE str)
5930 rb_str_subpat_set(str, argv[0], argv[1], argv[2]);
5938 return rb_str_aset(str, argv[0], argv[1]);
5998rb_str_slice_bang(
int argc,
VALUE *argv,
VALUE str)
6006 str_modify_keep_cr(str);
6014 if ((nth += regs->num_regs) <= 0)
return Qnil;
6016 else if (nth >= regs->num_regs)
return Qnil;
6018 len = END(nth) - beg;
6021 else if (argc == 2) {
6030 beg = p - RSTRING_PTR(str);
6034 beg = rb_str_index(str, indx, 0);
6035 if (beg == -1)
return Qnil;
6036 len = RSTRING_LEN(indx);
6048 beg = p - RSTRING_PTR(str);
6057 beg = p - RSTRING_PTR(str);
6061 rb_enc_cr_str_copy_for_substr(result, str);
6069 char *sptr = RSTRING_PTR(str);
6070 long slen = RSTRING_LEN(str);
6071 if (beg +
len > slen)
6075 slen - (beg +
len));
6077 STR_SET_LEN(str, slen);
6078 TERM_FILL(&sptr[slen], TERM_LEN(str));
6089 switch (OBJ_BUILTIN_TYPE(pat)) {
6108get_pat_quoted(
VALUE pat,
int check)
6112 switch (OBJ_BUILTIN_TYPE(pat)) {
6126 if (check && is_broken_string(pat)) {
6127 rb_exc_raise(rb_reg_check_preprocess(pat));
6133rb_pat_search(
VALUE pat,
VALUE str,
long pos,
int set_backref_str)
6136 pos = rb_str_byteindex(str, pat, pos);
6137 if (set_backref_str) {
6139 str = rb_str_new_frozen_String(str);
6140 rb_backref_set_string(str, pos, RSTRING_LEN(pat));
6149 return rb_reg_search0(pat, str, pos, 0, set_backref_str);
6169rb_str_sub_bang(
int argc,
VALUE *argv,
VALUE str)
6183 hash = rb_check_hash_type(argv[1]);
6189 pat = get_pat_quoted(argv[0], 1);
6191 str_modifiable(str);
6192 beg = rb_pat_search(pat, str, 0, 1);
6206 end0 = beg0 + RSTRING_LEN(pat);
6215 if (iter || !
NIL_P(hash)) {
6216 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6219 repl = rb_obj_as_string(
rb_yield(match0));
6222 repl = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6223 repl = rb_obj_as_string(repl);
6225 str_mod_check(str, p,
len);
6226 rb_check_frozen(str);
6232 enc = rb_enc_compatible(str, repl);
6234 rb_encoding *str_enc = STR_ENC_GET(str);
6235 p = RSTRING_PTR(str);
len = RSTRING_LEN(str);
6239 rb_enc_inspect_name(str_enc),
6240 rb_enc_inspect_name(STR_ENC_GET(repl)));
6242 enc = STR_ENC_GET(repl);
6245 rb_enc_associate(str, enc);
6255 rlen = RSTRING_LEN(repl);
6256 len = RSTRING_LEN(str);
6258 RESIZE_CAPA(str,
len + rlen - plen);
6260 p = RSTRING_PTR(str);
6262 memmove(p + beg0 + rlen, p + beg0 + plen,
len - beg0 - plen);
6264 rp = RSTRING_PTR(repl);
6265 memmove(p + beg0, rp, rlen);
6267 STR_SET_LEN(str,
len);
6268 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
6297 rb_str_sub_bang(argc, argv, str);
6302str_gsub(
int argc,
VALUE *argv,
VALUE str,
int bang)
6305 long beg, beg0, end0;
6306 long offset, blen, slen,
len, last;
6307 enum {STR, ITER, MAP} mode = STR;
6309 int need_backref = -1;
6310 rb_encoding *str_enc;
6319 hash = rb_check_hash_type(argv[1]);
6328 rb_error_arity(argc, 1, 2);
6331 pat = get_pat_quoted(argv[0], 1);
6332 beg = rb_pat_search(pat, str, 0, need_backref);
6334 if (bang)
return Qnil;
6339 blen = RSTRING_LEN(str) + 30;
6341 sp = RSTRING_PTR(str);
6342 slen = RSTRING_LEN(str);
6344 str_enc = STR_ENC_GET(str);
6345 rb_enc_associate(dest, str_enc);
6353 end0 = beg0 + RSTRING_LEN(pat);
6364 val = rb_obj_as_string(
rb_yield(match0));
6367 val = rb_hash_aref(hash,
rb_str_subseq(str, beg0, end0 - beg0));
6368 val = rb_obj_as_string(val);
6370 str_mod_check(str, sp, slen);
6375 else if (need_backref) {
6377 if (need_backref < 0) {
6378 need_backref = val != repl;
6385 len = beg0 - offset;
6399 if (RSTRING_LEN(str) <= end0)
break;
6400 len = rb_enc_fast_mbclen(RSTRING_PTR(str)+end0,
RSTRING_END(str), str_enc);
6402 offset = end0 +
len;
6404 cp = RSTRING_PTR(str) + offset;
6405 if (offset > RSTRING_LEN(str))
break;
6406 beg = rb_pat_search(pat, str, offset, need_backref);
6410 if (RSTRING_LEN(str) > offset) {
6413 rb_pat_search(pat, str, last, 1);
6415 str_shared_replace(str, dest);
6443rb_str_gsub_bang(
int argc,
VALUE *argv,
VALUE str)
6445 str_modify_keep_cr(str);
6446 return str_gsub(argc, argv, str, 1);
6469 return str_gsub(argc, argv, str, 0);
6487 str_modifiable(str);
6488 if (str == str2)
return str;
6492 return str_replace(str, str2);
6507rb_str_clear(
VALUE str)
6511 STR_SET_LEN(str, 0);
6512 RSTRING_PTR(str)[0] = 0;
6513 if (rb_enc_asciicompat(STR_ENC_GET(str)))
6532rb_str_chr(
VALUE str)
6556 pos += RSTRING_LEN(str);
6557 if (pos < 0 || RSTRING_LEN(str) <= pos)
6560 return INT2FIX((
unsigned char)RSTRING_PTR(str)[pos]);
6579 long len = RSTRING_LEN(str);
6580 char *ptr, *head, *left = 0;
6584 if (pos < -
len ||
len <= pos)
6591 char byte = (char)(
NUM2INT(w) & 0xFF);
6593 if (!str_independent(str))
6594 str_make_independent(str);
6595 enc = STR_ENC_GET(str);
6596 head = RSTRING_PTR(str);
6598 if (!STR_EMBED_P(str)) {
6604 if (ISASCII(
byte))
goto end;
6605 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6613 width = rb_enc_precise_mbclen(left, head+
len, enc);
6615 nlen = rb_enc_precise_mbclen(left, head+
len, enc);
6631str_byte_substr(
VALUE str,
long beg,
long len,
int empty)
6633 long n = RSTRING_LEN(str);
6635 if (beg > n ||
len < 0)
return Qnil;
6638 if (beg < 0)
return Qnil;
6643 if (!empty)
return Qnil;
6647 VALUE str2 = str_subseq(str, beg,
len);
6649 str_enc_copy_direct(str2, str);
6651 if (RSTRING_LEN(str2) == 0) {
6652 if (!rb_enc_asciicompat(STR_ENC_GET(str)))
6686 long beg,
len = RSTRING_LEN(str);
6694 return str_byte_substr(str, beg,
len, TRUE);
6699 return str_byte_substr(str, idx, 1, FALSE);
6746rb_str_byteslice(
int argc,
VALUE *argv,
VALUE str)
6751 return str_byte_substr(str, beg,
len, TRUE);
6754 return str_byte_aref(str, argv[0]);
6758str_check_beg_len(
VALUE str,
long *beg,
long *
len)
6760 long end, slen = RSTRING_LEN(str);
6763 if ((slen < *beg) || ((*beg < 0) && (*beg + slen < 0))) {
6772 if (*
len > slen - *beg) {
6776 str_ensure_byte_pos(str, *beg);
6777 str_ensure_byte_pos(str, end);
6802rb_str_bytesplice(
int argc,
VALUE *argv,
VALUE str)
6804 long beg,
len, vbeg, vlen;
6809 if (!(argc == 2 || argc == 3 || argc == 5)) {
6810 rb_raise(rb_eArgError,
"wrong number of arguments (given %d, expected 2, 3, or 5)", argc);
6814 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6815 rb_builtin_class_name(argv[0]));
6822 vlen = RSTRING_LEN(val);
6827 rb_raise(
rb_eTypeError,
"wrong argument type %s (expected Range)",
6828 rb_builtin_class_name(argv[2]));
6840 vlen = RSTRING_LEN(val);
6848 str_check_beg_len(str, &beg, &
len);
6849 str_check_beg_len(val, &vbeg, &vlen);
6850 str_modify_keep_cr(str);
6853 rb_enc_associate(str, rb_enc_check(str, val));
6856 rb_str_update_1(str, beg,
len, val, vbeg, vlen);
6874rb_str_reverse(
VALUE str)
6881 if (RSTRING_LEN(str) <= 1)
return str_duplicate(
rb_cString, str);
6882 enc = STR_ENC_GET(str);
6888 if (RSTRING_LEN(str) > 1) {
6889 if (single_byte_optimizable(str)) {
6896 int clen = rb_enc_fast_mbclen(s, e, enc);
6904 cr = rb_enc_asciicompat(enc) ?
6907 int clen = rb_enc_mbclen(s, e, enc);
6916 STR_SET_LEN(rev, RSTRING_LEN(str));
6917 str_enc_copy_direct(rev, str);
6937rb_str_reverse_bang(
VALUE str)
6939 if (RSTRING_LEN(str) > 1) {
6940 if (single_byte_optimizable(str)) {
6943 str_modify_keep_cr(str);
6944 s = RSTRING_PTR(str);
6953 str_shared_replace(str, rb_str_reverse(str));
6957 str_modify_keep_cr(str);
6982 i = rb_str_index(str, arg, 0);
6984 return RBOOL(i != -1);
7026 rb_raise(rb_eArgError,
"invalid radix %d", base);
7028 return rb_str_to_inum(str, base, FALSE);
7052rb_str_to_f(
VALUE str)
7067rb_str_to_s(
VALUE str)
7077str_cat_char(
VALUE str,
unsigned int c, rb_encoding *enc)
7079 char s[RUBY_MAX_CHAR_LEN];
7080 int n = rb_enc_codelen(c, enc);
7082 rb_enc_mbcput(c, s, enc);
7087#define CHAR_ESC_LEN 13
7090rb_str_buf_cat_escaped_char(
VALUE result,
unsigned int c,
int unicode_p)
7092 char buf[CHAR_ESC_LEN + 1];
7100 snprintf(buf, CHAR_ESC_LEN,
"%c", c);
7102 else if (c < 0x10000) {
7103 snprintf(buf, CHAR_ESC_LEN,
"\\u%04X", c);
7106 snprintf(buf, CHAR_ESC_LEN,
"\\u{%X}", c);
7111 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", c);
7114 snprintf(buf, CHAR_ESC_LEN,
"\\x{%X}", c);
7117 l = (int)strlen(buf);
7123ruby_escaped_char(
int c)
7126 case '\0':
return "\\0";
7127 case '\n':
return "\\n";
7128 case '\r':
return "\\r";
7129 case '\t':
return "\\t";
7130 case '\f':
return "\\f";
7131 case '\013':
return "\\v";
7132 case '\010':
return "\\b";
7133 case '\007':
return "\\a";
7134 case '\033':
return "\\e";
7135 case '\x7f':
return "\\c?";
7141rb_str_escape(
VALUE str)
7144 rb_encoding *enc = rb_enc_from_index(encidx);
7145 const char *p = RSTRING_PTR(str);
7147 const char *prev = p;
7148 char buf[CHAR_ESC_LEN + 1];
7150 int unicode_p = rb_enc_unicode_p(enc);
7151 int asciicompat = rb_enc_asciicompat(enc);
7156 int n = rb_enc_precise_mbclen(p, pend, enc);
7158 if (p > prev) str_buf_cat(result, prev, p - prev);
7159 n = rb_enc_mbminlen(enc);
7161 n = (int)(pend - p);
7163 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7164 str_buf_cat(result, buf, strlen(buf));
7170 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7172 cc = ruby_escaped_char(c);
7174 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7175 str_buf_cat(result, cc, strlen(cc));
7178 else if (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c)) {
7181 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7182 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7186 if (p > prev) str_buf_cat(result, prev, p - prev);
7209 rb_encoding *enc = rb_enc_from_index(encidx);
7210 const char *p, *pend, *prev;
7211 char buf[CHAR_ESC_LEN + 1];
7214 int unicode_p = rb_enc_unicode_p(enc);
7215 int asciicompat = rb_enc_asciicompat(enc);
7219 rb_enc_associate(result, resenc);
7220 str_buf_cat2(result,
"\"");
7228 n = rb_enc_precise_mbclen(p, pend, enc);
7230 if (p > prev) str_buf_cat(result, prev, p - prev);
7231 n = rb_enc_mbminlen(enc);
7233 n = (int)(pend - p);
7235 snprintf(buf, CHAR_ESC_LEN,
"\\x%02X", *p & 0377);
7236 str_buf_cat(result, buf, strlen(buf));
7242 c = rb_enc_mbc_to_codepoint(p, pend, enc);
7244 if ((asciicompat || unicode_p) &&
7245 (c ==
'"'|| c ==
'\\' ||
7250 (cc ==
'$' || cc ==
'@' || cc ==
'{'))))) {
7251 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7252 str_buf_cat2(result,
"\\");
7253 if (asciicompat || enc == resenc) {
7259 case '\n': cc =
'n';
break;
7260 case '\r': cc =
'r';
break;
7261 case '\t': cc =
't';
break;
7262 case '\f': cc =
'f';
break;
7263 case '\013': cc =
'v';
break;
7264 case '\010': cc =
'b';
break;
7265 case '\007': cc =
'a';
break;
7266 case 033: cc =
'e';
break;
7267 default: cc = 0;
break;
7270 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7273 str_buf_cat(result, buf, 2);
7286 (asciicompat && rb_enc_isascii(c, enc) &&
ISPRINT(c))) {
7290 if (p - n > prev) str_buf_cat(result, prev, p - n - prev);
7291 rb_str_buf_cat_escaped_char(result, c, unicode_p);
7296 if (p > prev) str_buf_cat(result, prev, p - prev);
7297 str_buf_cat2(result,
"\"");
7302#define IS_EVSTR(p,e) ((p) < (e) && (*(p) == '$' || *(p) == '@' || *(p) == '{'))
7322 int encidx = rb_enc_get_index(str);
7323 rb_encoding *enc = rb_enc_from_index(encidx);
7325 const char *p, *pend;
7329 static const char nonascii_suffix[] =
".dup.force_encoding(\"%s\")";
7332 if (!rb_enc_asciicompat(enc)) {
7334 len += strlen(enc->name);
7337 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7340 unsigned char c = *p++;
7343 case '"':
case '\\':
7344 case '\n':
case '\r':
7345 case '\t':
case '\f':
7346 case '\013':
case '\010':
case '\007':
case '\033':
7351 clen = IS_EVSTR(p, pend) ? 2 : 1;
7359 if (u8 && c > 0x7F) {
7360 int n = rb_enc_precise_mbclen(p-1, pend, enc);
7362 unsigned int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7365 else if (cc <= 0xFFFFF)
7378 if (clen > LONG_MAX -
len) {
7385 p = RSTRING_PTR(str); pend = p + RSTRING_LEN(str);
7386 q = RSTRING_PTR(result); qend = q +
len + 1;
7390 unsigned char c = *p++;
7392 if (c ==
'"' || c ==
'\\') {
7396 else if (c ==
'#') {
7397 if (IS_EVSTR(p, pend)) *q++ =
'\\';
7400 else if (c ==
'\n') {
7404 else if (c ==
'\r') {
7408 else if (c ==
'\t') {
7412 else if (c ==
'\f') {
7416 else if (c ==
'\013') {
7420 else if (c ==
'\010') {
7424 else if (c ==
'\007') {
7428 else if (c ==
'\033') {
7438 int n = rb_enc_precise_mbclen(p-1, pend, enc) - 1;
7440 int cc = rb_enc_mbc_to_codepoint(p-1, pend, enc);
7443 snprintf(q, qend-q,
"u%04X", cc);
7445 snprintf(q, qend-q,
"u{%X}", cc);
7450 snprintf(q, qend-q,
"x%02X", c);
7456 if (!rb_enc_asciicompat(enc)) {
7457 snprintf(q, qend-q, nonascii_suffix, enc->name);
7461 rb_enc_associate_index(result, encidx);
7467unescape_ascii(
unsigned int c)
7491undump_after_backslash(
VALUE undumped,
const char **ss,
const char *s_end, rb_encoding **penc,
bool *utf8,
bool *binary)
7493 const char *s = *ss;
7497 unsigned char buf[6];
7498 static rb_encoding *enc_utf8 = NULL;
7515 *buf = unescape_ascii(*s);
7528 if (*penc != enc_utf8) {
7530 rb_enc_associate(undumped, enc_utf8);
7547 if (hexlen == 0 || hexlen > 6) {
7553 if (0xd800 <= c && c <= 0xdfff) {
7556 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7566 if (0xd800 <= c && c <= 0xdfff) {
7569 codelen = rb_enc_mbcput(c, (
char *)buf, *penc);
7597static VALUE rb_str_is_ascii_only_p(
VALUE str);
7615str_undump(
VALUE str)
7617 const char *s = RSTRING_PTR(str);
7619 rb_encoding *enc = rb_enc_get(str);
7620 VALUE undumped = rb_enc_str_new(s, 0L, enc);
7622 bool binary =
false;
7626 if (rb_str_is_ascii_only_p(str) ==
Qfalse) {
7629 if (!str_null_check(str, &w)) {
7632 if (RSTRING_LEN(str) < 2)
goto invalid_format;
7633 if (*s !=
'"')
goto invalid_format;
7651 static const char force_encoding_suffix[] =
".force_encoding(\"";
7652 static const char dup_suffix[] =
".dup";
7653 const char *encname;
7658 size =
sizeof(dup_suffix) - 1;
7659 if (s_end - s > size && memcmp(s, dup_suffix, size) == 0) s += size;
7661 size =
sizeof(force_encoding_suffix) - 1;
7662 if (s_end - s <= size)
goto invalid_format;
7663 if (memcmp(s, force_encoding_suffix, size) != 0)
goto invalid_format;
7667 rb_raise(
rb_eRuntimeError,
"dumped string contained Unicode escape but used force_encoding");
7671 s = memchr(s,
'"', s_end-s);
7673 if (!s)
goto invalid_format;
7674 if (s_end - s != 2)
goto invalid_format;
7675 if (s[0] !=
'"' || s[1] !=
')')
goto invalid_format;
7677 encidx = rb_enc_find_index2(encname, (
long)size);
7681 rb_enc_associate_index(undumped, encidx);
7691 undump_after_backslash(undumped, &s, s_end, &enc, &utf8, &binary);
7702 rb_raise(
rb_eRuntimeError,
"invalid dumped string; not wrapped with '\"' nor '\"...\".force_encoding(\"...\")' form");
7706rb_str_check_dummy_enc(rb_encoding *enc)
7708 if (rb_enc_dummy_p(enc)) {
7715str_true_enc(
VALUE str)
7717 rb_encoding *enc = STR_ENC_GET(str);
7718 rb_str_check_dummy_enc(enc);
7722static OnigCaseFoldType
7723check_case_options(
int argc,
VALUE *argv, OnigCaseFoldType flags)
7728 rb_raise(rb_eArgError,
"too many options");
7729 if (argv[0]==sym_turkic) {
7730 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7732 if (argv[1]==sym_lithuanian)
7733 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7735 rb_raise(rb_eArgError,
"invalid second option");
7738 else if (argv[0]==sym_lithuanian) {
7739 flags |= ONIGENC_CASE_FOLD_LITHUANIAN;
7741 if (argv[1]==sym_turkic)
7742 flags |= ONIGENC_CASE_FOLD_TURKISH_AZERI;
7744 rb_raise(rb_eArgError,
"invalid second option");
7748 rb_raise(rb_eArgError,
"too many options");
7749 else if (argv[0]==sym_ascii)
7750 flags |= ONIGENC_CASE_ASCII_ONLY;
7751 else if (argv[0]==sym_fold) {
7752 if ((flags & (ONIGENC_CASE_UPCASE|ONIGENC_CASE_DOWNCASE)) == ONIGENC_CASE_DOWNCASE)
7753 flags ^= ONIGENC_CASE_FOLD|ONIGENC_CASE_DOWNCASE;
7755 rb_raise(rb_eArgError,
"option :fold only allowed for downcasing");
7758 rb_raise(rb_eArgError,
"invalid option");
7763case_option_single_p(OnigCaseFoldType flags, rb_encoding *enc,
VALUE str)
7771#define CASE_MAPPING_ADDITIONAL_LENGTH 20
7772#ifndef CASEMAP_DEBUG
7773# define CASEMAP_DEBUG 0
7781 OnigUChar space[FLEX_ARY_LEN];
7785mapping_buffer_free(
void *p)
7789 while (current_buffer) {
7790 previous_buffer = current_buffer;
7791 current_buffer = current_buffer->next;
7792 ruby_sized_xfree(previous_buffer, previous_buffer->capa);
7798 {0, mapping_buffer_free,},
7799 0, 0, RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_WB_PROTECTED
7803rb_str_casemap(
VALUE source, OnigCaseFoldType *flags, rb_encoding *enc)
7807 const OnigUChar *source_current, *source_end;
7808 int target_length = 0;
7809 VALUE buffer_anchor;
7812 size_t buffer_count = 0;
7813 int buffer_length_or_invalid;
7815 if (RSTRING_LEN(source) == 0)
return str_duplicate(
rb_cString, source);
7817 source_current = (OnigUChar*)RSTRING_PTR(source);
7822 while (source_current < source_end) {
7824 size_t capa = (size_t)(source_end-source_current)*++buffer_count + CASE_MAPPING_ADDITIONAL_LENGTH;
7825 if (CASEMAP_DEBUG) {
7826 fprintf(stderr,
"Buffer allocation, capa is %"PRIuSIZE
"\n",
capa);
7829 *pre_buffer = current_buffer;
7830 pre_buffer = ¤t_buffer->next;
7831 current_buffer->next = NULL;
7832 current_buffer->capa =
capa;
7833 buffer_length_or_invalid = enc->case_map(flags,
7834 &source_current, source_end,
7835 current_buffer->space,
7836 current_buffer->space+current_buffer->capa,
7838 if (buffer_length_or_invalid < 0) {
7839 current_buffer =
DATA_PTR(buffer_anchor);
7841 mapping_buffer_free(current_buffer);
7842 rb_raise(rb_eArgError,
"input string invalid");
7844 target_length += current_buffer->used = buffer_length_or_invalid;
7846 if (CASEMAP_DEBUG) {
7847 fprintf(stderr,
"Buffer count is %"PRIuSIZE
"\n", buffer_count);
7850 if (buffer_count==1) {
7851 target =
rb_str_new((
const char*)current_buffer->space, target_length);
7854 char *target_current;
7857 target_current = RSTRING_PTR(target);
7858 current_buffer =
DATA_PTR(buffer_anchor);
7859 while (current_buffer) {
7860 memcpy(target_current, current_buffer->space, current_buffer->used);
7861 target_current += current_buffer->used;
7862 current_buffer = current_buffer->next;
7865 current_buffer =
DATA_PTR(buffer_anchor);
7867 mapping_buffer_free(current_buffer);
7872 str_enc_copy_direct(target, source);
7879rb_str_ascii_casemap(
VALUE source,
VALUE target, OnigCaseFoldType *flags, rb_encoding *enc)
7881 const OnigUChar *source_current, *source_end;
7882 OnigUChar *target_current, *target_end;
7883 long old_length = RSTRING_LEN(source);
7884 int length_or_invalid;
7886 if (old_length == 0)
return Qnil;
7888 source_current = (OnigUChar*)RSTRING_PTR(source);
7890 if (source == target) {
7891 target_current = (OnigUChar*)source_current;
7892 target_end = (OnigUChar*)source_end;
7895 target_current = (OnigUChar*)RSTRING_PTR(target);
7899 length_or_invalid = onigenc_ascii_only_case_map(flags,
7900 &source_current, source_end,
7901 target_current, target_end, enc);
7902 if (length_or_invalid < 0)
7903 rb_raise(rb_eArgError,
"input string invalid");
7904 if (CASEMAP_DEBUG && length_or_invalid != old_length) {
7905 fprintf(stderr,
"problem with rb_str_ascii_casemap"
7906 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7907 rb_raise(rb_eArgError,
"internal problem with rb_str_ascii_casemap"
7908 "; old_length=%ld, new_length=%d\n", old_length, length_or_invalid);
7911 str_enc_copy(target, source);
7917upcase_single(
VALUE str)
7919 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
7920 bool modified =
false;
7923 unsigned int c = *(
unsigned char*)s;
7925 if (
'a' <= c && c <=
'z') {
7926 *s =
'A' + (c -
'a');
7954rb_str_upcase_bang(
int argc,
VALUE *argv,
VALUE str)
7957 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7959 flags = check_case_options(argc, argv, flags);
7960 str_modify_keep_cr(str);
7961 enc = str_true_enc(str);
7962 if (case_option_single_p(flags, enc, str)) {
7963 if (upcase_single(str))
7964 flags |= ONIGENC_CASE_MODIFIED;
7966 else if (flags&ONIGENC_CASE_ASCII_ONLY)
7967 rb_str_ascii_casemap(str, str, &flags, enc);
7969 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
7971 if (ONIGENC_CASE_MODIFIED&flags)
return str;
7993rb_str_upcase(
int argc,
VALUE *argv,
VALUE str)
7996 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE;
7999 flags = check_case_options(argc, argv, flags);
8000 enc = str_true_enc(str);
8001 if (case_option_single_p(flags, enc, str)) {
8002 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8003 str_enc_copy_direct(ret, str);
8006 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8008 rb_str_ascii_casemap(str, ret, &flags, enc);
8011 ret = rb_str_casemap(str, &flags, enc);
8018downcase_single(
VALUE str)
8020 char *s = RSTRING_PTR(str), *send =
RSTRING_END(str);
8021 bool modified =
false;
8024 unsigned int c = *(
unsigned char*)s;
8026 if (
'A' <= c && c <=
'Z') {
8027 *s =
'a' + (c -
'A');
8056rb_str_downcase_bang(
int argc,
VALUE *argv,
VALUE str)
8059 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8061 flags = check_case_options(argc, argv, flags);
8062 str_modify_keep_cr(str);
8063 enc = str_true_enc(str);
8064 if (case_option_single_p(flags, enc, str)) {
8065 if (downcase_single(str))
8066 flags |= ONIGENC_CASE_MODIFIED;
8068 else if (flags&ONIGENC_CASE_ASCII_ONLY)
8069 rb_str_ascii_casemap(str, str, &flags, enc);
8071 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8073 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8095rb_str_downcase(
int argc,
VALUE *argv,
VALUE str)
8098 OnigCaseFoldType flags = ONIGENC_CASE_DOWNCASE;
8101 flags = check_case_options(argc, argv, flags);
8102 enc = str_true_enc(str);
8103 if (case_option_single_p(flags, enc, str)) {
8104 ret =
rb_str_new(RSTRING_PTR(str), RSTRING_LEN(str));
8105 str_enc_copy_direct(ret, str);
8106 downcase_single(ret);
8108 else if (flags&ONIGENC_CASE_ASCII_ONLY) {
8110 rb_str_ascii_casemap(str, ret, &flags, enc);
8113 ret = rb_str_casemap(str, &flags, enc);
8141rb_str_capitalize_bang(
int argc,
VALUE *argv,
VALUE str)
8144 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8146 flags = check_case_options(argc, argv, flags);
8147 str_modify_keep_cr(str);
8148 enc = str_true_enc(str);
8149 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8150 if (flags&ONIGENC_CASE_ASCII_ONLY)
8151 rb_str_ascii_casemap(str, str, &flags, enc);
8153 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8155 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8179rb_str_capitalize(
int argc,
VALUE *argv,
VALUE str)
8182 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_TITLECASE;
8185 flags = check_case_options(argc, argv, flags);
8186 enc = str_true_enc(str);
8187 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str;
8188 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8190 rb_str_ascii_casemap(str, ret, &flags, enc);
8193 ret = rb_str_casemap(str, &flags, enc);
8220rb_str_swapcase_bang(
int argc,
VALUE *argv,
VALUE str)
8223 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8225 flags = check_case_options(argc, argv, flags);
8226 str_modify_keep_cr(str);
8227 enc = str_true_enc(str);
8228 if (flags&ONIGENC_CASE_ASCII_ONLY)
8229 rb_str_ascii_casemap(str, str, &flags, enc);
8231 str_shared_replace(str, rb_str_casemap(str, &flags, enc));
8233 if (ONIGENC_CASE_MODIFIED&flags)
return str;
8257rb_str_swapcase(
int argc,
VALUE *argv,
VALUE str)
8260 OnigCaseFoldType flags = ONIGENC_CASE_UPCASE | ONIGENC_CASE_DOWNCASE;
8263 flags = check_case_options(argc, argv, flags);
8264 enc = str_true_enc(str);
8265 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return str_duplicate(
rb_cString, str);
8266 if (flags&ONIGENC_CASE_ASCII_ONLY) {
8268 rb_str_ascii_casemap(str, ret, &flags, enc);
8271 ret = rb_str_casemap(str, &flags, enc);
8276typedef unsigned char *USTR;
8280 unsigned int now, max;
8285trnext(
struct tr *t, rb_encoding *enc)
8292 if (t->p == t->pend)
return -1;
8293 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'\\' && t->p + n < t->pend) {
8296 t->now = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8298 if (rb_enc_ascget(t->p, t->pend, &n, enc) ==
'-' && t->p + n < t->pend) {
8300 if (t->p < t->pend) {
8301 unsigned int c = rb_enc_codepoint_len(t->p, t->pend, &n, enc);
8304 if (t->now < 0x80 && c < 0x80) {
8305 rb_raise(rb_eArgError,
8306 "invalid range \"%c-%c\" in string transliteration",
8310 rb_raise(rb_eArgError,
"invalid range in string transliteration");
8314 else if (t->now < c) {
8323 while (ONIGENC_CODE_TO_MBCLEN(enc, ++t->now) <= 0) {
8324 if (t->now == t->max) {
8329 if (t->now < t->max) {
8345 const unsigned int errc = -1;
8346 unsigned int trans[256];
8347 rb_encoding *enc, *e1, *e2;
8348 struct tr trsrc, trrepl;
8350 unsigned int c, c0, last = 0;
8351 int modify = 0, i, l;
8352 unsigned char *s, *send;
8354 int singlebyte = single_byte_optimizable(str);
8358#define CHECK_IF_ASCII(c) \
8359 (void)((cr == ENC_CODERANGE_7BIT && !rb_isascii(c)) ? \
8360 (cr = ENC_CODERANGE_VALID) : 0)
8364 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8365 if (RSTRING_LEN(repl) == 0) {
8366 return rb_str_delete_bang(1, &src, str);
8370 e1 = rb_enc_check(str, src);
8371 e2 = rb_enc_check(str, repl);
8376 enc = rb_enc_check(src, repl);
8378 trsrc.p = RSTRING_PTR(src); trsrc.pend = trsrc.p + RSTRING_LEN(src);
8379 if (RSTRING_LEN(src) > 1 &&
8380 rb_enc_ascget(trsrc.p, trsrc.pend, &l, enc) ==
'^' &&
8381 trsrc.p + l < trsrc.pend) {
8385 trrepl.p = RSTRING_PTR(repl);
8386 trrepl.pend = trrepl.p + RSTRING_LEN(repl);
8387 trsrc.gen = trrepl.gen = 0;
8388 trsrc.now = trrepl.now = 0;
8389 trsrc.max = trrepl.max = 0;
8392 for (i=0; i<256; i++) {
8395 while ((c = trnext(&trsrc, enc)) != errc) {
8404 while ((c = trnext(&trrepl, enc)) != errc)
8407 for (i=0; i<256; i++) {
8408 if (trans[i] != errc) {
8416 for (i=0; i<256; i++) {
8419 while ((c = trnext(&trsrc, enc)) != errc) {
8420 r = trnext(&trrepl, enc);
8421 if (r == errc) r = trrepl.now;
8424 if (rb_enc_codelen(r, enc) != 1) singlebyte = 0;
8435 str_modify_keep_cr(str);
8436 s = (
unsigned char *)RSTRING_PTR(str); send = (
unsigned char *)
RSTRING_END(str);
8437 termlen = rb_enc_mbminlen(enc);
8440 long offset, max = RSTRING_LEN(str);
8441 unsigned int save = -1;
8442 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8447 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8450 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8453 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8455 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8464 if (cflag) c = last;
8467 else if (cflag) c = errc;
8473 if (c != (
unsigned int)-1) {
8479 tlen = rb_enc_codelen(c, enc);
8485 if (enc != e1) may_modify = 1;
8487 if ((offset = t - buf) + tlen > max) {
8488 size_t MAYBE_UNUSED(old) = max + termlen;
8489 max = offset + tlen + (send - s);
8490 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8493 rb_enc_mbcput(c, t, enc);
8494 if (may_modify && memcmp(s, t, tlen) != 0) {
8500 if (!STR_EMBED_P(str)) {
8501 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8503 TERM_FILL((
char *)t, termlen);
8504 RSTRING(str)->as.heap.ptr = (
char *)buf;
8505 STR_SET_LEN(str, t - buf);
8506 STR_SET_NOEMBED(str);
8507 RSTRING(str)->as.heap.aux.capa = max;
8511 c = (
unsigned char)*s;
8512 if (trans[c] != errc) {
8529 long offset, max = (long)((send - s) * 1.2);
8530 unsigned char *buf =
ALLOC_N(
unsigned char, max + termlen), *t = buf;
8535 int r = rb_enc_precise_mbclen((
char *)s, (
char *)send, e1);
8538 rb_raise(rb_eArgError,
"invalid byte sequence in %s", rb_enc_name(e1));
8541 c0 = c = rb_enc_mbc_to_codepoint((
char *)s, (
char *)send, e1);
8543 tlen = enc == e1 ? clen : rb_enc_codelen(c, enc);
8551 if (cflag) c = last;
8554 else if (cflag) c = errc;
8558 c = cflag ? last : errc;
8561 tlen = rb_enc_codelen(c, enc);
8566 if (enc != e1) may_modify = 1;
8568 if ((offset = t - buf) + tlen > max) {
8569 size_t MAYBE_UNUSED(old) = max + termlen;
8570 max = offset + tlen + (long)((send - s) * 1.2);
8571 SIZED_REALLOC_N(buf,
unsigned char, max + termlen, old);
8575 rb_enc_mbcput(c, t, enc);
8576 if (may_modify && memcmp(s, t, tlen) != 0) {
8584 if (!STR_EMBED_P(str)) {
8585 ruby_sized_xfree(STR_HEAP_PTR(str), STR_HEAP_SIZE(str));
8587 TERM_FILL((
char *)t, termlen);
8588 RSTRING(str)->as.heap.ptr = (
char *)buf;
8589 STR_SET_LEN(str, t - buf);
8590 STR_SET_NOEMBED(str);
8591 RSTRING(str)->as.heap.aux.capa = max;
8597 rb_enc_associate(str, enc);
8616 return tr_trans(str, src, repl, 0);
8663 tr_trans(str, src, repl, 0);
8667#define TR_TABLE_MAX (UCHAR_MAX+1)
8668#define TR_TABLE_SIZE (TR_TABLE_MAX+1)
8670tr_setup_table(
VALUE str,
char stable[TR_TABLE_SIZE],
int first,
8671 VALUE *tablep,
VALUE *ctablep, rb_encoding *enc)
8673 const unsigned int errc = -1;
8674 char buf[TR_TABLE_MAX];
8677 VALUE table = 0, ptable = 0;
8678 int i, l, cflag = 0;
8680 tr.p = RSTRING_PTR(str);
tr.pend =
tr.p + RSTRING_LEN(str);
8681 tr.gen =
tr.now =
tr.max = 0;
8683 if (RSTRING_LEN(str) > 1 && rb_enc_ascget(
tr.p,
tr.pend, &l, enc) ==
'^') {
8688 for (i=0; i<TR_TABLE_MAX; i++) {
8691 stable[TR_TABLE_MAX] = cflag;
8693 else if (stable[TR_TABLE_MAX] && !cflag) {
8694 stable[TR_TABLE_MAX] = 0;
8696 for (i=0; i<TR_TABLE_MAX; i++) {
8700 while ((c = trnext(&
tr, enc)) != errc) {
8701 if (c < TR_TABLE_MAX) {
8702 buf[(
unsigned char)c] = !cflag;
8707 if (!table && (first || *tablep || stable[TR_TABLE_MAX])) {
8719 if (table && (!ptable || (cflag ^ !
NIL_P(rb_hash_aref(ptable, key))))) {
8720 rb_hash_aset(table, key,
Qtrue);
8724 for (i=0; i<TR_TABLE_MAX; i++) {
8725 stable[i] = stable[i] && buf[i];
8727 if (!table && !cflag) {
8734tr_find(
unsigned int c,
const char table[TR_TABLE_SIZE],
VALUE del,
VALUE nodel)
8736 if (c < TR_TABLE_MAX) {
8737 return table[c] != 0;
8743 if (!
NIL_P(rb_hash_lookup(del, v)) &&
8744 (!nodel ||
NIL_P(rb_hash_lookup(nodel, v)))) {
8748 else if (nodel && !
NIL_P(rb_hash_lookup(nodel, v))) {
8751 return table[TR_TABLE_MAX] ? TRUE : FALSE;
8765rb_str_delete_bang(
int argc,
VALUE *argv,
VALUE str)
8767 char squeez[TR_TABLE_SIZE];
8768 rb_encoding *enc = 0;
8770 VALUE del = 0, nodel = 0;
8772 int i, ascompat, cr;
8774 if (RSTRING_LEN(str) == 0 || !RSTRING_PTR(str))
return Qnil;
8776 for (i=0; i<argc; i++) {
8780 enc = rb_enc_check(str, s);
8781 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8784 str_modify_keep_cr(str);
8785 ascompat = rb_enc_asciicompat(enc);
8786 s = t = RSTRING_PTR(str);
8793 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
8804 c = rb_enc_codepoint_len(s, send, &clen, enc);
8806 if (tr_find(c, squeez, del, nodel)) {
8810 if (t != s) rb_enc_mbcput(c, t, enc);
8817 TERM_FILL(t, TERM_LEN(str));
8818 STR_SET_LEN(str, t - RSTRING_PTR(str));
8821 if (modify)
return str;
8841rb_str_delete(
int argc,
VALUE *argv,
VALUE str)
8844 rb_str_delete_bang(argc, argv, str);
8858rb_str_squeeze_bang(
int argc,
VALUE *argv,
VALUE str)
8860 char squeez[TR_TABLE_SIZE];
8861 rb_encoding *enc = 0;
8862 VALUE del = 0, nodel = 0;
8863 unsigned char *s, *send, *t;
8865 int ascompat, singlebyte = single_byte_optimizable(str);
8869 enc = STR_ENC_GET(str);
8872 for (i=0; i<argc; i++) {
8876 enc = rb_enc_check(str, s);
8877 if (singlebyte && !single_byte_optimizable(s))
8879 tr_setup_table(s, squeez, i==0, &del, &nodel, enc);
8883 str_modify_keep_cr(str);
8884 s = t = (
unsigned char *)RSTRING_PTR(str);
8885 if (!s || RSTRING_LEN(str) == 0)
return Qnil;
8888 ascompat = rb_enc_asciicompat(enc);
8892 unsigned int c = *s++;
8893 if (c != save || (argc > 0 && !squeez[c])) {
8903 if (ascompat && (c = *s) < 0x80) {
8904 if (c != save || (argc > 0 && !squeez[c])) {
8910 c = rb_enc_codepoint_len((
char *)s, (
char *)send, &clen, enc);
8912 if (c != save || (argc > 0 && !tr_find(c, squeez, del, nodel))) {
8913 if (t != s) rb_enc_mbcput(c, t, enc);
8922 TERM_FILL((
char *)t, TERM_LEN(str));
8923 if ((
char *)t - RSTRING_PTR(str) != RSTRING_LEN(str)) {
8924 STR_SET_LEN(str, (
char *)t - RSTRING_PTR(str));
8928 if (modify)
return str;
8951rb_str_squeeze(
int argc,
VALUE *argv,
VALUE str)
8954 rb_str_squeeze_bang(argc, argv, str);
8972 return tr_trans(str, src, repl, 1);
8995 tr_trans(str, src, repl, 1);
9024rb_str_count(
int argc,
VALUE *argv,
VALUE str)
9026 char table[TR_TABLE_SIZE];
9027 rb_encoding *enc = 0;
9028 VALUE del = 0, nodel = 0, tstr;
9038 enc = rb_enc_check(str, tstr);
9041 if (RSTRING_LEN(tstr) == 1 && rb_enc_asciicompat(enc) &&
9042 (ptstr = RSTRING_PTR(tstr),
9043 ONIGENC_IS_ALLOWED_REVERSE_MATCH(enc, (
const unsigned char *)ptstr, (
const unsigned char *)ptstr+1)) &&
9044 !is_broken_string(str)) {
9046 unsigned char c = rb_enc_codepoint_len(ptstr, ptstr+1, &clen, enc);
9048 s = RSTRING_PTR(str);
9049 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9052 if (*(
unsigned char*)s++ == c) n++;
9058 tr_setup_table(tstr, table, TRUE, &del, &nodel, enc);
9059 for (i=1; i<argc; i++) {
9062 enc = rb_enc_check(str, tstr);
9063 tr_setup_table(tstr, table, FALSE, &del, &nodel, enc);
9066 s = RSTRING_PTR(str);
9067 if (!s || RSTRING_LEN(str) == 0)
return INT2FIX(0);
9069 ascompat = rb_enc_asciicompat(enc);
9073 if (ascompat && (c = *(
unsigned char*)s) < 0x80) {
9081 c = rb_enc_codepoint_len(s, send, &clen, enc);
9082 if (tr_find(c, table, del, nodel)) {
9093rb_fs_check(
VALUE val)
9097 if (
NIL_P(val))
return 0;
9102static const char isspacetable[256] = {
9103 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0,
9104 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9105 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9106 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9107 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9108 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9109 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9110 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9111 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9112 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9113 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9114 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9115 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9116 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9117 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9118 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
9121#define ascii_isspace(c) isspacetable[(unsigned char)(c)]
9124split_string(
VALUE result,
VALUE str,
long beg,
long len,
long empty_count)
9126 if (empty_count >= 0 &&
len == 0) {
9127 return empty_count + 1;
9129 if (empty_count > 0) {
9133 rb_ary_push(result, str_new_empty_String(str));
9134 }
while (--empty_count > 0);
9138 rb_yield(str_new_empty_String(str));
9139 }
while (--empty_count > 0);
9144 rb_ary_push(result, str);
9153 SPLIT_TYPE_AWK, SPLIT_TYPE_STRING, SPLIT_TYPE_REGEXP, SPLIT_TYPE_CHARS
9157literal_split_pattern(
VALUE spat, split_type_t default_type)
9159 rb_encoding *enc = STR_ENC_GET(spat);
9165 return SPLIT_TYPE_CHARS;
9167 else if (rb_enc_asciicompat(enc)) {
9168 if (
len == 1 && ptr[0] ==
' ') {
9169 return SPLIT_TYPE_AWK;
9174 if (rb_enc_ascget(ptr, ptr +
len, &l, enc) ==
' ' &&
len == l) {
9175 return SPLIT_TYPE_AWK;
9178 return default_type;
9191rb_str_split_m(
int argc,
VALUE *argv,
VALUE str)
9196 split_type_t split_type;
9197 long beg, end, i = 0, empty_count = -1;
9202 if (
rb_scan_args(argc, argv,
"02", &spat, &limit) == 2) {
9204 if (lim <= 0) limit =
Qnil;
9205 else if (lim == 1) {
9206 if (RSTRING_LEN(str) == 0)
9217 if (
NIL_P(limit) && !lim) empty_count = 0;
9219 enc = STR_ENC_GET(str);
9220 split_type = SPLIT_TYPE_REGEXP;
9222 spat = get_pat_quoted(spat, 0);
9225 split_type = SPLIT_TYPE_AWK;
9227 else if (!(spat = rb_fs_check(spat))) {
9228 rb_raise(
rb_eTypeError,
"value of $; must be String or Regexp");
9233 if (split_type != SPLIT_TYPE_AWK) {
9238 split_type = literal_split_pattern(tmp, SPLIT_TYPE_REGEXP);
9239 if (split_type == SPLIT_TYPE_AWK) {
9241 split_type = SPLIT_TYPE_STRING;
9246 mustnot_broken(spat);
9247 split_type = literal_split_pattern(spat, SPLIT_TYPE_STRING);
9255#define SPLIT_STR(beg, len) ( \
9256 empty_count = split_string(result, str, beg, len, empty_count), \
9257 str_mod_check(str, str_start, str_len))
9260 char *ptr = RSTRING_PTR(str);
9261 char *
const str_start = ptr;
9262 const long str_len = RSTRING_LEN(str);
9263 char *
const eptr = str_start + str_len;
9264 if (split_type == SPLIT_TYPE_AWK) {
9271 if (is_ascii_string(str)) {
9272 while (ptr < eptr) {
9273 c = (
unsigned char)*ptr++;
9275 if (ascii_isspace(c)) {
9281 if (!
NIL_P(limit) && lim <= i)
break;
9284 else if (ascii_isspace(c)) {
9285 SPLIT_STR(beg, end-beg);
9288 if (!
NIL_P(limit)) ++i;
9296 while (ptr < eptr) {
9299 c = rb_enc_codepoint_len(ptr, eptr, &n, enc);
9308 if (!
NIL_P(limit) && lim <= i)
break;
9312 SPLIT_STR(beg, end-beg);
9315 if (!
NIL_P(limit)) ++i;
9323 else if (split_type == SPLIT_TYPE_STRING) {
9324 char *substr_start = ptr;
9325 char *sptr = RSTRING_PTR(spat);
9326 long slen = RSTRING_LEN(spat);
9329 mustnot_broken(str);
9330 enc = rb_enc_check(str, spat);
9331 while (ptr < eptr &&
9332 (end =
rb_memsearch(sptr, slen, ptr, eptr - ptr, enc)) >= 0) {
9335 if (t != ptr + end) {
9339 SPLIT_STR(substr_start - str_start, (ptr+end) - substr_start);
9340 str_mod_check(spat, sptr, slen);
9343 if (!
NIL_P(limit) && lim <= ++i)
break;
9345 beg = ptr - str_start;
9347 else if (split_type == SPLIT_TYPE_CHARS) {
9350 if (result) result = rb_ary_new_capa(RSTRING_LEN(str));
9351 mustnot_broken(str);
9352 enc = rb_enc_get(str);
9353 while (ptr < eptr &&
9354 (n = rb_enc_precise_mbclen(ptr, eptr, enc)) > 0) {
9355 SPLIT_STR(ptr - str_start, n);
9357 if (!
NIL_P(limit) && lim <= ++i)
break;
9359 beg = ptr - str_start;
9363 long len = RSTRING_LEN(str);
9371 (match ? (rb_match_unbusy(match),
rb_backref_set(match)) : (void)0)) {
9376 if (start == end && BEG(0) == END(0)) {
9381 else if (last_null == 1) {
9382 SPLIT_STR(beg, rb_enc_fast_mbclen(ptr+beg, eptr, enc));
9389 start += rb_enc_fast_mbclen(ptr+start,eptr,enc);
9395 SPLIT_STR(beg, end-beg);
9396 beg = start = END(0);
9400 for (idx=1; idx < regs->num_regs; idx++) {
9401 if (BEG(idx) == -1)
continue;
9402 SPLIT_STR(BEG(idx), END(idx)-BEG(idx));
9404 if (!
NIL_P(limit) && lim <= ++i)
break;
9406 if (match) rb_match_unbusy(match);
9408 if (RSTRING_LEN(str) > 0 && (!
NIL_P(limit) || RSTRING_LEN(str) > beg || lim < 0)) {
9409 SPLIT_STR(beg, RSTRING_LEN(str)-beg);
9412 return result ? result : str;
9422 return rb_str_split_m(1, &sep, str);
9425#define WANTARRAY(m, size) (!rb_block_given_p() ? rb_ary_new_capa(size) : 0)
9431 rb_ary_push(ary, e);
9440#define ENUM_ELEM(ary, e) enumerator_element(ary, e)
9443chomp_newline(
const char *p,
const char *e, rb_encoding *enc)
9445 const char *prev = rb_enc_prev_char(p, e, e, enc);
9448 prev = rb_enc_prev_char(p, e, e, enc);
9449 if (prev && rb_enc_ascget(prev, e, NULL, enc) ==
'\r')
9461 RSTRING_LEN(rs) != 1 ||
9462 RSTRING_PTR(rs)[0] !=
'\n')) {
9468#define rb_rs get_rs()
9475 const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
9476 long pos,
len, rslen;
9482 static ID keywords[1];
9487 chomp = (!UNDEF_P(chomp) &&
RTEST(chomp));
9491 if (!ENUM_ELEM(ary, str)) {
9499 if (!RSTRING_LEN(str))
goto end;
9501 ptr = subptr = RSTRING_PTR(str);
9503 len = RSTRING_LEN(str);
9505 rslen = RSTRING_LEN(rs);
9508 enc = rb_enc_get(str);
9510 enc = rb_enc_check(str, rs);
9515 const char *eol = NULL;
9517 while (subend < pend) {
9518 long chomp_rslen = 0;
9520 if (rb_enc_ascget(subend, pend, &n, enc) !=
'\r')
9522 rslen = n + rb_enc_mbclen(subend + n, pend, enc);
9524 if (eol == subend)
break;
9528 chomp_rslen = -rslen;
9532 if (!subptr) subptr = subend;
9536 }
while (subend < pend);
9538 if (rslen == 0) chomp_rslen = 0;
9540 subend - subptr + (chomp ? chomp_rslen : rslen));
9541 if (ENUM_ELEM(ary, line)) {
9542 str_mod_check(str, ptr,
len);
9544 subptr = eol = NULL;
9549 rsptr = RSTRING_PTR(rs);
9550 if (RSTRING_LEN(rs) == rb_enc_mbminlen(enc) &&
9559 rsptr = RSTRING_PTR(rs);
9560 rslen = RSTRING_LEN(rs);
9563 while (subptr < pend) {
9564 pos =
rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
9568 if (hit != adjusted) {
9572 subend = hit += rslen;
9575 subend = chomp_newline(subptr, subend, enc);
9582 if (ENUM_ELEM(ary, line)) {
9583 str_mod_check(str, ptr,
len);
9588 if (subptr != pend) {
9591 pend = chomp_newline(subptr, pend, enc);
9593 else if (pend - subptr >= rslen &&
9594 memcmp(pend - rslen, rsptr, rslen) == 0) {
9599 ENUM_ELEM(ary, line);
9620rb_str_each_line(
int argc,
VALUE *argv,
VALUE str)
9623 return rb_str_enumerate_lines(argc, argv, str, 0);
9636rb_str_lines(
int argc,
VALUE *argv,
VALUE str)
9638 VALUE ary = WANTARRAY(
"lines", 0);
9639 return rb_str_enumerate_lines(argc, argv, str, ary);
9653 for (i=0; i<RSTRING_LEN(str); i++) {
9654 ENUM_ELEM(ary,
INT2FIX((
unsigned char)RSTRING_PTR(str)[i]));
9672rb_str_each_byte(
VALUE str)
9675 return rb_str_enumerate_bytes(str, 0);
9687rb_str_bytes(
VALUE str)
9689 VALUE ary = WANTARRAY(
"bytes", RSTRING_LEN(str));
9690 return rb_str_enumerate_bytes(str, ary);
9708 ptr = RSTRING_PTR(str);
9709 len = RSTRING_LEN(str);
9710 enc = rb_enc_get(str);
9713 for (i = 0; i <
len; i += n) {
9714 n = rb_enc_fast_mbclen(ptr + i, ptr +
len, enc);
9719 for (i = 0; i <
len; i += n) {
9720 n = rb_enc_mbclen(ptr + i, ptr +
len, enc);
9741rb_str_each_char(
VALUE str)
9744 return rb_str_enumerate_chars(str, 0);
9756rb_str_chars(
VALUE str)
9759 return rb_str_enumerate_chars(str, ary);
9763rb_str_enumerate_codepoints(
VALUE str,
VALUE ary)
9768 const char *ptr, *end;
9771 if (single_byte_optimizable(str))
9772 return rb_str_enumerate_bytes(str, ary);
9775 ptr = RSTRING_PTR(str);
9777 enc = STR_ENC_GET(str);
9780 c = rb_enc_codepoint_len(ptr, end, &n, enc);
9801rb_str_each_codepoint(
VALUE str)
9804 return rb_str_enumerate_codepoints(str, 0);
9816rb_str_codepoints(
VALUE str)
9819 return rb_str_enumerate_codepoints(str, ary);
9823get_reg_grapheme_cluster(rb_encoding *enc)
9825 int encidx = rb_enc_to_index(enc);
9827 const OnigUChar source_ascii[] =
"\\X";
9828 const OnigUChar *source = source_ascii;
9829 size_t source_len =
sizeof(source_ascii) - 1;
9832#define CHARS_16BE(x) (OnigUChar)((x)>>8), (OnigUChar)(x)
9833#define CHARS_16LE(x) (OnigUChar)(x), (OnigUChar)((x)>>8)
9834#define CHARS_32BE(x) CHARS_16BE((x)>>16), CHARS_16BE(x)
9835#define CHARS_32LE(x) CHARS_16LE(x), CHARS_16LE((x)>>16)
9836#define CASE_UTF(e) \
9837 case ENCINDEX_UTF_##e: { \
9838 static const OnigUChar source_UTF_##e[] = {CHARS_##e('\\'), CHARS_##e('X')}; \
9839 source = source_UTF_##e; \
9840 source_len = sizeof(source_UTF_##e); \
9843 CASE_UTF(16BE); CASE_UTF(16LE); CASE_UTF(32BE); CASE_UTF(32LE);
9851 regex_t *reg_grapheme_cluster;
9853 int r = onig_new(&reg_grapheme_cluster, source, source + source_len,
9854 ONIG_OPTION_DEFAULT, enc, OnigDefaultSyntax, &einfo);
9856 UChar message[ONIG_MAX_ERROR_MESSAGE_LEN];
9857 onig_error_code_to_str(message, r, &einfo);
9858 rb_fatal(
"cannot compile grapheme cluster regexp: %s", (
char *)message);
9861 return reg_grapheme_cluster;
9865get_cached_reg_grapheme_cluster(rb_encoding *enc)
9867 int encidx = rb_enc_to_index(enc);
9868 static regex_t *reg_grapheme_cluster_utf8 = NULL;
9871 if (!reg_grapheme_cluster_utf8) {
9872 reg_grapheme_cluster_utf8 = get_reg_grapheme_cluster(enc);
9875 return reg_grapheme_cluster_utf8;
9884 size_t grapheme_cluster_count = 0;
9885 rb_encoding *enc = get_encoding(str);
9886 const char *ptr, *end;
9888 if (!rb_enc_unicode_p(enc)) {
9892 bool cached_reg_grapheme_cluster =
true;
9893 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9894 if (!reg_grapheme_cluster) {
9895 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9896 cached_reg_grapheme_cluster =
false;
9899 ptr = RSTRING_PTR(str);
9903 OnigPosition
len = onig_match(reg_grapheme_cluster,
9904 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9905 (
const OnigUChar *)ptr, NULL, 0);
9906 if (
len <= 0)
break;
9907 grapheme_cluster_count++;
9911 if (!cached_reg_grapheme_cluster) {
9912 onig_free(reg_grapheme_cluster);
9915 return SIZET2NUM(grapheme_cluster_count);
9919rb_str_enumerate_grapheme_clusters(
VALUE str,
VALUE ary)
9922 rb_encoding *enc = get_encoding(str);
9923 const char *ptr0, *ptr, *end;
9925 if (!rb_enc_unicode_p(enc)) {
9926 return rb_str_enumerate_chars(str, ary);
9931 bool cached_reg_grapheme_cluster =
true;
9932 regex_t *reg_grapheme_cluster = get_cached_reg_grapheme_cluster(enc);
9933 if (!reg_grapheme_cluster) {
9934 reg_grapheme_cluster = get_reg_grapheme_cluster(enc);
9935 cached_reg_grapheme_cluster =
false;
9938 ptr0 = ptr = RSTRING_PTR(str);
9942 OnigPosition
len = onig_match(reg_grapheme_cluster,
9943 (
const OnigUChar *)ptr, (
const OnigUChar *)end,
9944 (
const OnigUChar *)ptr, NULL, 0);
9945 if (
len <= 0)
break;
9950 if (!cached_reg_grapheme_cluster) {
9951 onig_free(reg_grapheme_cluster);
9971rb_str_each_grapheme_cluster(
VALUE str)
9974 return rb_str_enumerate_grapheme_clusters(str, 0);
9986rb_str_grapheme_clusters(
VALUE str)
9989 return rb_str_enumerate_grapheme_clusters(str, ary);
9993chopped_length(
VALUE str)
9995 rb_encoding *enc = STR_ENC_GET(str);
9996 const char *p, *p2, *beg, *end;
9998 beg = RSTRING_PTR(str);
9999 end = beg + RSTRING_LEN(str);
10000 if (beg >= end)
return 0;
10001 p = rb_enc_prev_char(beg, end, end, enc);
10003 if (p > beg && rb_enc_ascget(p, end, 0, enc) ==
'\n') {
10004 p2 = rb_enc_prev_char(beg, p, end, enc);
10005 if (p2 && rb_enc_ascget(p2, end, 0, enc) ==
'\r') p = p2;
10021rb_str_chop_bang(
VALUE str)
10023 str_modify_keep_cr(str);
10024 if (RSTRING_LEN(str) > 0) {
10026 len = chopped_length(str);
10027 STR_SET_LEN(str,
len);
10028 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10047rb_str_chop(
VALUE str)
10053smart_chomp(
VALUE str,
const char *e,
const char *p)
10055 rb_encoding *enc = rb_enc_get(str);
10056 if (rb_enc_mbminlen(enc) > 1) {
10061 pp = e - rb_enc_mbminlen(enc);
10064 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10072 if (--e > p && *(e-1) ==
'\r') {
10089 char *pp, *e, *rsptr;
10091 char *
const p = RSTRING_PTR(str);
10092 long len = RSTRING_LEN(str);
10094 if (
len == 0)
return 0;
10097 return smart_chomp(str, e, p);
10100 enc = rb_enc_get(str);
10103 if (rb_enc_mbminlen(enc) > 1) {
10108 pp -= rb_enc_mbminlen(enc);
10111 if (rb_enc_ascget(pp, e, 0, enc) ==
'\r') {
10118 while (e > p && *(e-1) ==
'\n') {
10120 if (e > p && *(e-1) ==
'\r')
10126 if (rslen >
len)
return len;
10128 enc = rb_enc_get(rs);
10129 newline = rsptr[rslen-1];
10130 if (rslen == rb_enc_mbminlen(enc)) {
10132 if (newline ==
'\n')
10133 return smart_chomp(str, e, p);
10137 return smart_chomp(str, e, p);
10141 enc = rb_enc_check(str, rs);
10142 if (is_broken_string(rs)) {
10146 if (p[
len-1] == newline &&
10148 memcmp(rsptr, pp, rslen) == 0)) {
10149 if (at_char_boundary(p, pp, e, enc))
10150 return len - rslen;
10162chomp_rs(
int argc,
const VALUE *argv)
10166 VALUE rs = argv[0];
10178 long olen = RSTRING_LEN(str);
10179 long len = chompped_length(str, rs);
10180 if (
len >= olen)
return Qnil;
10181 str_modify_keep_cr(str);
10182 STR_SET_LEN(str,
len);
10183 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
10200rb_str_chomp_bang(
int argc,
VALUE *argv,
VALUE str)
10203 str_modifiable(str);
10204 if (RSTRING_LEN(str) == 0 && argc < 2)
return Qnil;
10205 rs = chomp_rs(argc, argv);
10207 return rb_str_chomp_string(str, rs);
10220rb_str_chomp(
int argc,
VALUE *argv,
VALUE str)
10222 VALUE rs = chomp_rs(argc, argv);
10228lstrip_offset(
VALUE str,
const char *s,
const char *e, rb_encoding *enc)
10230 const char *
const start = s;
10232 if (!s || s >= e)
return 0;
10235 if (single_byte_optimizable(str)) {
10236 while (s < e && (*s ==
'\0' || ascii_isspace(*s))) s++;
10241 unsigned int cc = rb_enc_codepoint_len(s, e, &n, enc);
10261rb_str_lstrip_bang(
VALUE str)
10265 long olen, loffset;
10267 str_modify_keep_cr(str);
10268 enc = STR_ENC_GET(str);
10270 loffset = lstrip_offset(str, start, start+olen, enc);
10272 long len = olen-loffset;
10273 s = start + loffset;
10274 memmove(start, s,
len);
10275 STR_SET_LEN(str,
len);
10276 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10299rb_str_lstrip(
VALUE str)
10304 loffset = lstrip_offset(str, start, start+
len, STR_ENC_GET(str));
10305 if (loffset <= 0)
return str_duplicate(
rb_cString, str);
10310rstrip_offset(
VALUE str,
const char *s,
const char *e, rb_encoding *enc)
10314 rb_str_check_dummy_enc(enc);
10318 if (!s || s >= e)
return 0;
10322 if (single_byte_optimizable(str)) {
10324 while (s < t && ((c = *(t-1)) ==
'\0' || ascii_isspace(c))) t--;
10329 while ((tp = rb_enc_prev_char(s, t, e, enc)) != NULL) {
10349rb_str_rstrip_bang(
VALUE str)
10353 long olen, roffset;
10355 str_modify_keep_cr(str);
10356 enc = STR_ENC_GET(str);
10358 roffset = rstrip_offset(str, start, start+olen, enc);
10360 long len = olen - roffset;
10362 STR_SET_LEN(str,
len);
10363 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10386rb_str_rstrip(
VALUE str)
10390 long olen, roffset;
10392 enc = STR_ENC_GET(str);
10394 roffset = rstrip_offset(str, start, start+olen, enc);
10396 if (roffset <= 0)
return str_duplicate(
rb_cString, str);
10412rb_str_strip_bang(
VALUE str)
10415 long olen, loffset, roffset;
10418 str_modify_keep_cr(str);
10419 enc = STR_ENC_GET(str);
10421 loffset = lstrip_offset(str, start, start+olen, enc);
10422 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10424 if (loffset > 0 || roffset > 0) {
10425 long len = olen-roffset;
10428 memmove(start, start + loffset,
len);
10430 STR_SET_LEN(str,
len);
10431 TERM_FILL(start+
len, rb_enc_mbminlen(enc));
10454rb_str_strip(
VALUE str)
10457 long olen, loffset, roffset;
10458 rb_encoding *enc = STR_ENC_GET(str);
10461 loffset = lstrip_offset(str, start, start+olen, enc);
10462 roffset = rstrip_offset(str, start+loffset, start+olen, enc);
10464 if (loffset <= 0 && roffset <= 0)
return str_duplicate(
rb_cString, str);
10469scan_once(
VALUE str,
VALUE pat,
long *start,
int set_backref_str)
10472 long end, pos = rb_pat_search(pat, str, *start, set_backref_str);
10478 end = pos + RSTRING_LEN(pat);
10488 rb_encoding *enc = STR_ENC_GET(str);
10492 if (RSTRING_LEN(str) > end)
10493 *start = end + rb_enc_fast_mbclen(RSTRING_PTR(str) + end,
10502 if (!regs || regs->num_regs == 1) {
10508 for (
int i = 1; i < regs->num_regs; i++) {
10514 rb_ary_push(result, s);
10569 long last = -1, prev = 0;
10570 char *p = RSTRING_PTR(str);
long len = RSTRING_LEN(str);
10572 pat = get_pat_quoted(pat, 1);
10573 mustnot_broken(str);
10577 while (!
NIL_P(result = scan_once(str, pat, &start, 0))) {
10580 rb_ary_push(ary, result);
10582 if (last >= 0) rb_pat_search(pat, str, last, 1);
10587 while (!
NIL_P(result = scan_once(str, pat, &start, 1))) {
10591 str_mod_check(str, p,
len);
10593 if (last >= 0) rb_pat_search(pat, str, last, 1);
10617rb_str_hex(
VALUE str)
10619 return rb_str_to_inum(str, 16, FALSE);
10644rb_str_oct(
VALUE str)
10646 return rb_str_to_inum(str, -8, FALSE);
10649#ifndef HAVE_CRYPT_R
10654 rb_nativethread_lock_t lock;
10655} crypt_mutex = {PTHREAD_MUTEX_INITIALIZER};
10658crypt_mutex_initialize(
void)
10729# define CRYPT_END() ALLOCV_END(databuf)
10731 extern char *crypt(
const char *,
const char *);
10732# define CRYPT_END() rb_nativethread_lock_unlock(&crypt_mutex.lock)
10735 const char *s, *saltp;
10738 char salt_8bit_clean[3];
10742 mustnot_wchar(str);
10743 mustnot_wchar(salt);
10745 saltp = RSTRING_PTR(salt);
10746 if (RSTRING_LEN(salt) < 2 || !saltp[0] || !saltp[1]) {
10747 rb_raise(rb_eArgError,
"salt too short (need >=2 bytes)");
10751 if (!ISASCII((
unsigned char)saltp[0]) || !ISASCII((
unsigned char)saltp[1])) {
10752 salt_8bit_clean[0] = saltp[0] & 0x7f;
10753 salt_8bit_clean[1] = saltp[1] & 0x7f;
10754 salt_8bit_clean[2] =
'\0';
10755 saltp = salt_8bit_clean;
10760# ifdef HAVE_STRUCT_CRYPT_DATA_INITIALIZED
10761 data->initialized = 0;
10763 res = crypt_r(s, saltp, data);
10765 crypt_mutex_initialize();
10767 res = crypt(s, saltp);
10808 char *ptr, *p, *pend;
10811 unsigned long sum0 = 0;
10816 ptr = p = RSTRING_PTR(str);
10817 len = RSTRING_LEN(str);
10823 str_mod_check(str, ptr,
len);
10826 sum0 += (
unsigned char)*p;
10837 if (bits < (
int)
sizeof(
long)*CHAR_BIT) {
10838 sum0 &= (((
unsigned long)1)<<bits)-1;
10858rb_str_justify(
int argc,
VALUE *argv,
VALUE str,
char jflag)
10862 long width,
len, flen = 1, fclen = 1;
10865 const char *f =
" ";
10866 long n, size, llen, rlen, llen2 = 0, rlen2 = 0;
10868 int singlebyte = 1, cr;
10872 enc = STR_ENC_GET(str);
10873 termlen = rb_enc_mbminlen(enc);
10877 enc = rb_enc_check(str, pad);
10878 f = RSTRING_PTR(pad);
10879 flen = RSTRING_LEN(pad);
10880 fclen = str_strlen(pad, enc);
10881 singlebyte = single_byte_optimizable(pad);
10882 if (flen == 0 || fclen == 0) {
10883 rb_raise(rb_eArgError,
"zero width padding");
10886 len = str_strlen(str, enc);
10887 if (width < 0 || len >= width)
return str_duplicate(
rb_cString, str);
10889 llen = (jflag ==
'l') ? 0 : ((jflag ==
'r') ? n : n/2);
10893 llen2 = str_offset(f, f + flen, llen % fclen, enc, singlebyte);
10894 rlen2 = str_offset(f, f + flen, rlen % fclen, enc, singlebyte);
10896 size = RSTRING_LEN(str);
10897 if ((
len = llen / fclen + rlen / fclen) >= LONG_MAX / flen ||
10898 (
len *= flen) >= LONG_MAX - llen2 - rlen2 ||
10899 (
len += llen2 + rlen2) >= LONG_MAX - size) {
10900 rb_raise(rb_eArgError,
"argument too big");
10904 p = RSTRING_PTR(res);
10906 memset(p, *f, llen);
10910 while (llen >= fclen) {
10916 memcpy(p, f, llen2);
10920 memcpy(p, RSTRING_PTR(str), size);
10923 memset(p, *f, rlen);
10927 while (rlen >= fclen) {
10933 memcpy(p, f, rlen2);
10937 TERM_FILL(p, termlen);
10938 STR_SET_LEN(res, p-RSTRING_PTR(res));
10961rb_str_ljust(
int argc,
VALUE *argv,
VALUE str)
10963 return rb_str_justify(argc, argv, str,
'l');
10977rb_str_rjust(
int argc,
VALUE *argv,
VALUE str)
10979 return rb_str_justify(argc, argv, str,
'r');
10994rb_str_center(
int argc,
VALUE *argv,
VALUE str)
10996 return rb_str_justify(argc, argv, str,
'c');
11012 sep = get_pat_quoted(sep, 0);
11024 pos = rb_str_index(str, sep, 0);
11025 if (pos < 0)
goto failed;
11030 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11033 return rb_ary_new3(3, str_duplicate(
rb_cString, str), str_new_empty_String(str), str_new_empty_String(str));
11047 long pos = RSTRING_LEN(str);
11049 sep = get_pat_quoted(sep, 0);
11062 pos = rb_str_rindex(str, sep, pos);
11071 RSTRING_LEN(str)-pos-RSTRING_LEN(sep)));
11073 return rb_ary_new3(3, str_new_empty_String(str), str_new_empty_String(str), str_duplicate(
rb_cString, str));
11085rb_str_start_with(
int argc,
VALUE *argv,
VALUE str)
11089 for (i=0; i<argc; i++) {
11090 VALUE tmp = argv[i];
11092 if (rb_reg_start_with_p(tmp, str))
11096 const char *p, *s, *e;
11101 enc = rb_enc_check(str, tmp);
11102 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11103 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11104 p = RSTRING_PTR(str);
11107 if (!at_char_right_boundary(p, s, e, enc))
11109 if (memcmp(p, RSTRING_PTR(tmp), tlen) == 0)
11125rb_str_end_with(
int argc,
VALUE *argv,
VALUE str)
11129 for (i=0; i<argc; i++) {
11130 VALUE tmp = argv[i];
11131 const char *p, *s, *e;
11136 enc = rb_enc_check(str, tmp);
11137 if ((tlen = RSTRING_LEN(tmp)) == 0)
return Qtrue;
11138 if ((slen = RSTRING_LEN(str)) < tlen)
continue;
11139 p = RSTRING_PTR(str);
11142 if (!at_char_boundary(p, s, e, enc))
11144 if (memcmp(s, RSTRING_PTR(tmp), tlen) == 0)
11160deleted_prefix_length(
VALUE str,
VALUE prefix)
11162 const char *strptr, *prefixptr;
11163 long olen, prefixlen;
11164 rb_encoding *enc = rb_enc_get(str);
11168 if (!is_broken_string(prefix) ||
11169 !rb_enc_asciicompat(enc) ||
11170 !rb_enc_asciicompat(rb_enc_get(prefix))) {
11171 enc = rb_enc_check(str, prefix);
11175 prefixlen = RSTRING_LEN(prefix);
11176 if (prefixlen <= 0)
return 0;
11177 olen = RSTRING_LEN(str);
11178 if (olen < prefixlen)
return 0;
11179 strptr = RSTRING_PTR(str);
11180 prefixptr = RSTRING_PTR(prefix);
11181 if (memcmp(strptr, prefixptr, prefixlen) != 0)
return 0;
11182 if (is_broken_string(prefix)) {
11183 if (!is_broken_string(str)) {
11187 const char *strend = strptr + olen;
11188 const char *after_prefix = strptr + prefixlen;
11189 if (!at_char_right_boundary(strptr, after_prefix, strend, enc)) {
11209rb_str_delete_prefix_bang(
VALUE str,
VALUE prefix)
11212 str_modify_keep_cr(str);
11214 prefixlen = deleted_prefix_length(str, prefix);
11215 if (prefixlen <= 0)
return Qnil;
11229rb_str_delete_prefix(
VALUE str,
VALUE prefix)
11233 prefixlen = deleted_prefix_length(str, prefix);
11234 if (prefixlen <= 0)
return str_duplicate(
rb_cString, str);
11236 return rb_str_subseq(str, prefixlen, RSTRING_LEN(str) - prefixlen);
11249deleted_suffix_length(
VALUE str,
VALUE suffix)
11251 const char *strptr, *suffixptr;
11252 long olen, suffixlen;
11256 if (is_broken_string(suffix))
return 0;
11257 enc = rb_enc_check(str, suffix);
11260 suffixlen = RSTRING_LEN(suffix);
11261 if (suffixlen <= 0)
return 0;
11262 olen = RSTRING_LEN(str);
11263 if (olen < suffixlen)
return 0;
11264 strptr = RSTRING_PTR(str);
11265 suffixptr = RSTRING_PTR(suffix);
11266 const char *strend = strptr + olen;
11267 const char *before_suffix = strend - suffixlen;
11268 if (memcmp(before_suffix, suffixptr, suffixlen) != 0)
return 0;
11269 if (!at_char_boundary(strptr, before_suffix, strend, enc))
return 0;
11284rb_str_delete_suffix_bang(
VALUE str,
VALUE suffix)
11286 long olen, suffixlen,
len;
11287 str_modifiable(str);
11289 suffixlen = deleted_suffix_length(str, suffix);
11290 if (suffixlen <= 0)
return Qnil;
11292 olen = RSTRING_LEN(str);
11293 str_modify_keep_cr(str);
11294 len = olen - suffixlen;
11295 STR_SET_LEN(str,
len);
11296 TERM_FILL(&RSTRING_PTR(str)[
len], TERM_LEN(str));
11312rb_str_delete_suffix(
VALUE str,
VALUE suffix)
11316 suffixlen = deleted_suffix_length(str, suffix);
11317 if (suffixlen <= 0)
return str_duplicate(
rb_cString, str);
11319 return rb_str_subseq(str, 0, RSTRING_LEN(str) - suffixlen);
11326 rb_raise(
rb_eTypeError,
"value of %"PRIsVALUE
" must be String", rb_id2str(
id));
11334 val = rb_fs_check(val);
11337 "value of %"PRIsVALUE
" must be String or Regexp",
11341 rb_warn_deprecated(
"'$;'", NULL);
11358 str_modifiable(str);
11360 rb_encoding *encoding = rb_to_encoding(enc);
11361 int idx = rb_enc_to_index(encoding);
11368 rb_enc_associate_index(str, idx);
11392 if (STR_EMBED_P(str)) {
11393 str2 = str_alloc_embed(
rb_cString, RSTRING_LEN(str) + TERM_LEN(str));
11398 str_replace_shared_without_enc(str2, str);
11400 if (rb_enc_asciicompat(STR_ENC_GET(str))) {
11433rb_str_valid_encoding_p(
VALUE str)
11453rb_str_is_ascii_only_p(
VALUE str)
11463 static const char ellipsis[] =
"...";
11464 const long ellipsislen =
sizeof(ellipsis) - 1;
11465 rb_encoding *
const enc = rb_enc_get(str);
11466 const long blen = RSTRING_LEN(str);
11467 const char *
const p = RSTRING_PTR(str), *e = p + blen;
11468 VALUE estr, ret = 0;
11471 if (
len * rb_enc_mbminlen(enc) >= blen ||
11475 else if (
len <= ellipsislen ||
11477 if (rb_enc_asciicompat(enc)) {
11479 rb_enc_associate(ret, enc);
11486 else if (ret =
rb_str_subseq(str, 0, e - p), rb_enc_asciicompat(enc)) {
11491 rb_enc_from_encoding(enc), 0,
Qnil);
11498str_compat_and_valid(
VALUE str, rb_encoding *enc)
11504 rb_raise(rb_eArgError,
"replacement must be valid byte sequence '%+"PRIsVALUE
"'", str);
11507 rb_encoding *e = STR_ENC_GET(str);
11510 rb_enc_inspect_name(enc), rb_enc_inspect_name(e));
11516static VALUE enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl,
int cr);
11521 rb_encoding *enc = STR_ENC_GET(str);
11526rb_enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl)
11529 if (enc == STR_ENC_GET(str)) {
11534 return enc_str_scrub(enc, str, repl, cr);
11538enc_str_scrub(rb_encoding *enc,
VALUE str,
VALUE repl,
int cr)
11542 const char *rep, *p, *e, *p1, *sp;
11548 rb_raise(rb_eArgError,
"both of block and replacement given");
11555 if (!
NIL_P(repl)) {
11556 repl = str_compat_and_valid(repl, enc);
11559 if (rb_enc_dummy_p(enc)) {
11562 encidx = rb_enc_to_index(enc);
11564#define DEFAULT_REPLACE_CHAR(str) do { \
11565 RBIMPL_ATTR_NONSTRING() static const char replace[sizeof(str)-1] = str; \
11566 rep = replace; replen = (int)sizeof(replace); \
11569 slen = RSTRING_LEN(str);
11570 p = RSTRING_PTR(str);
11575 if (rb_enc_asciicompat(enc)) {
11581 else if (!
NIL_P(repl)) {
11582 rep = RSTRING_PTR(repl);
11583 replen = RSTRING_LEN(repl);
11587 DEFAULT_REPLACE_CHAR(
"\xEF\xBF\xBD");
11591 DEFAULT_REPLACE_CHAR(
"?");
11596 p = search_nonascii(p, e);
11601 int ret = rb_enc_precise_mbclen(p, e, enc);
11620 if (e - p < clen) clen = e - p;
11627 for (; clen > 1; clen--) {
11628 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11639 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11640 str_mod_check(str, sp, slen);
11641 repl = str_compat_and_valid(repl, enc);
11648 p = search_nonascii(p, e);
11674 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11675 str_mod_check(str, sp, slen);
11676 repl = str_compat_and_valid(repl, enc);
11685 long mbminlen = rb_enc_mbminlen(enc);
11689 else if (!
NIL_P(repl)) {
11690 rep = RSTRING_PTR(repl);
11691 replen = RSTRING_LEN(repl);
11693 else if (encidx == ENCINDEX_UTF_16BE) {
11694 DEFAULT_REPLACE_CHAR(
"\xFF\xFD");
11696 else if (encidx == ENCINDEX_UTF_16LE) {
11697 DEFAULT_REPLACE_CHAR(
"\xFD\xFF");
11699 else if (encidx == ENCINDEX_UTF_32BE) {
11700 DEFAULT_REPLACE_CHAR(
"\x00\x00\xFF\xFD");
11702 else if (encidx == ENCINDEX_UTF_32LE) {
11703 DEFAULT_REPLACE_CHAR(
"\xFD\xFF\x00\x00");
11706 DEFAULT_REPLACE_CHAR(
"?");
11710 int ret = rb_enc_precise_mbclen(p, e, enc);
11723 if (e - p < clen) clen = e - p;
11724 if (clen <= mbminlen * 2) {
11729 for (; clen > mbminlen; clen-=mbminlen) {
11730 ret = rb_enc_precise_mbclen(q, q + clen, enc);
11740 repl =
rb_yield(rb_enc_str_new(p, clen, enc));
11741 str_mod_check(str, sp, slen);
11742 repl = str_compat_and_valid(repl, enc);
11767 repl =
rb_yield(rb_enc_str_new(p, e-p, enc));
11768 str_mod_check(str, sp, slen);
11769 repl = str_compat_and_valid(repl, enc);
11805str_scrub_bang(
int argc,
VALUE *argv,
VALUE str)
11813static ID id_normalize;
11814static ID id_normalized_p;
11815static VALUE mUnicodeNormalize;
11818unicode_normalize_common(
int argc,
VALUE *argv,
VALUE str,
ID id)
11820 static int UnicodeNormalizeRequired = 0;
11823 if (!UnicodeNormalizeRequired) {
11824 rb_require(
"unicode_normalize/normalize.rb");
11825 UnicodeNormalizeRequired = 1;
11829 return rb_funcallv(mUnicodeNormalize,
id, argc+1, argv2);
11866rb_str_unicode_normalize(
int argc,
VALUE *argv,
VALUE str)
11868 return unicode_normalize_common(argc, argv, str, id_normalize);
11882rb_str_unicode_normalize_bang(
int argc,
VALUE *argv,
VALUE str)
11884 return rb_str_replace(str, unicode_normalize_common(argc, argv, str, id_normalize));
11911rb_str_unicode_normalized_p(
int argc,
VALUE *argv,
VALUE str)
11913 return unicode_normalize_common(argc, argv, str, id_normalized_p);
12045#define sym_equal rb_obj_equal
12048sym_printable(
const char *s,
const char *send, rb_encoding *enc)
12052 int c = rb_enc_precise_mbclen(s, send, enc);
12056 c = rb_enc_mbc_to_codepoint(s, send, enc);
12064rb_str_symname_p(
VALUE sym)
12072 enc = STR_ENC_GET(sym);
12073 ptr = RSTRING_PTR(sym);
12074 len = RSTRING_LEN(sym);
12075 if ((resenc != enc && !rb_str_is_ascii_only_p(sym)) ||
len != (
long)strlen(ptr) ||
12083rb_str_quote_unprintable(
VALUE str)
12088 rb_encoding *resenc;
12093 enc = STR_ENC_GET(str);
12094 ptr = RSTRING_PTR(str);
12095 len = RSTRING_LEN(str);
12096 if ((resenc != enc && !rb_str_is_ascii_only_p(str)) ||
12097 !sym_printable(ptr, ptr +
len, enc)) {
12098 return rb_str_escape(str);
12104rb_id_quote_unprintable(
ID id)
12106 VALUE str = rb_id2str(
id);
12107 if (!rb_str_symname_p(str)) {
12108 return rb_str_escape(str);
12126sym_inspect(
VALUE sym)
12133 if (!rb_str_symname_p(str)) {
12135 len = RSTRING_LEN(str);
12136 rb_str_resize(str,
len + 1);
12137 dest = RSTRING_PTR(str);
12138 memmove(dest + 1, dest,
len);
12141 rb_encoding *enc = STR_ENC_GET(str);
12142 VALUE orig_str = str;
12144 len = RSTRING_LEN(orig_str);
12145 str = rb_enc_str_new(0,
len + 1, enc);
12148 ptr = RSTRING_PTR(orig_str);
12149 dest = RSTRING_PTR(str);
12150 memcpy(dest + 1, ptr,
len);
12170rb_sym_proc_call(
ID mid,
int argc,
const VALUE *argv,
int kw_splat,
VALUE passed_proc)
12175 rb_raise(rb_eArgError,
"no receiver given");
12272 return rb_str_match(
rb_sym2str(sym), other);
12287sym_match_m(
int argc,
VALUE *argv,
VALUE sym)
12289 return rb_str_match_m(argc, argv,
rb_sym2str(sym));
12302sym_match_m_p(
int argc,
VALUE *argv,
VALUE sym)
12304 return rb_str_match_m_p(argc, argv, sym);
12322 return rb_str_aref_m(argc, argv,
rb_sym2str(sym));
12333sym_length(
VALUE sym)
12347sym_empty(
VALUE sym)
12381sym_downcase(
int argc,
VALUE *argv,
VALUE sym)
12397sym_capitalize(
int argc,
VALUE *argv,
VALUE sym)
12413sym_swapcase(
int argc,
VALUE *argv,
VALUE sym)
12427sym_start_with(
int argc,
VALUE *argv,
VALUE sym)
12429 return rb_str_start_with(argc, argv,
rb_sym2str(sym));
12442sym_end_with(
int argc,
VALUE *argv,
VALUE sym)
12444 return rb_str_end_with(argc, argv,
rb_sym2str(sym));
12456sym_encoding(
VALUE sym)
12462string_for_symbol(
VALUE name)
12467 rb_raise(
rb_eTypeError,
"%+"PRIsVALUE
" is not a symbol nor a string",
12481 name = string_for_symbol(name);
12482 return rb_intern_str(name);
12491 name = string_for_symbol(name);
12515 return rb_fstring(str);
12521 struct RString fake_str = {RBASIC_INIT};
12522 return register_fstring(setup_fake_str(&fake_str, ptr,
len, ENCINDEX_US_ASCII),
true,
false);
12534 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12535 rb_enc_autoload(enc);
12538 struct RString fake_str = {RBASIC_INIT};
12539 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
true,
false);
12543rb_enc_literal_str(
const char *ptr,
long len, rb_encoding *enc)
12545 if (enc != NULL && UNLIKELY(rb_enc_autoload_p(enc))) {
12546 rb_enc_autoload(enc);
12549 struct RString fake_str = {RBASIC_INIT};
12550 return register_fstring(rb_setup_fake_str(&fake_str, ptr,
len, enc),
true,
true);
12561rb_yjit_str_concat_codepoint(
VALUE str,
VALUE codepoint)
12566 if (RB_LIKELY(code >= 0 && code < 0xff)) {
12567 rb_str_buf_cat_byte(str, (
char) code);
12747 rb_gc_register_address(&
rb_fs);
#define RUBY_ASSERT_ALWAYS(expr,...)
A variant of RUBY_ASSERT that does not interface with RUBY_DEBUG.
#define RUBY_ASSERT_BUILTIN_TYPE(obj, type)
A variant of RUBY_ASSERT that asserts when either RUBY_DEBUG or built-in type of obj is type.
#define RUBY_ASSERT(...)
Asserts that the given expression is truthy; the assertion is checked only when RUBY_DEBUG is truthy.
@ RUBY_ENC_CODERANGE_7BIT
The object holds 0 to 127 inclusive and nothing else.
static enum ruby_coderange_type RB_ENC_CODERANGE_AND(enum ruby_coderange_type a, enum ruby_coderange_type b)
"Mix" two code ranges into one.
#define ISSPACE
Old name of rb_isspace.
static int rb_isspace(int c)
Our own locale-insensitive version of isspace(3).
#define ISDIGIT
Old name of rb_isdigit.
#define ISALPHA
Old name of rb_isalpha.
#define TOLOWER
Old name of rb_tolower.
#define ISPRINT
Old name of rb_isprint.
#define rb_define_method(klass, mid, func, arity)
Defines klass#mid.
#define rb_define_singleton_method(klass, mid, func, arity)
Defines klass.mid.
static bool rb_enc_is_newline(const char *p, const char *e, rb_encoding *enc)
Queries if the passed pointer points to a newline character.
static bool rb_enc_isprint(OnigCodePoint c, rb_encoding *enc)
Identical to rb_isprint(), except it additionally takes an encoding.
static bool rb_enc_isctype(OnigCodePoint c, OnigCtype t, rb_encoding *enc)
Queries if the passed code point is of passed character type in the passed encoding.
VALUE rb_enc_sprintf(rb_encoding *enc, const char *fmt,...)
Identical to rb_sprintf(), except it additionally takes an encoding.
static VALUE RB_OBJ_FROZEN_RAW(VALUE obj)
This is an implementation detail of RB_OBJ_FROZEN().
static VALUE RB_FL_TEST_RAW(VALUE obj, VALUE flags)
This is an implementation detail of RB_FL_TEST().
@ RUBY_FL_FREEZE
This flag has something to do with data immutability.
void rb_include_module(VALUE klass, VALUE module)
Includes a module to a class.
VALUE rb_define_class(const char *name, VALUE super)
Defines a top-level class.
VALUE rb_define_module(const char *name)
Defines a top-level module.
void rb_define_alias(VALUE klass, const char *name1, const char *name2)
Defines an alias of a method.
void rb_undef_method(VALUE klass, const char *name)
Defines an undef of a method.
int rb_scan_args(int argc, const VALUE *argv, const char *fmt,...)
Retrieves argument from argc and argv to given VALUE references according to the format string.
int rb_block_given_p(void)
Determines if the current method is given a block.
int rb_get_kwargs(VALUE keyword_hash, const ID *table, int required, int optional, VALUE *values)
Keyword argument deconstructor.
#define TYPE(_)
Old name of rb_type.
#define ENCODING_SET_INLINED(obj, i)
Old name of RB_ENCODING_SET_INLINED.
#define RB_INTEGER_TYPE_P
Old name of rb_integer_type_p.
#define ENC_CODERANGE_7BIT
Old name of RUBY_ENC_CODERANGE_7BIT.
#define ENC_CODERANGE_VALID
Old name of RUBY_ENC_CODERANGE_VALID.
#define FL_UNSET_RAW
Old name of RB_FL_UNSET_RAW.
#define rb_str_buf_cat2
Old name of rb_str_cat_cstr.
#define FL_EXIVAR
Old name of RUBY_FL_EXIVAR.
#define ALLOCV
Old name of RB_ALLOCV.
#define T_STRING
Old name of RUBY_T_STRING.
#define ENC_CODERANGE_CLEAN_P(cr)
Old name of RB_ENC_CODERANGE_CLEAN_P.
#define ENC_CODERANGE_AND(a, b)
Old name of RB_ENC_CODERANGE_AND.
#define xfree
Old name of ruby_xfree.
#define Qundef
Old name of RUBY_Qundef.
#define INT2FIX
Old name of RB_INT2FIX.
#define OBJ_FROZEN
Old name of RB_OBJ_FROZEN.
#define rb_str_cat2
Old name of rb_str_cat_cstr.
#define UNREACHABLE
Old name of RBIMPL_UNREACHABLE.
#define ID2SYM
Old name of RB_ID2SYM.
#define T_BIGNUM
Old name of RUBY_T_BIGNUM.
#define OBJ_FREEZE
Old name of RB_OBJ_FREEZE.
#define T_FIXNUM
Old name of RUBY_T_FIXNUM.
#define UNREACHABLE_RETURN
Old name of RBIMPL_UNREACHABLE_RETURN.
#define SYM2ID
Old name of RB_SYM2ID.
#define ENC_CODERANGE(obj)
Old name of RB_ENC_CODERANGE.
#define CLASS_OF
Old name of rb_class_of.
#define ENC_CODERANGE_UNKNOWN
Old name of RUBY_ENC_CODERANGE_UNKNOWN.
#define SIZET2NUM
Old name of RB_SIZE2NUM.
#define FIXABLE
Old name of RB_FIXABLE.
#define xmalloc
Old name of ruby_xmalloc.
#define ENCODING_GET(obj)
Old name of RB_ENCODING_GET.
#define LONG2FIX
Old name of RB_INT2FIX.
#define ENC_CODERANGE_MASK
Old name of RUBY_ENC_CODERANGE_MASK.
#define ZALLOC_N
Old name of RB_ZALLOC_N.
#define ALLOC_N
Old name of RB_ALLOC_N.
#define MBCLEN_CHARFOUND_LEN(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_LEN.
#define FL_TEST_RAW
Old name of RB_FL_TEST_RAW.
#define FL_SET
Old name of RB_FL_SET.
#define rb_ary_new3
Old name of rb_ary_new_from_args.
#define ENCODING_INLINE_MAX
Old name of RUBY_ENCODING_INLINE_MAX.
#define LONG2NUM
Old name of RB_LONG2NUM.
#define FL_ANY_RAW
Old name of RB_FL_ANY_RAW.
#define MBCLEN_INVALID_P(ret)
Old name of ONIGENC_MBCLEN_INVALID_P.
#define Qtrue
Old name of RUBY_Qtrue.
#define ST2FIX
Old name of RB_ST2FIX.
#define MBCLEN_NEEDMORE_P(ret)
Old name of ONIGENC_MBCLEN_NEEDMORE_P.
#define FIXNUM_MAX
Old name of RUBY_FIXNUM_MAX.
#define NUM2INT
Old name of RB_NUM2INT.
#define Qnil
Old name of RUBY_Qnil.
#define Qfalse
Old name of RUBY_Qfalse.
#define FIX2LONG
Old name of RB_FIX2LONG.
#define ENC_CODERANGE_BROKEN
Old name of RUBY_ENC_CODERANGE_BROKEN.
#define scan_hex(s, l, e)
Old name of ruby_scan_hex.
#define NIL_P
Old name of RB_NIL_P.
#define ALLOCV_N
Old name of RB_ALLOCV_N.
#define MBCLEN_CHARFOUND_P(ret)
Old name of ONIGENC_MBCLEN_CHARFOUND_P.
#define FL_WB_PROTECTED
Old name of RUBY_FL_WB_PROTECTED.
#define DBL2NUM
Old name of rb_float_new.
#define BUILTIN_TYPE
Old name of RB_BUILTIN_TYPE.
#define ENCODING_SHIFT
Old name of RUBY_ENCODING_SHIFT.
#define FL_TEST
Old name of RB_FL_TEST.
#define FL_FREEZE
Old name of RUBY_FL_FREEZE.
#define NUM2LONG
Old name of RB_NUM2LONG.
#define ENCODING_GET_INLINED(obj)
Old name of RB_ENCODING_GET_INLINED.
#define ENC_CODERANGE_CLEAR(obj)
Old name of RB_ENC_CODERANGE_CLEAR.
#define FL_UNSET
Old name of RB_FL_UNSET.
#define UINT2NUM
Old name of RB_UINT2NUM.
#define ENCODING_IS_ASCII8BIT(obj)
Old name of RB_ENCODING_IS_ASCII8BIT.
#define FIXNUM_P
Old name of RB_FIXNUM_P.
#define CONST_ID
Old name of RUBY_CONST_ID.
#define rb_ary_new2
Old name of rb_ary_new_capa.
#define ENC_CODERANGE_SET(obj, cr)
Old name of RB_ENC_CODERANGE_SET.
#define ENCODING_CODERANGE_SET(obj, encindex, cr)
Old name of RB_ENCODING_CODERANGE_SET.
#define FL_SET_RAW
Old name of RB_FL_SET_RAW.
#define SYMBOL_P
Old name of RB_SYMBOL_P.
#define OBJ_FROZEN_RAW
Old name of RB_OBJ_FROZEN_RAW.
#define T_REGEXP
Old name of RUBY_T_REGEXP.
#define ENCODING_MASK
Old name of RUBY_ENCODING_MASK.
void rb_category_warn(rb_warning_category_t category, const char *fmt,...)
Identical to rb_category_warning(), except it reports unless $VERBOSE is nil.
void rb_syserr_fail(int e, const char *mesg)
Raises appropriate exception that represents a C errno.
VALUE rb_eRangeError
RangeError exception.
VALUE rb_eTypeError
TypeError exception.
VALUE rb_eEncCompatError
Encoding::CompatibilityError exception.
VALUE rb_eRuntimeError
RuntimeError exception.
VALUE rb_eIndexError
IndexError exception.
@ RB_WARN_CATEGORY_DEPRECATED
Warning is for deprecated features.
VALUE rb_any_to_s(VALUE obj)
Generates a textual representation of the given object.
VALUE rb_obj_alloc(VALUE klass)
Allocates an instance of the given class.
VALUE rb_class_new_instance_pass_kw(int argc, const VALUE *argv, VALUE klass)
Identical to rb_class_new_instance(), except it passes the passed keywords if any to the #initialize ...
VALUE rb_obj_frozen_p(VALUE obj)
Just calls RB_OBJ_FROZEN() inside.
double rb_str_to_dbl(VALUE str, int mode)
Identical to rb_cstr_to_dbl(), except it accepts a Ruby string instead of a C string.
VALUE rb_obj_class(VALUE obj)
Queries the class of an object.
VALUE rb_obj_dup(VALUE obj)
Duplicates the given object.
VALUE rb_cSymbol
Symbol class.
VALUE rb_equal(VALUE lhs, VALUE rhs)
This function is an optimised version of calling #==.
VALUE rb_obj_freeze(VALUE obj)
Just calls rb_obj_freeze_inline() inside.
VALUE rb_mComparable
Comparable module.
VALUE rb_cString
String class.
VALUE rb_to_int(VALUE val)
Identical to rb_check_to_int(), except it raises in case of conversion mismatch.
#define RB_OBJ_WRITE(old, slot, young)
Declaration of a "back" pointer.
rb_encoding * rb_utf8_encoding(void)
Queries the encoding that represents UTF-8.
rb_encoding * rb_ascii8bit_encoding(void)
Queries the encoding that represents ASCII-8BIT, a.k.a. binary.
rb_encoding * rb_filesystem_encoding(void)
Queries the "filesystem" encoding.
rb_encoding * rb_default_internal_encoding(void)
Queries the "default internal" encoding.
static char * rb_enc_left_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the left boundary of a character.
int rb_utf8_encindex(void)
Identical to rb_utf8_encoding(), except it returns the encoding's index instead of the encoding itself.
int rb_ascii8bit_encindex(void)
Identical to rb_ascii8bit_encoding(), except it returns the encoding's index instead of the encoding itself.
static char * rb_enc_right_char_head(const char *s, const char *p, const char *e, rb_encoding *enc)
Queries the right boundary of a character.
static unsigned int rb_enc_codepoint(const char *p, const char *e, rb_encoding *enc)
Queries the code point of character pointed by the passed pointer.
rb_encoding * rb_default_external_encoding(void)
Queries the "default external" encoding.
static int rb_enc_mbmaxlen(rb_encoding *enc)
Queries the maximum number of bytes that the passed encoding needs to represent a character.
static int RB_ENCODING_GET_INLINED(VALUE obj)
Queries the encoding of the passed object.
rb_encoding * rb_locale_encoding(void)
Queries the encoding that represents the current locale.
rb_encoding * rb_usascii_encoding(void)
Queries the encoding that represents US-ASCII.
static int rb_enc_code_to_mbclen(int c, rb_encoding *enc)
Identical to rb_enc_codelen(), except it returns 0 for invalid code points.
static char * rb_enc_step_back(const char *s, const char *p, const char *e, int n, rb_encoding *enc)
Scans the string backwards for n characters.
int rb_usascii_encindex(void)
Identical to rb_usascii_encoding(), except it returns the encoding's index instead of the encoding itself.
VALUE rb_str_conv_enc(VALUE str, rb_encoding *from, rb_encoding *to)
Encoding conversion main routine.
int rb_enc_str_coderange(VALUE str)
Scans the passed string to collect its code range.
VALUE rb_enc_str_new_static(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it takes a C string literal.
char * rb_enc_nth(const char *head, const char *tail, long nth, rb_encoding *enc)
Queries the n-th character.
VALUE rb_str_conv_enc_opts(VALUE str, rb_encoding *from, rb_encoding *to, int ecflags, VALUE ecopts)
Identical to rb_str_conv_enc(), except it additionally takes IO encoder options.
VALUE rb_enc_interned_str(const char *ptr, long len, rb_encoding *enc)
Identical to rb_enc_str_new(), except it returns a "f"string.
long rb_memsearch(const void *x, long m, const void *y, long n, rb_encoding *enc)
Looks for the passed string in the passed buffer.
long rb_enc_strlen(const char *head, const char *tail, rb_encoding *enc)
Counts the number of characters of the passed string, according to the passed encoding.
VALUE rb_enc_str_buf_cat(VALUE str, const char *ptr, long len, rb_encoding *enc)
Identical to rb_str_cat(), except it additionally takes an encoding.
VALUE rb_enc_str_new_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_export_to_enc(VALUE obj, rb_encoding *enc)
Identical to rb_str_export(), except it additionally takes an encoding.
VALUE rb_external_str_new_with_enc(const char *ptr, long len, rb_encoding *enc)
Identical to rb_external_str_new(), except it additionally takes an encoding.
int rb_enc_str_asciionly_p(VALUE str)
Queries if the passed string is "ASCII only".
VALUE rb_enc_interned_str_cstr(const char *ptr, rb_encoding *enc)
Identical to rb_enc_str_new_cstr(), except it returns a "f"string.
long rb_str_coderange_scan_restartable(const char *str, const char *end, rb_encoding *enc, int *cr)
Scans the passed string until it finds something odd.
int rb_enc_symname2_p(const char *name, long len, rb_encoding *enc)
Identical to rb_enc_symname_p(), except it additionally takes the passed string's length.
rb_econv_result_t rb_econv_convert(rb_econv_t *ec, const unsigned char **source_buffer_ptr, const unsigned char *source_buffer_end, unsigned char **destination_buffer_ptr, unsigned char *destination_buffer_end, int flags)
Converts a string from an encoding to another.
rb_econv_result_t
return value of rb_econv_convert()
@ econv_finished
The conversion stopped after converting everything.
@ econv_destination_buffer_full
The conversion stopped because there is no destination.
rb_econv_t * rb_econv_open_opts(const char *source_encoding, const char *destination_encoding, int ecflags, VALUE ecopts)
Identical to rb_econv_open(), except it additionally takes a hash of optional strings.
VALUE rb_str_encode(VALUE str, VALUE to, int ecflags, VALUE ecopts)
Converts the contents of the passed string from its encoding to the passed one.
void rb_econv_close(rb_econv_t *ec)
Destructs a converter.
VALUE rb_funcall(VALUE recv, ID mid, int n,...)
Calls a method.
VALUE rb_funcall_with_block_kw(VALUE recv, ID mid, int argc, const VALUE *argv, VALUE procval, int kw_splat)
Identical to rb_funcallv_with_block(), except you can specify how to handle the last element of the g...
#define RGENGC_WB_PROTECTED_STRING
This is a compile-time flag to enable/disable write barrier for struct RString.
VALUE rb_ary_new(void)
Allocates a new, empty array.
#define RETURN_SIZED_ENUMERATOR(obj, argc, argv, size_fn)
This roughly resembles return enum_for(__callee__) unless block_given?.
#define RETURN_ENUMERATOR(obj, argc, argv)
Identical to RETURN_SIZED_ENUMERATOR(), except its size is unknown.
#define UNLIMITED_ARGUMENTS
This macro is used in conjunction with rb_check_arity().
static int rb_check_arity(int argc, int min, int max)
Ensures that the passed integer is in the passed range.
VALUE rb_hash_new(void)
Creates a new, empty hash object.
VALUE rb_fs
The field separator character for inputs, or the $;.
VALUE rb_default_rs
This is the default value of rb_rs, i.e. "\n".
VALUE rb_backref_get(void)
Queries the last match, or Regexp.last_match, or the $~.
VALUE rb_sym_all_symbols(void)
Collects every single symbol that has ever been interned in the entire history of the current process.
void rb_backref_set(VALUE md)
Updates $~.
VALUE rb_range_beg_len(VALUE range, long *begp, long *lenp, long len, int err)
Deconstructs a numerical range.
int rb_reg_backref_number(VALUE match, VALUE backref)
Queries the index of the given named capture.
int rb_reg_options(VALUE re)
Queries the options of the passed regular expression.
VALUE rb_reg_match(VALUE re, VALUE str)
This is the match operator.
void rb_match_busy(VALUE md)
Asserts that the given MatchData is "occupied".
VALUE rb_reg_nth_match(int n, VALUE md)
Queries the nth captured substring.
void rb_str_free(VALUE str)
Destroys the given string for no reason.
VALUE rb_str_new_shared(VALUE str)
Identical to rb_str_new_cstr(), except it takes a Ruby string instead of a C string.
VALUE rb_str_plus(VALUE lhs, VALUE rhs)
Generates a new string, concatenating the former to the latter.
#define rb_utf8_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "UTF-8" encoding.
#define rb_hash_end(h)
Just another name of st_hash_end.
#define rb_hash_uint32(h, i)
Just another name of st_hash_uint32.
VALUE rb_str_append(VALUE dst, VALUE src)
Identical to rb_str_buf_append(), except it converts the right hand side before concatenating.
VALUE rb_filesystem_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "filesystem" encoding.
VALUE rb_sym_to_s(VALUE sym)
This is an rb_sym2str() + rb_str_dup() combo.
VALUE rb_str_times(VALUE str, VALUE num)
Repetition of a string.
VALUE rb_external_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "default external" encoding.
VALUE rb_str_tmp_new(long len)
Allocates a "temporary" string.
long rb_str_offset(VALUE str, long pos)
"Inverse" of rb_str_sublen().
VALUE rb_str_succ(VALUE orig)
Searches for the "successor" of a string.
int rb_str_hash_cmp(VALUE str1, VALUE str2)
Compares two strings.
VALUE rb_str_subseq(VALUE str, long beg, long len)
Identical to rb_str_substr(), except the numbers are interpreted as byte offsets instead of character offsets.
VALUE rb_str_ellipsize(VALUE str, long len)
Shortens str and adds three dots, an ellipsis, if it is longer than len characters.
st_index_t rb_memhash(const void *ptr, long len)
This is a universal hash function.
#define rb_str_new(str, len)
Allocates an instance of rb_cString.
void rb_str_shared_replace(VALUE dst, VALUE src)
Replaces the contents of the former with the latter.
#define rb_str_buf_cat
Just another name of rb_str_cat.
VALUE rb_str_new_static(const char *ptr, long len)
Identical to rb_str_new(), except it takes a C string literal.
#define rb_usascii_str_new(str, len)
Identical to rb_str_new, except it generates a string of "US ASCII" encoding.
size_t rb_str_capacity(VALUE str)
Queries the capacity of the given string.
VALUE rb_str_new_frozen(VALUE str)
Creates a frozen copy of the string, if necessary.
VALUE rb_str_dup(VALUE str)
Duplicates a string.
st_index_t rb_str_hash(VALUE str)
Calculates a hash value of a string.
VALUE rb_str_cat(VALUE dst, const char *src, long srclen)
Destructively appends the passed contents to the string.
VALUE rb_str_locktmp(VALUE str)
Obtains a "temporary lock" of the string.
long rb_str_strlen(VALUE str)
Counts the number of characters (not bytes) that are stored inside of the given string.
VALUE rb_str_resurrect(VALUE str)
I guess there is no use case of this function in extension libraries, but this is a routine identical...
#define rb_str_buf_new_cstr(str)
Identical to rb_str_new_cstr, except done differently.
#define rb_usascii_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "US ASCII" encoding.
VALUE rb_str_replace(VALUE dst, VALUE src)
Replaces the contents of the former object with the stringised contents of the latter.
char * rb_str_subpos(VALUE str, long beg, long *len)
Identical to rb_str_substr(), except it returns a C string instead of a Ruby string.
rb_gvar_setter_t rb_str_setter
This is a rb_gvar_setter_t that refutes non-string assignments.
VALUE rb_interned_str_cstr(const char *ptr)
Identical to rb_interned_str(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_filesystem_str_new_cstr(const char *ptr)
Identical to rb_filesystem_str_new(), except it assumes the passed pointer is a pointer to a C string.
#define rb_external_str_new_cstr(str)
Identical to rb_str_new_cstr, except it generates a string of "default external" encoding.
VALUE rb_str_buf_append(VALUE dst, VALUE src)
Identical to rb_str_cat_cstr(), except it takes Ruby's string instead of C's.
long rb_str_sublen(VALUE str, long pos)
Byte offset to character offset conversion.
VALUE rb_str_equal(VALUE str1, VALUE str2)
Equality of two strings.
void rb_str_set_len(VALUE str, long len)
Overwrites the length of the string.
VALUE rb_str_inspect(VALUE str)
Generates a "readable" version of the receiver.
void rb_must_asciicompat(VALUE obj)
Asserts that the given string's encoding is (Ruby's definition of) ASCII compatible.
VALUE rb_interned_str(const char *ptr, long len)
Identical to rb_str_new(), except it returns an infamous "f"string.
int rb_str_cmp(VALUE lhs, VALUE rhs)
Compares two strings, as in strcmp(3).
VALUE rb_str_concat(VALUE dst, VALUE src)
Identical to rb_str_append(), except it also accepts an integer as a codepoint.
int rb_str_comparable(VALUE str1, VALUE str2)
Checks if two strings are comparable with each other or not.
#define rb_strlen_lit(str)
Length of a string literal.
VALUE rb_str_buf_cat_ascii(VALUE dst, const char *src)
Identical to rb_str_cat_cstr(), except it additionally assumes the source string be a NUL terminated ...
VALUE rb_str_freeze(VALUE str)
This is the implementation of String#freeze.
void rb_str_update(VALUE dst, long beg, long len, VALUE src)
Replaces some (or all) of the contents of the given string.
VALUE rb_str_scrub(VALUE str, VALUE repl)
"Cleanses" the string.
#define rb_locale_str_new_cstr(str)
Identical to rb_external_str_new_cstr, except it generates a string of "locale" encoding instead of "default external".
VALUE rb_str_new_with_class(VALUE obj, const char *ptr, long len)
Identical to rb_str_new(), except it takes the class of the allocating object.
#define rb_str_dup_frozen
Just another name of rb_str_new_frozen.
VALUE rb_check_string_type(VALUE obj)
Try converting an object to its stringised representation using its to_str method,...
VALUE rb_str_substr(VALUE str, long beg, long len)
This is the implementation of two-argumented String#slice.
#define rb_str_cat_cstr(buf, str)
Identical to rb_str_cat(), except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_unlocktmp(VALUE str)
Releases a lock formerly obtained by rb_str_locktmp().
VALUE rb_utf8_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "UTF-8" encoding instead of "binary".
#define rb_utf8_str_new(str, len)
Identical to rb_str_new, except it generates a string of "UTF-8" encoding.
void rb_str_modify_expand(VALUE str, long capa)
Identical to rb_str_modify(), except it additionally expands the capacity of the receiver.
VALUE rb_str_dump(VALUE str)
"Inverse" of rb_eval_string().
VALUE rb_locale_str_new(const char *ptr, long len)
Identical to rb_str_new(), except it generates a string of "locale" encoding.
VALUE rb_str_buf_new(long capa)
Allocates a "string buffer".
VALUE rb_str_length(VALUE)
Identical to rb_str_strlen(), except it returns the value in rb_cInteger.
#define rb_str_new_cstr(str)
Identical to rb_str_new, except it assumes the passed pointer is a pointer to a C string.
VALUE rb_str_drop_bytes(VALUE str, long len)
Shrinks the given string for the given number of bytes.
VALUE rb_str_split(VALUE str, const char *delim)
Divides the given string based on the given delimiter.
VALUE rb_usascii_str_new_static(const char *ptr, long len)
Identical to rb_str_new_static(), except it generates a string of "US ASCII" encoding instead of "binary".
VALUE rb_str_intern(VALUE str)
Identical to rb_to_symbol(), except it assumes the receiver being an instance of RString.
VALUE rb_ivar_set(VALUE obj, ID name, VALUE val)
Identical to rb_iv_set(), except it accepts the name as an ID instead of a C string.
VALUE rb_ivar_defined(VALUE obj, ID name)
Queries if the instance variable is defined at the object.
int rb_respond_to(VALUE obj, ID mid)
Queries if the object responds to the method.
void rb_undef_alloc_func(VALUE klass)
Deletes the allocator function of a class.
void rb_define_alloc_func(VALUE klass, rb_alloc_func_t func)
Sets the allocator function of a class.
static ID rb_intern_const(const char *str)
This is a "tiny optimisation" over rb_intern().
VALUE rb_sym2str(VALUE symbol)
Obtain a frozen string representation of a symbol (not including the leading colon).
VALUE rb_to_symbol(VALUE name)
Identical to rb_intern_str(), except it generates a dynamic symbol if necessary.
int capa
Designed capacity of the buffer.
int off
Offset inside of ptr.
int len
Length of the buffer.
long rb_reg_search(VALUE re, VALUE str, long pos, int dir)
Runs the passed regular expression over the passed string.
VALUE rb_reg_regcomp(VALUE str)
Creates a new instance of rb_cRegexp.
VALUE rb_reg_regsub(VALUE repl, VALUE src, struct re_registers *regs, VALUE rexp)
Substitution.
VALUE rb_str_format(int argc, const VALUE *argv, VALUE fmt)
Formats a string.
VALUE rb_yield(VALUE val)
Yields the block.
#define MEMCPY(p1, p2, type, n)
Handy macro to call memcpy.
#define ALLOCA_N(type, n)
#define MEMZERO(p, type, n)
Handy macro to erase a region of memory.
#define RB_GC_GUARD(v)
Prevents premature destruction of local objects.
void rb_define_hooked_variable(const char *q, VALUE *w, type *e, void_type *r)
Defines a function-backed global variable.
VALUE type(ANYARGS)
ANYARGS-ed function type.
int st_foreach(st_table *q, int_type *w, st_data_t e)
Iteration over the given table.
VALUE rb_ensure(type *q, VALUE w, type *e, VALUE r)
An equivalent of ensure clause.
Defines RBIMPL_ATTR_NONSTRING.
static int RARRAY_LENINT(VALUE ary)
Identical to rb_array_len(), except it differs for the return type.
#define RARRAY_CONST_PTR
Just another name of rb_array_const_ptr.
static VALUE RBASIC_CLASS(VALUE obj)
Queries the class of an object.
#define RBASIC(obj)
Convenient casting macro.
#define DATA_PTR(obj)
Convenient getter macro.
static struct re_registers * RMATCH_REGS(VALUE match)
Queries the raw re_registers.
static VALUE RREGEXP_SRC(VALUE rexp)
Convenient getter function.
#define StringValue(v)
Ensures that the parameter object is a String.
VALUE rb_str_export_locale(VALUE obj)
Identical to rb_str_export(), except it converts into the locale encoding instead.
char * rb_string_value_cstr(volatile VALUE *ptr)
Identical to rb_string_value_ptr(), except it additionally checks for the contents for viability as a C string.
static int RSTRING_LENINT(VALUE str)
Identical to RSTRING_LEN(), except it differs for the return type.
static char * RSTRING_END(VALUE str)
Queries the end of the contents pointer of the string.
#define RSTRING_GETMEM(str, ptrvar, lenvar)
Convenient macro to obtain the contents and length at once.
VALUE rb_string_value(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it fills the passed pointer with the converted object.
static long RSTRING_LEN(VALUE str)
Queries the length of the string.
#define RSTRING(obj)
Convenient casting macro.
VALUE rb_str_export(VALUE obj)
Identical to rb_str_to_str(), except it additionally converts the string into default external encoding.
char * rb_string_value_ptr(volatile VALUE *ptr)
Identical to rb_str_to_str(), except it returns the converted string's backend memory region.
VALUE rb_str_to_str(VALUE obj)
Identical to rb_check_string_type(), except it raises exceptions in case of conversion failures.
static char * RSTRING_PTR(VALUE str)
Queries the contents pointer of the string.
#define StringValueCStr(v)
Identical to StringValuePtr, except it additionally checks for the contents for viability as a C string.
#define TypedData_Wrap_Struct(klass, data_type, sval)
Converts sval, a pointer to your struct, into a Ruby object.
struct rb_data_type_struct rb_data_type_t
This is the struct that holds necessary info for a struct.
VALUE rb_require(const char *feature)
Identical to rb_require_string(), except it takes C's string instead of Ruby's.
#define errno
Ractor-aware version of errno.
#define RB_NUM2SSIZE
Converts an instance of rb_cInteger into C's ssize_t.
#define RTEST
This is an old name of RB_TEST.
#define _(args)
This was a transition path from K&R to ANSI.
VALUE flags
Per-object flags.
struct RBasic basic
Basic part, including flags and class.
long capa
Capacity of *ptr.
long len
Length of the string, not including terminating NUL character.
union RString::@157025041137035241047331270155043025061071337053::@157067065136062356112324002106172053054013023024::@365170260060164113275356137374160141226332013204 aux
Auxiliary info.
struct RString::@157025041137035241047331270155043025061071337053::@153056146250355212360325351117351053336274231135 embed
Embedded contents.
VALUE shared
Parent of the string.
char * ptr
Pointer to the contents of the string.
union RString::@157025041137035241047331270155043025061071337053 as
String's specific fields.
struct RString::@157025041137035241047331270155043025061071337053::@157067065136062356112324002106172053054013023024 heap
Strings that use separated memory region for contents use this pattern.
void rb_nativethread_lock_lock(rb_nativethread_lock_t *lock)
Blocks until the current thread obtains a lock.
uintptr_t ID
Type that represents a Ruby identifier such as a variable name.
uintptr_t VALUE
Type that represents a Ruby object.
static enum ruby_value_type rb_type(VALUE obj)
Identical to RB_BUILTIN_TYPE(), except it can also accept special constants.
static void Check_Type(VALUE v, enum ruby_value_type t)
Identical to RB_TYPE_P(), except it raises exceptions on predication failure.
static bool RB_TYPE_P(VALUE obj, enum ruby_value_type t)
Queries if the given object is of given type.
ruby_value_type
C-level type of an object.