artichoke_backend/extn/core/string/
mod.rs1use core::ops::Deref;
2use std::ffi::{c_char, c_void};
3use std::ptr::NonNull;
4
5use artichoke_core::value::Value as _;
6use spinoso_exception::TypeError;
7#[doc(inline)]
8pub use spinoso_string::{Encoding, RawParts, String};
9
10use crate::Artichoke;
11use crate::convert::{BoxUnboxVmValue, UnboxedValueGuard};
12use crate::error::Error;
13use crate::sys;
14use crate::types::Ruby;
15use crate::value::Value;
16
17mod ffi;
18pub(in crate::extn) mod mruby;
19pub(super) mod trampoline;
20
21const ENCODING_FLAG_BITPOS: usize = 5;
22
23impl BoxUnboxVmValue for String {
24 type Unboxed = Self;
25 type Guarded = String;
26
27 const RUBY_TYPE: &'static str = "String";
28
29 #[expect(
30 clippy::cast_possible_truncation,
31 clippy::cast_sign_loss,
32 reason = "mruby stores sizes as int64_t instead of size_t"
33 )]
34 unsafe fn unbox_from_value<'a>(
35 value: &'a mut Value,
36 interp: &mut Artichoke,
37 ) -> Result<UnboxedValueGuard<'a, Self::Guarded>, Error> {
38 let _ = interp;
39
40 if value.ruby_type() != Ruby::String {
43 let mut message = std::string::String::from("uninitialized ");
44 message.push_str(Self::RUBY_TYPE);
45 return Err(TypeError::from(message).into());
46 }
47
48 let value = value.inner();
49 let string = sys::mrb_sys_basic_ptr(value).cast::<sys::RString>();
52
53 let Some(ptr) = NonNull::<c_char>::new((*string).as_.heap.ptr) else {
54 return Ok(UnboxedValueGuard::new(String::new()));
56 };
57 let length = (*string).as_.heap.len as usize;
58 let capacity = (*string).as_.heap.aux.capa as usize;
59
60 let flags = string.as_ref().unwrap().flags();
62 let encoding_flag = flags & (0b1111 << ENCODING_FLAG_BITPOS);
63 let encoding = (encoding_flag >> ENCODING_FLAG_BITPOS) as u8;
64 let encoding = Encoding::try_from_flag(encoding).map_err(|_| TypeError::with_message("Unknown encoding"))?;
65
66 let s = String::from_raw_parts_with_encoding(
67 RawParts {
68 ptr: ptr.cast::<u8>().as_mut(),
69 length,
70 capacity,
71 },
72 encoding,
73 );
74 Ok(UnboxedValueGuard::new(s))
75 }
76
77 #[expect(
78 clippy::cast_possible_wrap,
79 reason = "mruby stores sizes as int64_t instead of size_t"
80 )]
81 fn alloc_value(value: Self::Unboxed, interp: &mut Artichoke) -> Result<Value, Error> {
82 let encoding = value.encoding();
83 let RawParts { ptr, length, capacity } = String::into_raw_parts(value);
84 let value = unsafe {
85 interp.with_ffi_boundary(|mrb| {
86 sys::mrb_sys_alloc_rstring(
87 mrb,
88 ptr.cast::<c_char>(),
89 length as sys::mrb_int,
90 capacity as sys::mrb_int,
91 )
92 })?
93 };
94 let string = unsafe { sys::mrb_sys_basic_ptr(value).cast::<sys::RString>() };
95 unsafe {
96 let flags = string.as_ref().unwrap().flags();
97 let encoding_bits = encoding.to_flag();
98 let flags_with_zeroed_encoding = flags & !(0b1111 << ENCODING_FLAG_BITPOS);
99 let flags_with_encoding = flags_with_zeroed_encoding | (u32::from(encoding_bits) << ENCODING_FLAG_BITPOS);
100 string.as_mut().unwrap().set_flags(flags_with_encoding);
101 }
102 Ok(interp.protect(value.into()))
103 }
104
105 #[expect(
106 clippy::cast_possible_wrap,
107 reason = "mruby stores sizes as int64_t instead of size_t"
108 )]
109 fn box_into_value(value: Self::Unboxed, into: Value, interp: &mut Artichoke) -> Result<Value, Error> {
110 assert_eq!(
116 into.ruby_type(),
117 Ruby::String,
118 "Tried to box String into {:?} value",
119 into.ruby_type()
120 );
121
122 let encoding = value.encoding();
123 let RawParts { ptr, length, capacity } = String::into_raw_parts(value);
124 let string = unsafe {
125 sys::mrb_sys_repack_into_rstring(
126 ptr.cast::<c_char>(),
127 length as sys::mrb_int,
128 capacity as sys::mrb_int,
129 into.inner(),
130 )
131 };
132 unsafe {
133 let flags = string.as_ref().unwrap().flags();
134 let encoding_bits = encoding.to_flag();
135 let flags_with_zeroed_encoding = flags & !(0b1111 << ENCODING_FLAG_BITPOS);
136 let flags_with_encoding = flags_with_zeroed_encoding | (u32::from(encoding_bits) << ENCODING_FLAG_BITPOS);
137 string.as_mut().unwrap().set_flags(flags_with_encoding);
138 }
139
140 Ok(interp.protect(into))
141 }
142
143 fn free(data: *mut c_void) {
144 let _ = data;
151 unreachable!("<String as BoxUnboxVmValue>::free is never called");
152 }
153}
154
155impl Deref for UnboxedValueGuard<'_, String> {
156 type Target = String;
157
158 fn deref(&self) -> &Self::Target {
159 self.as_inner_ref()
160 }
161}
162
#[cfg(test)]
mod tests {
    use crate::test::prelude::*;

    /// Name passed to `unwrap_or_panic_with_backtrace` for every failure.
    const SUBJECT: &str = "String";
    /// Ruby source of the functional spec, embedded at compile time.
    #[cfg(feature = "core-regexp")]
    const FUNCTIONAL_TEST: &[u8] = include_bytes!("string_functional_test.rb");

    /// Evaluates `code` on `interp` and passes the outcome to
    /// `unwrap_or_panic_with_backtrace`, labelled with [`SUBJECT`].
    fn eval_or_panic(interp: &mut crate::Artichoke, code: &[u8]) {
        let outcome = interp.eval(code);
        unwrap_or_panic_with_backtrace(interp, SUBJECT, outcome);
    }

    #[test]
    #[cfg(feature = "core-regexp")]
    fn functional() {
        let mut interp = interpreter();
        // Load the spec file first, then invoke its `spec` entry point.
        eval_or_panic(&mut interp, FUNCTIONAL_TEST);
        eval_or_panic(&mut interp, b"spec");
    }

    #[test]
    fn modifying_and_repacking_encoding_zeroes_old_encoding_flags() {
        let mut interp = interpreter();
        let script = "be = ''.b ; be << '😀' ; raise 'unexpected encoding' unless be.length == 1";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    #[cfg(feature = "core-regexp")]
    fn start_with_regex() {
        let mut interp = interpreter();
        let script = r"
            raise 'start_with? gives incorrect result' unless 'abcd test-123'.start_with?(/test-(\d+)/) == false;
            raise 'start_with? should clear Regexp.last_match' unless Regexp.last_match == nil
            raise 'start_with? should clear $1' unless $1 == nil
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn allocated_but_uninitialized_string_can_be_garbage_collected() {
        let mut interp = interpreter();
        let script = r"
            1_000_000.times do
              String.allocate
            end
        ";
        eval_or_panic(&mut interp, script.as_bytes());
        // Run a full collection once the allocations are done.
        interp.full_gc().unwrap();
    }

    #[test]
    fn allocated_but_uninitialized_string_can_be_read() {
        let mut interp = interpreter();
        let script = r#"
            s = String.allocate
            raise 'String.allocate is not an instance of String' unless s.is_a?(String)
            raise 'String.allocate.inspect is not a String' unless s.inspect.is_a?(String)
            raise 'String.allocate is not empty' unless s.empty?
            raise 'String.allocate.size is not 0' unless s.size == 0
            raise 'String.allocate.inspect is not empty' unless s.inspect == '""'
        "#;
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn string_allocate_can_be_modified() {
        let mut interp = interpreter();
        let script = r"
            s = String.allocate
            s << 'hello'
            s << 'world'
            raise 'String.allocate was not grown to correct size' unless s.size == 10
            raise 'String.allocate was not appendable' unless s == 'helloworld'
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    // This test is expected to panic on the first `raise` in the script.
    #[test]
    #[should_panic = "String.allocate.encoding is not binary"]
    fn freshly_allocated_string_has_binary_encoding() {
        let mut interp = interpreter();
        let script = r#"
            s = String.new
            raise 'String.allocate.encoding is not binary' unless s.encoding == Encoding::BINARY
            s << "abc"
            raise 'String.allocate.encoding is not binary after appending ASCII' unless s.encoding == Encoding::BINARY
            s << "❤️"
            raise 'String.allocate.encoding is not UTF-8 after appending UTF-8' unless s.encoding == Encoding::UTF_8
        "#;
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn reinitializing_a_frozen_string_with_no_args_is_permitted() {
        let mut interp = interpreter();
        let script = r"
            raise 'reinitializing empty frozen string failed' unless String.new.freeze.send(:initialize) == ''
            raise 'reinitializing non-empty frozen string failed' unless String.new('hello').freeze.send(:initialize) == 'hello'
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn reinitializing_a_frozen_string_with_args_raises_frozen_error() {
        let mut interp = interpreter();
        let script = r"
            begin
              String.new.freeze.send(:initialize, 'world')
            rescue FrozenError
              # expected
            else
              raise 'reinitializing frozen empty string with args did not raise FrozenError'
            end

            begin
              String.new('hello').freeze.send(:initialize, 'world')
            rescue FrozenError
              # expected
            else
              raise 'reinitializing frozen non-empty string with args did not raise FrozenError'
            end
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn reinitializing_a_string_with_no_args_is_a_noop() {
        let mut interp = interpreter();
        let script = "
            s = String.new
            s.send(:initialize)
            raise 'reinitializing empty string failed' unless s == ''

            s = String.new('hello')
            s.send(:initialize)
            raise 'reinitializing non-empty string failed' unless s == 'hello'
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }

    #[test]
    fn reinitializing_a_string_with_args_replaces_the_string_contents() {
        let mut interp = interpreter();
        let script = "
            s = String.new
            s.send(:initialize, 'world')
            raise 'reinitializing empty string with args failed' unless s == 'world'

            s = String.new('hello')
            s.send(:initialize, 'world')
            raise 'reinitializing non-empty string with args failed' unless s == 'world'
        ";
        eval_or_panic(&mut interp, script.as_bytes());
    }
}