flyweights/lib.rs

1// Copyright 2022 The Fuchsia Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5//! Types implementing the [flyweight pattern](https://en.wikipedia.org/wiki/Flyweight_pattern)
6//! for reusing object allocations.
7
8#![warn(missing_docs)]
9
10mod raw;
11
12use ahash::AHashSet;
13use bstr::{BStr, BString};
14use serde::de::{Deserializer, Visitor};
15use serde::ser::Serializer;
16use serde::{Deserialize, Serialize};
17use std::borrow::Borrow;
18use std::fmt::{Debug, Display, Formatter, Result as FmtResult};
19use std::hash::{Hash, Hasher};
20use std::ops::Deref;
21use std::ptr::NonNull;
22use std::sync::Mutex;
23
/// The global string cache for `FlyStr` and `FlyByteStr`.
///
/// If a live `FlyStr` contains a `Storage`, the `Storage` must also be in this cache and it must
/// have a refcount of >= 1.
28static CACHE: std::sync::LazyLock<Mutex<AHashSet<Storage>>> =
29    std::sync::LazyLock::new(|| Mutex::new(AHashSet::new()));
30
31/// Wrapper type for stored strings that lets us query the cache without an owned value.
32#[repr(transparent)]
33struct Storage(NonNull<raw::Payload>);
34
35// SAFETY: FlyStr storage is always safe to send across threads.
36unsafe impl Send for Storage {}
37// SAFETY: FlyStr storage is always safe to share across threads.
38unsafe impl Sync for Storage {}
39
40impl PartialEq for Storage {
41    #[inline]
42    fn eq(&self, other: &Self) -> bool {
43        self.as_bytes() == other.as_bytes()
44    }
45}
46
47impl Eq for Storage {}
48
49impl Hash for Storage {
50    #[inline]
51    fn hash<H: Hasher>(&self, state: &mut H) {
52        self.as_bytes().hash(state)
53    }
54}
55
56impl Storage {
57    #[inline]
58    fn inc_ref(&self) -> usize {
59        // SAFETY: `Storage` always points to a valid `Payload`.
60        unsafe { raw::Payload::inc_ref(self.0.as_ptr()) }
61    }
62
63    #[inline]
64    fn as_bytes(&self) -> &[u8] {
65        // SAFETY: `Storage` always points to a valid `Payload`.
66        unsafe { &*raw::Payload::bytes(self.0.as_ptr()) }
67    }
68}
69
70impl Borrow<[u8]> for Storage {
71    #[inline]
72    fn borrow(&self) -> &[u8] {
73        self.as_bytes()
74    }
75}
76
77/// An immutable string type which only stores a single copy of each string allocated. Internally
78/// represented as a shared pointer to the backing allocation. Occupies a single pointer width.
79///
80/// # Small strings
81///
82/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
83/// performed.
84///
85/// # Performance
86///
/// A `FlyStr` is slower to construct than a regular `String`, but in exchange it reduces standing
/// memory usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the underlying
/// pointer value rather than the pointed-to data, which makes equality comparisons and indexing
/// faster. This is sound because the type guarantees that only one `FlyStr` pointer value will
/// exist at any time for a given string's contents.
92///
93/// As with any performance optimization, you should only use this type if you can measure the
94/// benefit it provides to your program. Pay careful attention to creating `FlyStr`s in hot paths
95/// as it may regress runtime performance.
96///
97/// # Allocation lifecycle
98///
/// This type is intended for long-running system services that store user-provided values. A
/// string is removed from the global cache when the last `FlyStr` referencing it is dropped.
/// While this incurs some overhead, it is important to prevent the value cache from becoming a
/// denial-of-service vector.
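///
/// # Examples
///
/// A minimal usage sketch (assuming this crate is imported as `flyweights`): constructing two
/// `FlyStr`s from equal contents yields values that compare equal and share one cached
/// allocation.
///
/// ```
/// use flyweights::FlyStr;
///
/// let a = FlyStr::new("a string longer than a pointer");
/// let b = FlyStr::new("a string longer than a pointer");
/// assert_eq!(a, b);
/// assert_eq!(a.as_str(), "a string longer than a pointer");
/// ```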
102#[derive(Clone, Eq, Hash, PartialEq)]
103pub struct FlyStr(RawRepr);
104
105#[cfg(feature = "json_schema")]
106impl schemars::JsonSchema for FlyStr {
107    fn schema_name() -> String {
108        str::schema_name()
109    }
110
111    fn json_schema(generator: &mut schemars::gen::SchemaGenerator) -> schemars::schema::Schema {
112        str::json_schema(generator)
113    }
114
115    fn is_referenceable() -> bool {
116        false
117    }
118}
119
120static_assertions::assert_eq_size!(FlyStr, usize);
121
122impl FlyStr {
123    /// Create a `FlyStr`, allocating it in the cache if the value is not already cached.
124    ///
125    /// # Performance
126    ///
127    /// Creating an instance of this type requires accessing the global cache of strings, which
128    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
129    /// contention.
130    ///
131    /// Each string allocated is hashed for lookup in the cache.
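    ///
    /// # Example
    ///
    /// An illustrative sketch: `new` accepts anything that is both `AsRef<str>` and
    /// `Into<String>`, so `&str` and `String` inputs deduplicate to the same cached value.
    ///
    /// ```
    /// # use flyweights::FlyStr;
    /// let from_slice = FlyStr::new("hello, world");
    /// let from_string = FlyStr::new(String::from("hello, world"));
    /// assert_eq!(from_slice, from_string);
    /// ```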
132    #[inline]
133    pub fn new(s: impl AsRef<str> + Into<String>) -> Self {
134        Self(RawRepr::new_str(s))
135    }
136
137    /// Returns the underlying string slice.
138    #[inline]
139    pub fn as_str(&self) -> &str {
140        // SAFETY: Every FlyStr is constructed from valid UTF-8 bytes.
141        unsafe { std::str::from_utf8_unchecked(self.0.as_bytes()) }
142    }
143}
144
145impl Default for FlyStr {
146    #[inline]
147    fn default() -> Self {
148        Self::new("")
149    }
150}
151
152impl From<&'_ str> for FlyStr {
153    #[inline]
154    fn from(s: &str) -> Self {
155        Self::new(s)
156    }
157}
158
159impl From<&'_ String> for FlyStr {
160    #[inline]
161    fn from(s: &String) -> Self {
162        Self::new(&**s)
163    }
164}
165
166impl From<String> for FlyStr {
167    #[inline]
168    fn from(s: String) -> Self {
169        Self::new(s)
170    }
171}
172
173impl From<Box<str>> for FlyStr {
174    #[inline]
175    fn from(s: Box<str>) -> Self {
176        Self::new(s)
177    }
178}
179
180impl From<&Box<str>> for FlyStr {
181    #[inline]
182    fn from(s: &Box<str>) -> Self {
183        Self::new(&**s)
184    }
185}
186
187impl TryFrom<FlyByteStr> for FlyStr {
188    type Error = std::str::Utf8Error;
189
190    #[inline]
191    fn try_from(b: FlyByteStr) -> Result<FlyStr, Self::Error> {
192        // The internals of both FlyStr and FlyByteStr are the same, but it's only sound to return
193        // a FlyStr if the RawRepr contains/points to valid UTF-8.
194        std::str::from_utf8(b.as_bytes())?;
195        Ok(FlyStr(b.0))
196    }
197}
198
199impl Into<String> for FlyStr {
200    #[inline]
201    fn into(self) -> String {
202        self.as_str().to_owned()
203    }
204}
205
206impl Into<String> for &'_ FlyStr {
207    #[inline]
208    fn into(self) -> String {
209        self.as_str().to_owned()
210    }
211}
212
213impl Deref for FlyStr {
214    type Target = str;
215
216    #[inline]
217    fn deref(&self) -> &Self::Target {
218        self.as_str()
219    }
220}
221
222impl AsRef<str> for FlyStr {
223    #[inline]
224    fn as_ref(&self) -> &str {
225        self.as_str()
226    }
227}
228
229impl PartialOrd for FlyStr {
230    #[inline]
231    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
232        Some(self.cmp(other))
233    }
234}
235impl Ord for FlyStr {
236    #[inline]
237    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
238        self.as_str().cmp(other.as_str())
239    }
240}
241
242impl PartialEq<str> for FlyStr {
243    #[inline]
244    fn eq(&self, other: &str) -> bool {
245        self.as_str() == other
246    }
247}
248
249impl PartialEq<&'_ str> for FlyStr {
250    #[inline]
251    fn eq(&self, other: &&str) -> bool {
252        self.as_str() == *other
253    }
254}
255
256impl PartialEq<String> for FlyStr {
257    #[inline]
258    fn eq(&self, other: &String) -> bool {
259        self.as_str() == &**other
260    }
261}
262
263impl PartialEq<FlyByteStr> for FlyStr {
264    #[inline]
265    fn eq(&self, other: &FlyByteStr) -> bool {
266        self.0 == other.0
267    }
268}
269
270impl PartialEq<&'_ FlyByteStr> for FlyStr {
271    #[inline]
272    fn eq(&self, other: &&FlyByteStr) -> bool {
273        self.0 == other.0
274    }
275}
276
277impl PartialOrd<str> for FlyStr {
278    #[inline]
279    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
280        self.as_str().partial_cmp(other)
281    }
282}
283
284impl PartialOrd<&str> for FlyStr {
285    #[inline]
286    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
287        self.as_str().partial_cmp(*other)
288    }
289}
290
291impl PartialOrd<FlyByteStr> for FlyStr {
292    #[inline]
293    fn partial_cmp(&self, other: &FlyByteStr) -> Option<std::cmp::Ordering> {
294        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
295    }
296}
297
298impl PartialOrd<&'_ FlyByteStr> for FlyStr {
299    #[inline]
300    fn partial_cmp(&self, other: &&FlyByteStr) -> Option<std::cmp::Ordering> {
301        BStr::new(self.as_str()).partial_cmp(other.as_bstr())
302    }
303}
304
305impl Debug for FlyStr {
306    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
307        Debug::fmt(self.as_str(), f)
308    }
309}
310
311impl Display for FlyStr {
312    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
313        Display::fmt(self.as_str(), f)
314    }
315}
316
317impl Serialize for FlyStr {
318    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
319        serializer.serialize_str(self.as_str())
320    }
321}
322
323impl<'d> Deserialize<'d> for FlyStr {
324    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
325        deserializer.deserialize_str(FlyStrVisitor)
326    }
327}
328
329struct FlyStrVisitor;
330
331impl Visitor<'_> for FlyStrVisitor {
332    type Value = FlyStr;
333    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
334        formatter.write_str("a string")
335    }
336
337    fn visit_borrowed_str<'de, E>(self, v: &'de str) -> Result<Self::Value, E>
338    where
339        E: serde::de::Error,
340    {
341        Ok(v.into())
342    }
343
344    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
345    where
346        E: serde::de::Error,
347    {
348        Ok(v.into())
349    }
350
351    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
352    where
353        E: serde::de::Error,
354    {
355        Ok(v.into())
356    }
357}
358
359macro_rules! new_raw_repr {
360    ($borrowed_bytes:expr) => {
361        if $borrowed_bytes.len() <= MAX_INLINE_SIZE {
362            RawRepr::new_inline($borrowed_bytes)
363        } else {
364            let mut cache = CACHE.lock().unwrap();
365            if let Some(existing) = cache.get($borrowed_bytes) {
366                RawRepr::from_storage(existing)
367            } else {
368                RawRepr::new_for_storage(&mut cache, $borrowed_bytes)
369            }
370        }
371    };
372}
373
/// An immutable bytestring type which only stores a single copy of each string allocated.
/// Internally represented as a shared pointer to the backing allocation. Occupies a single
/// pointer width.
376///
377/// # Small strings
378///
379/// Very short strings are stored inline in the pointer with bit-tagging, so no allocations are
380/// performed.
381///
382/// # Performance
383///
/// A `FlyByteStr` is slower to construct than a regular `BString`, but in exchange it reduces
/// standing memory usage by deduplicating strings. `PartialEq` and `Hash` are implemented on the
/// underlying pointer value rather than the pointed-to data, which makes equality comparisons and
/// indexing faster. This is sound because the type guarantees that only one `FlyByteStr` pointer
/// value will exist at any time for a given string's contents.
389///
390/// As with any performance optimization, you should only use this type if you can measure the
391/// benefit it provides to your program. Pay careful attention to creating `FlyByteStr`s in hot
392/// paths as it may regress runtime performance.
393///
394/// # Allocation lifecycle
395///
/// This type is intended for long-running system services that store user-provided values. A
/// string is removed from the global cache when the last `FlyByteStr` referencing it is dropped.
/// While this incurs some overhead, it is important to prevent the value cache from becoming a
/// denial-of-service vector.
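///
/// # Examples
///
/// A minimal sketch (assuming this crate is imported as `flyweights`): `FlyByteStr` accepts
/// arbitrary bytes, and conversion to `FlyStr` fails for contents that are not valid UTF-8.
///
/// ```
/// use flyweights::{FlyByteStr, FlyStr};
///
/// let bytes = FlyByteStr::from(&b"\xF0\x28\x8C\x28 not utf-8"[..]);
/// assert!(FlyStr::try_from(bytes).is_err());
///
/// let text = FlyByteStr::from("plain text");
/// assert_eq!(text, "plain text");
/// ```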
399#[derive(Clone, Eq, Hash, PartialEq)]
400pub struct FlyByteStr(RawRepr);
401
402static_assertions::assert_eq_size!(FlyByteStr, usize);
403
404impl FlyByteStr {
405    /// Create a `FlyByteStr`, allocating it in the cache if the value is not already cached.
406    ///
407    /// # Performance
408    ///
409    /// Creating an instance of this type requires accessing the global cache of strings, which
410    /// involves taking a lock. When multiple threads are allocating lots of strings there may be
411    /// contention.
412    ///
413    /// Each string allocated is hashed for lookup in the cache.
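    ///
    /// # Example
    ///
    /// An illustrative sketch: `new` accepts anything that is both `AsRef<[u8]>` and
    /// `Into<Vec<u8>>`, such as byte slices and `Vec<u8>` values.
    ///
    /// ```
    /// # use flyweights::FlyByteStr;
    /// let from_slice = FlyByteStr::new(&b"some raw bytes"[..]);
    /// let from_vec = FlyByteStr::new(b"some raw bytes".to_vec());
    /// assert_eq!(from_slice, from_vec);
    /// ```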
414    pub fn new(s: impl AsRef<[u8]> + Into<Vec<u8>>) -> Self {
415        Self(RawRepr::new(s))
416    }
417
418    /// Returns the underlying bytestring slice.
419    #[inline]
420    pub fn as_bstr(&self) -> &BStr {
421        BStr::new(self.0.as_bytes())
422    }
423
424    /// Returns the underlying byte slice.
425    #[inline]
426    pub fn as_bytes(&self) -> &[u8] {
427        self.0.as_bytes()
428    }
429}
430
431impl Default for FlyByteStr {
432    #[inline]
433    fn default() -> Self {
434        Self::new(b"")
435    }
436}
437
438impl From<&'_ [u8]> for FlyByteStr {
439    #[inline]
440    fn from(s: &[u8]) -> Self {
441        Self::new(s)
442    }
443}
444
445impl From<&'_ BStr> for FlyByteStr {
446    #[inline]
447    fn from(s: &BStr) -> Self {
448        let bytes: &[u8] = s.as_ref();
449        Self(new_raw_repr!(bytes))
450    }
451}
452
453impl From<&'_ str> for FlyByteStr {
454    #[inline]
455    fn from(s: &str) -> Self {
456        Self::new(s)
457    }
458}
459
460impl From<&'_ Vec<u8>> for FlyByteStr {
461    #[inline]
462    fn from(s: &Vec<u8>) -> Self {
463        Self(new_raw_repr!(&s[..]))
464    }
465}
466
467impl From<&'_ String> for FlyByteStr {
468    #[inline]
469    fn from(s: &String) -> Self {
470        Self::new(&**s)
471    }
472}
473
474impl From<Vec<u8>> for FlyByteStr {
475    #[inline]
476    fn from(s: Vec<u8>) -> Self {
477        Self::new(s)
478    }
479}
480
481impl From<String> for FlyByteStr {
482    #[inline]
483    fn from(s: String) -> Self {
484        Self::new(s)
485    }
486}
487
488impl From<Box<[u8]>> for FlyByteStr {
489    #[inline]
490    fn from(s: Box<[u8]>) -> Self {
491        Self::new(s)
492    }
493}
494
495impl From<Box<str>> for FlyByteStr {
496    #[inline]
497    fn from(s: Box<str>) -> Self {
498        Self(new_raw_repr!(s.as_bytes()))
499    }
500}
501
502impl From<&'_ Box<[u8]>> for FlyByteStr {
503    #[inline]
504    fn from(s: &'_ Box<[u8]>) -> Self {
505        Self(new_raw_repr!(&**s))
506    }
507}
508
509impl From<&Box<str>> for FlyByteStr {
510    #[inline]
511    fn from(s: &Box<str>) -> Self {
512        Self::new(&**s)
513    }
514}
515
516impl Into<BString> for FlyByteStr {
517    #[inline]
518    fn into(self) -> BString {
519        self.as_bstr().to_owned()
520    }
521}
522
523impl Into<Vec<u8>> for FlyByteStr {
524    #[inline]
525    fn into(self) -> Vec<u8> {
526        self.as_bytes().to_owned()
527    }
528}
529
530impl From<FlyStr> for FlyByteStr {
531    #[inline]
532    fn from(s: FlyStr) -> FlyByteStr {
533        Self(s.0)
534    }
535}
536
537impl TryInto<String> for FlyByteStr {
538    type Error = std::string::FromUtf8Error;
539
540    #[inline]
541    fn try_into(self) -> Result<String, Self::Error> {
542        String::from_utf8(self.into())
543    }
544}
545
546impl Deref for FlyByteStr {
547    type Target = BStr;
548
549    #[inline]
550    fn deref(&self) -> &Self::Target {
551        self.as_bstr()
552    }
553}
554
555impl AsRef<BStr> for FlyByteStr {
556    #[inline]
557    fn as_ref(&self) -> &BStr {
558        self.as_bstr()
559    }
560}
561
562impl PartialOrd for FlyByteStr {
563    #[inline]
564    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
565        Some(self.cmp(other))
566    }
567}
568impl Ord for FlyByteStr {
569    #[inline]
570    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
571        self.as_bstr().cmp(other.as_bstr())
572    }
573}
574
575impl PartialEq<[u8]> for FlyByteStr {
576    #[inline]
577    fn eq(&self, other: &[u8]) -> bool {
578        self.as_bytes() == other
579    }
580}
581
582impl PartialEq<BStr> for FlyByteStr {
583    #[inline]
584    fn eq(&self, other: &BStr) -> bool {
585        self.as_bytes() == other
586    }
587}
588
589impl PartialEq<str> for FlyByteStr {
590    #[inline]
591    fn eq(&self, other: &str) -> bool {
592        self.as_bytes() == other.as_bytes()
593    }
594}
595
596impl PartialEq<&'_ [u8]> for FlyByteStr {
597    #[inline]
598    fn eq(&self, other: &&[u8]) -> bool {
599        self.as_bytes() == *other
600    }
601}
602
603impl PartialEq<&'_ BStr> for FlyByteStr {
604    #[inline]
605    fn eq(&self, other: &&BStr) -> bool {
606        self.as_bstr() == *other
607    }
608}
609
610impl PartialEq<&'_ str> for FlyByteStr {
611    #[inline]
612    fn eq(&self, other: &&str) -> bool {
613        self.as_bytes() == other.as_bytes()
614    }
615}
616
617impl PartialEq<String> for FlyByteStr {
618    #[inline]
619    fn eq(&self, other: &String) -> bool {
620        self.as_bytes() == other.as_bytes()
621    }
622}
623
624impl PartialEq<FlyStr> for FlyByteStr {
625    #[inline]
626    fn eq(&self, other: &FlyStr) -> bool {
627        self.0 == other.0
628    }
629}
630
631impl PartialEq<&'_ FlyStr> for FlyByteStr {
632    #[inline]
633    fn eq(&self, other: &&FlyStr) -> bool {
634        self.0 == other.0
635    }
636}
637
638impl PartialOrd<str> for FlyByteStr {
639    #[inline]
640    fn partial_cmp(&self, other: &str) -> Option<std::cmp::Ordering> {
641        self.as_bstr().partial_cmp(other)
642    }
643}
644
645impl PartialOrd<&str> for FlyByteStr {
646    #[inline]
647    fn partial_cmp(&self, other: &&str) -> Option<std::cmp::Ordering> {
648        self.as_bstr().partial_cmp(other)
649    }
650}
651
652impl PartialOrd<FlyStr> for FlyByteStr {
653    #[inline]
654    fn partial_cmp(&self, other: &FlyStr) -> Option<std::cmp::Ordering> {
655        self.as_bstr().partial_cmp(other.as_str())
656    }
657}
658
659impl Debug for FlyByteStr {
660    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
661        Debug::fmt(self.as_bstr(), f)
662    }
663}
664
665impl Display for FlyByteStr {
666    fn fmt(&self, f: &mut Formatter<'_>) -> FmtResult {
667        Display::fmt(self.as_bstr(), f)
668    }
669}
670
671impl Serialize for FlyByteStr {
672    fn serialize<S: Serializer>(&self, serializer: S) -> Result<S::Ok, S::Error> {
673        serializer.serialize_bytes(self.as_bytes())
674    }
675}
676
677impl<'d> Deserialize<'d> for FlyByteStr {
678    fn deserialize<D: Deserializer<'d>>(deserializer: D) -> Result<Self, D::Error> {
679        deserializer.deserialize_bytes(FlyByteStrVisitor)
680    }
681}
682
683struct FlyByteStrVisitor;
684
685impl<'de> Visitor<'de> for FlyByteStrVisitor {
686    type Value = FlyByteStr;
687    fn expecting(&self, formatter: &mut Formatter<'_>) -> FmtResult {
688        formatter.write_str("a string, a bytestring, or a sequence of bytes")
689    }
690
691    fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
692    where
693        E: serde::de::Error,
694    {
695        Ok(FlyByteStr::from(v))
696    }
697
698    fn visit_string<E>(self, v: String) -> Result<Self::Value, E>
699    where
700        E: serde::de::Error,
701    {
702        Ok(FlyByteStr::from(v))
703    }
704
705    fn visit_bytes<E>(self, v: &[u8]) -> Result<Self::Value, E>
706    where
707        E: serde::de::Error,
708    {
709        Ok(FlyByteStr::from(v))
710    }
711
712    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
713    where
714        A: serde::de::SeqAccess<'de>,
715    {
716        let mut bytes = vec![];
717        while let Some(b) = seq.next_element::<u8>()? {
718            bytes.push(b);
719        }
720        Ok(FlyByteStr::from(bytes))
721    }
722}
723
724#[repr(C)] // Guarantee predictable field ordering.
725union RawRepr {
    /// Strings longer than MAX_INLINE_SIZE are allocated using `raw::Payload`, which has a thin
    /// pointer representation. This means the `heap` variant always holds an aligned pointer, so
    /// its least significant bit is never set.
729    ///
730    /// We store a `NonNull` so we can have guaranteed pointer layout.
731    heap: NonNull<raw::Payload>,
732
733    /// Strings shorter than or equal in length to MAX_INLINE_SIZE are stored in this union variant.
734    /// The first byte is reserved for the size of the inline string, and the remaining bytes are
735    /// used for the string itself. The first byte has its least significant bit set to 1 to
736    /// distinguish inline strings from heap-allocated ones, and the size is stored in the remaining
737    /// 7 bits.
738    inline: InlineRepr,
739}
740
741// The inline variant should not cause us to occupy more space than the heap variant alone.
742static_assertions::assert_eq_size!(NonNull<raw::Payload>, RawRepr);
743
744// Alignment of the payload pointers must be >1 in order to have space for the mask bit at the
745// bottom.
746static_assertions::const_assert!(std::mem::align_of::<raw::Payload>() > 1);
747
748// The short string optimization makes little-endian layout assumptions with the first byte being
749// the least significant.
750static_assertions::assert_type_eq_all!(byteorder::NativeEndian, byteorder::LittleEndian);
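
// Worked illustration of the tagging scheme (a sketch assuming a 64-bit little-endian target,
// where MAX_INLINE_SIZE == 7): the inline string "hi" stores (2 << 1) | 1 == 0b0000_0101 in its
// first byte (the length, with the least significant bit set to mark the inline variant) and
// [b'h', b'i', 0, 0, 0, 0, 0] in the remaining bytes. A heap pointer is always aligned to more
// than one byte, so its least significant bit is 0.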
751
752/// An enum with an actual discriminant that allows us to limit the reach of unsafe code in the
753/// implementation without affecting the stored size of `RawRepr`.
754enum SafeRepr<'a> {
755    Heap(NonNull<raw::Payload>),
756    Inline(&'a InlineRepr),
757}
758
// SAFETY: `FlyStr` and `FlyByteStr` can be dropped from any thread.
unsafe impl Send for RawRepr {}
// SAFETY: `FlyStr` and `FlyByteStr` have immutable public APIs.
unsafe impl Sync for RawRepr {}
763
764impl RawRepr {
765    fn new_str(s: impl AsRef<str>) -> Self {
766        let borrowed = s.as_ref();
767        new_raw_repr!(borrowed.as_bytes())
768    }
769
770    fn new(s: impl AsRef<[u8]>) -> Self {
771        let borrowed = s.as_ref();
772        new_raw_repr!(borrowed)
773    }
774
775    #[inline]
776    fn new_inline(s: &[u8]) -> Self {
777        assert!(s.len() <= MAX_INLINE_SIZE);
778        let new = Self { inline: InlineRepr::new(s) };
779        assert!(new.is_inline(), "least significant bit must be 1 for inline strings");
780        new
781    }
782
783    #[inline]
784    fn from_storage(storage: &Storage) -> Self {
785        if storage.inc_ref() == 0 {
786            // Another thread is trying to lock the cache and free this string. They already
787            // released their refcount, so give it back to them. This will prevent this thread
788            // and other threads from attempting to free the string if they drop the refcount back
789            // down.
790            storage.inc_ref();
791        }
792        Self { heap: storage.0 }
793    }
794
795    #[inline]
796    fn new_for_storage(cache: &mut AHashSet<Storage>, bytes: &[u8]) -> Self {
797        assert!(bytes.len() > MAX_INLINE_SIZE);
798        // `Payload::alloc` starts the refcount at 1.
799        let new_storage = raw::Payload::alloc(bytes);
800
801        let for_cache = Storage(new_storage);
802        let new = Self { heap: new_storage };
803        assert!(!new.is_inline(), "least significant bit must be 0 for heap strings");
804        cache.insert(for_cache);
805        new
806    }
807
808    #[inline]
809    fn is_inline(&self) -> bool {
        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't
        // expect to retain provenance.
812        (unsafe { self.inline.masked_len } & 1) == 1
813    }
814
815    #[inline]
816    fn project(&self) -> SafeRepr<'_> {
817        if self.is_inline() {
818            // SAFETY: Just checked that this is the inline variant.
819            SafeRepr::Inline(unsafe { &self.inline })
820        } else {
821            // SAFETY: Just checked that this is the heap variant.
822            SafeRepr::Heap(unsafe { self.heap })
823        }
824    }
825
826    #[inline]
827    fn as_bytes(&self) -> &[u8] {
828        match self.project() {
            // SAFETY: FlyStr owns the payload stored as a NonNull, so it is live for as long as
            // the `FlyStr` is.
830            SafeRepr::Heap(ptr) => unsafe { &*raw::Payload::bytes(ptr.as_ptr()) },
831            SafeRepr::Inline(i) => i.as_bytes(),
832        }
833    }
834}
835
836impl PartialEq for RawRepr {
837    #[inline]
838    fn eq(&self, other: &Self) -> bool {
839        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
840        // to retain provenance.
841        let lhs = unsafe { &self.inline };
842        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
843        // to retain provenance.
844        let rhs = unsafe { &other.inline };
845        lhs.eq(rhs)
846    }
847}
848impl Eq for RawRepr {}
849
850impl Hash for RawRepr {
851    fn hash<H: Hasher>(&self, h: &mut H) {
852        // SAFETY: it is always OK to interpret a pointer as a byte array as long as we don't expect
853        // to retain provenance.
854        let this = unsafe { &self.inline };
855        this.hash(h);
856    }
857}
858
859impl Clone for RawRepr {
860    fn clone(&self) -> Self {
861        match self.project() {
862            SafeRepr::Heap(ptr) => {
863                // SAFETY: We own this payload, we know it's live because we are.
864                unsafe {
865                    raw::Payload::inc_ref(ptr.as_ptr());
866                }
867                Self { heap: ptr }
868            }
869            SafeRepr::Inline(&inline) => Self { inline },
870        }
871    }
872}
873
874impl Drop for RawRepr {
875    fn drop(&mut self) {
876        if !self.is_inline() {
877            // SAFETY: We checked above that this is the heap repr.
878            let heap = unsafe { self.heap };
879
880            // Decrementing the refcount before locking the cache causes the following failure mode:
881            //
882            // 1. We drop the refcount to 0.
883            // 2. Another thread finds the string we're about to drop and increments the refcount
884            //    back up to 1.
885            // 3. That thread drops its refcount, and also sees the refcount drop to 0.
886            // 4. That thread locks the cache, removes the value, and drops the payload. This leaves
887            //    us with a dangling pointer to the dropped payload.
888            // 5. We lock the cache and go to look up our value in the cache. Our payload pointer is
889            //    dangling now, but we don't know that. If we try to read through our dangling
890            //    pointer, we cause UB.
891            //
892            // To account for this failure mode and still optimistically drop our refcount, we
893            // modify the procedure slightly:
894            //
895            // 1. We drop the refcount to 0.
896            // 2. Another thread finds the string we're about to drop and increments the refcount
897            //    back up to 1. It notices that the refcount incremented from 0 to 1, and so knows
898            //    that our thread will try to drop it. While still holding the cache lock, that
899            //    thread increments the refcount again from 1 to 2. This "gives back" the refcount
900            //    to our thread.
901            // 3. That thread drops its refcount, and sees the refcount drop to 1. It won't try to
902            //    drop the payload this time.
903            // 4. We lock the cache, and decrement the refcount a second time. If it decremented
904            //    from 0 or 1, then we know that no other threads are currently holding references
905            //    to it and we can safely drop it ourselves.
906
907            // SAFETY: The payload is live.
908            let prev_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
909
910            // If we held the final refcount outside of the cache, try to remove the string.
911            if prev_refcount == 1 {
912                let mut cache = CACHE.lock().unwrap();
913
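                // SAFETY: The payload is live: any thread that saw our refcount reach 0 gave the
                // count back under the cache lock (see above), so the payload has not been freed.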
914                let current_refcount = unsafe { raw::Payload::dec_ref(heap.as_ptr()) };
915                if current_refcount <= 1 {
916                    // If the refcount was still 0 after acquiring the cache lock, no other thread
917                    // looked up this payload between optimistically decrementing the refcount and
918                    // now. If the refcount was 1, then another thread did, but dropped its refcount
919                    // before we got the cache lock. Either way, we can safely remove the string
920                    // from the cache and free it.
921
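                    // SAFETY: The payload is live (see above), so reading its bytes to find the
                    // cache entry is sound.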
922                    let bytes = unsafe { &*raw::Payload::bytes(heap.as_ptr()) };
923                    assert!(
924                        cache.remove(bytes),
925                        "cache did not contain bytes, but this thread didn't remove them yet",
926                    );
927
928                    // Get out of the critical section as soon as possible
929                    drop(cache);
930
931                    // SAFETY: The payload is live.
932                    unsafe { raw::Payload::dealloc(heap.as_ptr()) };
933                } else {
                    // Another thread looked up this payload, made a reference to it, and gave our
                    // refcount back to us for a minimum refcount of 2. We then removed our refcount
                    // again, leaving a minimum of 1. This means it's no longer our responsibility
                    // to deallocate the string, so we ended up needlessly locking the cache.
938                }
939            }
940        }
941    }
942}
943
944#[derive(Clone, Copy, Hash, PartialEq)]
945#[repr(C)] // Preserve field ordering.
946struct InlineRepr {
947    /// The first byte, which corresponds to the LSB of a pointer in the other variant.
948    ///
949    /// When the first bit is `1` the rest of this byte stores the length of the inline string.
950    masked_len: u8,
951    /// Inline string contents.
952    contents: [u8; MAX_INLINE_SIZE],
953}
954
955/// We can store small strings up to 1 byte less than the size of the pointer to the heap-allocated
956/// string.
957const MAX_INLINE_SIZE: usize = std::mem::size_of::<NonNull<raw::Payload>>() - 1;
958
959// Guard rail to make sure we never end up with an incorrect inline size encoding. Ensure that
960// MAX_INLINE_SIZE is always smaller than the maximum size we can represent in a byte with the LSB
961// reserved.
static_assertions::const_assert!((u8::MAX >> 1) as usize >= MAX_INLINE_SIZE);
963
964impl InlineRepr {
965    #[inline]
966    fn new(s: &[u8]) -> Self {
967        assert!(s.len() <= MAX_INLINE_SIZE);
968
969        // Set the first byte to the length of the inline string with LSB masked to 1.
970        let masked_len = ((s.len() as u8) << 1) | 1;
971
972        let mut contents = [0u8; MAX_INLINE_SIZE];
973        contents[..s.len()].copy_from_slice(s);
974
975        Self { masked_len, contents }
976    }
977
978    #[inline]
979    fn as_bytes(&self) -> &[u8] {
980        let len = self.masked_len >> 1;
981        &self.contents[..len as usize]
982    }
983}
984
985#[cfg(test)]
986mod tests {
987    use super::*;
988    use static_assertions::{const_assert, const_assert_eq};
989    use std::collections::BTreeSet;
990    use test_case::test_case;
991
992    // These tests all manipulate the process-global cache in the parent module. On target devices
993    // we run each test case in its own process, so the test cases can't pollute each other. On
994    // host, we run tests with a process for each suite (which is the Rust upstream default), and
995    // we need to manually isolate the tests from each other.
996    #[cfg(not(target_os = "fuchsia"))]
997    use serial_test::serial;
998
999    fn reset_global_cache() {
1000        // We still want subsequent tests to be able to run if one in the same process panics.
1001        match CACHE.lock() {
1002            Ok(mut c) => *c = AHashSet::new(),
1003            Err(e) => *e.into_inner() = AHashSet::new(),
1004        }
1005    }
1006    fn num_strings_in_global_cache() -> usize {
1007        CACHE.lock().unwrap().len()
1008    }
1009
1010    impl RawRepr {
1011        fn refcount(&self) -> Option<usize> {
1012            match self.project() {
1013                SafeRepr::Heap(ptr) => {
1014                    // SAFETY: The payload is live as long as the repr is live.
1015                    let count = unsafe { raw::Payload::refcount(ptr.as_ptr()) };
1016                    Some(count)
1017                }
1018                SafeRepr::Inline(_) => None,
1019            }
1020        }
1021    }
1022
1023    const SHORT_STRING: &str = "hello";
1024    const_assert!(SHORT_STRING.len() < MAX_INLINE_SIZE);
1025
1026    const MAX_LEN_SHORT_STRING: &str = "hello!!";
1027    const_assert_eq!(MAX_LEN_SHORT_STRING.len(), MAX_INLINE_SIZE);
1028
1029    const MIN_LEN_LONG_STRING: &str = "hello!!!";
1030    const_assert_eq!(MIN_LEN_LONG_STRING.len(), MAX_INLINE_SIZE + 1);
1031
1032    const LONG_STRING: &str = "hello, world!!!!!!!!!!!!!!!!!!!!";
1033    const_assert!(LONG_STRING.len() > MAX_INLINE_SIZE);
1034
1035    const SHORT_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28";
1036    const_assert!(SHORT_NON_UTF8.len() < MAX_INLINE_SIZE);
1037
1038    const LONG_NON_UTF8: &[u8] = b"\xF0\x28\x8C\x28\xF0\x28\x8C\x28";
1039    const_assert!(LONG_NON_UTF8.len() > MAX_INLINE_SIZE);
1040
1041    #[test_case("" ; "empty string")]
1042    #[test_case(SHORT_STRING ; "short strings")]
1043    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1044    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1045    #[test_case(LONG_STRING ; "long strings")]
1046    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1047    fn string_formatting_is_equivalent_to_str(original: &str) {
1048        reset_global_cache();
1049
1050        let cached = FlyStr::new(original);
1051        assert_eq!(format!("{original}"), format!("{cached}"));
1052        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1053
1054        let cached = FlyByteStr::new(original);
1055        assert_eq!(format!("{original}"), format!("{cached}"));
1056        assert_eq!(format!("{original:?}"), format!("{cached:?}"));
1057    }
1058
1059    #[test_case("" ; "empty string")]
1060    #[test_case(SHORT_STRING ; "short strings")]
1061    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1062    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1063    #[test_case(LONG_STRING ; "long strings")]
1064    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1065    fn string_equality_works(contents: &str) {
1066        reset_global_cache();
1067
1068        let cached = FlyStr::new(contents);
1069        let bytes_cached = FlyByteStr::new(contents);
1070        assert_eq!(cached, cached.clone(), "must be equal to itself");
1071        assert_eq!(cached, contents, "must be equal to the original");
1072        assert_eq!(cached, contents.to_owned(), "must be equal to an owned copy of the original");
1073        assert_eq!(cached, bytes_cached);
1074
1075        // test inequality too
1076        assert_ne!(cached, "goodbye");
1077        assert_ne!(bytes_cached, "goodbye");
1078    }
1079
1080    #[test_case("", SHORT_STRING ; "empty and short string")]
1081    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1082    #[test_case(MAX_LEN_SHORT_STRING, MIN_LEN_LONG_STRING ; "short and long strings")]
1083    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1084    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1085    fn string_comparison_works(lesser_contents: &str, greater_contents: &str) {
1086        reset_global_cache();
1087
1088        let lesser = FlyStr::new(lesser_contents);
1089        let lesser_bytes = FlyByteStr::from(lesser_contents);
1090        let greater = FlyStr::new(greater_contents);
1091        let greater_bytes = FlyByteStr::from(greater_contents);
1092
1093        // lesser as method receiver
1094        assert!(lesser < greater);
1095        assert!(lesser < greater_bytes);
1096        assert!(lesser_bytes < greater);
1097        assert!(lesser_bytes < greater_bytes);
1098        assert!(lesser <= greater);
1099        assert!(lesser <= greater_bytes);
1100        assert!(lesser_bytes <= greater);
1101        assert!(lesser_bytes <= greater_bytes);
1102
1103        // greater as method receiver
1104        assert!(greater > lesser);
1105        assert!(greater > lesser_bytes);
1106        assert!(greater_bytes > lesser);
1107        assert!(greater >= lesser);
1108        assert!(greater >= lesser_bytes);
1109        assert!(greater_bytes >= lesser);
1110        assert!(greater_bytes >= lesser_bytes);
1111    }
1112
1113    #[test_case("" ; "empty string")]
1114    #[test_case(SHORT_STRING ; "short strings")]
1115    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1116    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1117    fn no_allocations_for_short_strings(contents: &str) {
1118        reset_global_cache();
1119        assert_eq!(num_strings_in_global_cache(), 0);
1120
1121        let original = FlyStr::new(contents);
1122        assert_eq!(num_strings_in_global_cache(), 0);
1123        assert_eq!(original.0.refcount(), None);
1124
1125        let cloned = original.clone();
1126        assert_eq!(num_strings_in_global_cache(), 0);
1127        assert_eq!(cloned.0.refcount(), None);
1128
1129        let deduped = FlyStr::new(contents);
1130        assert_eq!(num_strings_in_global_cache(), 0);
1131        assert_eq!(deduped.0.refcount(), None);
1132    }
1133
1134    #[test_case("" ; "empty string")]
1135    #[test_case(SHORT_STRING ; "short strings")]
1136    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1137    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1138    fn no_allocations_for_short_bytestrings(contents: &str) {
1139        reset_global_cache();
1140        assert_eq!(num_strings_in_global_cache(), 0);
1141
1142        let original = FlyByteStr::new(contents);
1143        assert_eq!(num_strings_in_global_cache(), 0);
1144        assert_eq!(original.0.refcount(), None);
1145
1146        let cloned = original.clone();
1147        assert_eq!(num_strings_in_global_cache(), 0);
1148        assert_eq!(cloned.0.refcount(), None);
1149
1150        let deduped = FlyByteStr::new(contents);
1151        assert_eq!(num_strings_in_global_cache(), 0);
1152        assert_eq!(deduped.0.refcount(), None);
1153    }
1154
1155    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1156    #[test_case(LONG_STRING ; "long strings")]
1157    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1158    fn only_one_copy_allocated_for_long_strings(contents: &str) {
1159        reset_global_cache();
1160
1161        assert_eq!(num_strings_in_global_cache(), 0);
1162
1163        let original = FlyStr::new(contents);
1164        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1165        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1166
1167        let cloned = original.clone();
1168        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
1169        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1170
1171        let deduped = FlyStr::new(contents);
1172        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1173        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1174    }
1175
1176    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1177    #[test_case(LONG_STRING ; "long strings")]
1178    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1179    fn only_one_copy_allocated_for_long_bytestrings(contents: &str) {
1180        reset_global_cache();
1181
1182        assert_eq!(num_strings_in_global_cache(), 0);
1183
1184        let original = FlyByteStr::new(contents);
1185        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1186        assert_eq!(original.0.refcount(), Some(1), "one copy on stack");
1187
1188        let cloned = original.clone();
1189        assert_eq!(num_strings_in_global_cache(), 1, "cloning just incremented refcount");
1190        assert_eq!(cloned.0.refcount(), Some(2), "two copies on stack");
1191
1192        let deduped = FlyByteStr::new(contents);
1193        assert_eq!(num_strings_in_global_cache(), 1, "new string was deduped");
1194        assert_eq!(deduped.0.refcount(), Some(3), "three copies on stack");
1195    }
1196
1197    #[test]
1198    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1199    fn utf8_and_bytestrings_share_the_cache() {
1200        reset_global_cache();
1201
1202        assert_eq!(num_strings_in_global_cache(), 0, "cache is empty");
1203
1204        let _utf8 = FlyStr::from(MIN_LEN_LONG_STRING);
1205        assert_eq!(num_strings_in_global_cache(), 1, "string was allocated");
1206
1207        let _bytes = FlyByteStr::from(MIN_LEN_LONG_STRING);
1208        assert_eq!(num_strings_in_global_cache(), 1, "bytestring was pulled from cache");
1209    }
1210
1211    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1212    #[test_case(LONG_STRING ; "long strings")]
1213    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1214    fn cached_strings_dropped_when_refs_dropped(contents: &str) {
1215        reset_global_cache();
1216
1217        let alloced = FlyStr::new(contents);
1218        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1219        drop(alloced);
1220        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1221    }
1222
1223    #[test_case(MIN_LEN_LONG_STRING ; "barely long strings")]
1224    #[test_case(LONG_STRING ; "long strings")]
1225    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1226    fn cached_bytestrings_dropped_when_refs_dropped(contents: &str) {
1227        reset_global_cache();
1228
1229        let alloced = FlyByteStr::new(contents);
1230        assert_eq!(num_strings_in_global_cache(), 1, "only one string allocated");
1231        drop(alloced);
1232        assert_eq!(num_strings_in_global_cache(), 0, "last reference dropped");
1233    }
1234
1235    #[test_case("", SHORT_STRING ; "empty and short string")]
1236    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1237    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1238    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1239    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1240    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1241    fn equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1242        reset_global_cache();
1243
1244        let first = FlyStr::new(first);
1245        let second = FlyStr::new(second);
1246
1247        let mut set = AHashSet::new();
1248        set.insert(first.clone());
1249        assert!(set.contains(&first));
1250        assert!(!set.contains(&second));
1251
        // re-insert the same string
1253        set.insert(first);
1254        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1255
1256        set.insert(second.clone());
1257        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1258        assert!(set.contains(&second));
1259
        // re-insert the second string
1261        set.insert(second);
1262        assert_eq!(set.len(), 2);
1263    }
1264
1265    #[test_case("", SHORT_STRING ; "empty and short string")]
1266    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1267    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1268    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1269    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1270    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1271    fn byte_equality_and_hashing_with_pointer_value_works_correctly(first: &str, second: &str) {
1272        reset_global_cache();
1273
1274        let first = FlyByteStr::new(first);
1275        let second = FlyByteStr::new(second);
1276
1277        let mut set = AHashSet::new();
1278        set.insert(first.clone());
1279        assert!(set.contains(&first));
1280        assert!(!set.contains(&second));
1281
1282        // re-insert the same string
1283        set.insert(first);
1284        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1285
1286        set.insert(second.clone());
1287        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1288        assert!(set.contains(&second));
1289
1290        // re-insert the second string
1291        set.insert(second);
1292        assert_eq!(set.len(), 2);
1293    }
1294
1295    #[test_case("", SHORT_STRING ; "empty and short string")]
1296    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1297    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1298    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1299    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1300    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1301    fn comparison_for_btree_storage_works(first: &str, second: &str) {
1302        reset_global_cache();
1303
1304        let first = FlyStr::new(first);
1305        let second = FlyStr::new(second);
1306
1307        let mut set = BTreeSet::new();
1308        set.insert(first.clone());
1309        assert!(set.contains(&first));
1310        assert!(!set.contains(&second));
1311
        // re-insert the same string
1313        set.insert(first);
1314        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1315
1316        set.insert(second.clone());
1317        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1318        assert!(set.contains(&second));
1319
        // re-insert the second string
1321        set.insert(second);
1322        assert_eq!(set.len(), 2);
1323    }
1324
1325    #[test_case("", SHORT_STRING ; "empty and short string")]
1326    #[test_case(SHORT_STRING, MAX_LEN_SHORT_STRING ; "two short strings")]
1327    #[test_case(SHORT_STRING, LONG_STRING ; "short and long strings")]
1328    #[test_case(LONG_STRING, MAX_LEN_SHORT_STRING ; "long and max-len-short strings")]
1329    #[test_case(MIN_LEN_LONG_STRING, LONG_STRING ; "barely long and long strings")]
1330    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1331    fn byte_comparison_for_btree_storage_works(first: &str, second: &str) {
1332        reset_global_cache();
1333
1334        let first = FlyByteStr::new(first);
1335        let second = FlyByteStr::new(second);
1336
1337        let mut set = BTreeSet::new();
1338        set.insert(first.clone());
1339        assert!(set.contains(&first));
1340        assert!(!set.contains(&second));
1341
1342        // re-insert the same string
1343        set.insert(first);
1344        assert_eq!(set.len(), 1, "set did not grow because the same string was inserted as before");
1345
1346        set.insert(second.clone());
1347        assert_eq!(set.len(), 2, "inserting a different string must mutate the set");
1348        assert!(set.contains(&second));
1349
1350        // re-insert the second string
1351        set.insert(second);
1352        assert_eq!(set.len(), 2);
1353    }
1354
1355    #[test_case("" ; "empty string")]
1356    #[test_case(SHORT_STRING ; "short strings")]
1357    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1358    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1359    #[test_case(LONG_STRING ; "long strings")]
1360    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1361    fn serde_works(contents: &str) {
1362        reset_global_cache();
1363
1364        let s = FlyStr::new(contents);
1365        let as_json = serde_json::to_string(&s).unwrap();
1366        assert_eq!(as_json, format!("\"{contents}\""));
1367        assert_eq!(s, serde_json::from_str::<FlyStr>(&as_json).unwrap());
1368    }
1369
1370    #[test_case("" ; "empty string")]
1371    #[test_case(SHORT_STRING ; "short strings")]
1372    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1373    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1374    #[test_case(LONG_STRING ; "long strings")]
1375    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1376    fn serde_works_bytestring(contents: &str) {
1377        reset_global_cache();
1378
1379        let s = FlyByteStr::new(contents);
1380        let as_json = serde_json::to_string(&s).unwrap();
1381        assert_eq!(s, serde_json::from_str::<FlyByteStr>(&as_json).unwrap());
1382    }
1383
1384    #[test_case(SHORT_NON_UTF8 ; "short non-utf8 bytestring")]
1385    #[test_case(LONG_NON_UTF8 ; "long non-utf8 bytestring")]
1386    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1387    fn non_utf8_works(contents: &[u8]) {
1388        reset_global_cache();
1389
1390        let res: Result<FlyStr, _> = FlyByteStr::from(contents).try_into();
1391        res.unwrap_err();
1392    }
1393
1394    #[test_case("" ; "empty string")]
1395    #[test_case(SHORT_STRING ; "short strings")]
1396    #[test_case(MAX_LEN_SHORT_STRING ; "max len short strings")]
1397    #[test_case(MIN_LEN_LONG_STRING ; "min len long strings")]
1398    #[test_case(LONG_STRING ; "long strings")]
1399    #[cfg_attr(not(target_os = "fuchsia"), serial)]
1400    fn flystr_to_flybytestr_and_back(contents: &str) {
1401        reset_global_cache();
1402
1403        let bytestr = FlyByteStr::from(contents);
1404        let flystr = FlyStr::try_from(bytestr.clone()).unwrap();
1405        assert_eq!(bytestr, flystr);
1406        let bytestr2 = FlyByteStr::from(flystr.clone());
1407        assert_eq!(bytestr, bytestr2);
1408    }
1409}