@@ -113,14 +113,6 @@ where
113113 s. as_ref ( ) . to_owned ( ) . into ( )
114114 }
115115}
116- impl < T > From < ( & T , PyStrKind ) > for PyStr
117- where
118- T : AsRef < [ u8 ] > + ?Sized ,
119- {
120- fn from ( ( s, k) : ( & T , PyStrKind ) ) -> PyStr {
121- ( s. as_ref ( ) . to_owned ( ) . into_boxed_slice ( ) , k) . into ( )
122- }
123- }
124116
125117impl From < String > for PyStr {
126118 fn from ( s : String ) -> PyStr {
@@ -134,10 +126,7 @@ impl From<Box<str>> for PyStr {
134126 // doing the check is ~10x faster for ascii, and is actually only 2% slower worst case for
135127 // non-ascii; see https://github.com/RustPython/RustPython/pull/2586#issuecomment-844611532
136128 let is_ascii = value. is_ascii ( ) ;
137- let bytes = unsafe {
138- // SAFETY: Box<str> and Box<[u8]> have same layout
139- Box :: from_raw ( Box :: into_raw ( value) as _ )
140- } ;
129+ let bytes = value. into_boxed_bytes ( ) ;
141130 let kind = if is_ascii {
142131 PyStrKind :: Ascii
143132 } else {
@@ -152,24 +141,6 @@ impl From<Box<str>> for PyStr {
152141 }
153142}
154143
155- impl From < ( Vec < u8 > , PyStrKind ) > for PyStr {
156- fn from ( ( s, kind) : ( Vec < u8 > , PyStrKind ) ) -> PyStr {
157- ( s. into_boxed_slice ( ) , kind) . into ( )
158- }
159- }
160-
161- impl From < ( Box < [ u8 ] > , PyStrKind ) > for PyStr {
162- fn from ( ( bytes, kind) : ( Box < [ u8 ] > , PyStrKind ) ) -> PyStr {
163- let s = Self {
164- bytes,
165- kind : kind. new_data ( ) ,
166- hash : Radium :: new ( hash:: SENTINEL ) ,
167- } ;
168- debug_assert ! ( matches!( s. kind, PyStrKindData :: Ascii ) || !s. as_str( ) . is_ascii( ) ) ;
169- s
170- }
171- }
172-
173144pub type PyStrRef = PyRef < PyStr > ;
174145
175146impl fmt:: Display for PyStr {
@@ -241,7 +212,11 @@ impl PyStrIterator {
241212 fn reduce ( & self , vm : & VirtualMachine ) -> PyResult {
242213 let iter = vm. get_attribute ( vm. builtins . clone ( ) , "iter" ) ?;
243214 Ok ( vm. ctx . new_tuple ( match self . status . load ( ) {
244- Exhausted => vec ! [ iter, vm. ctx. new_tuple( vec![ vm. ctx. new_ascii_str( b"" ) ] ) ] ,
215+ Exhausted => vec ! [
216+ iter,
217+ vm. ctx
218+ . new_tuple( vec![ vm. ctx. new_ascii_literal( crate :: utils:: ascii!( "" ) ) ] ) ,
219+ ] ,
245220 Active => vec ! [
246221 iter,
247222 vm. ctx. new_tuple( vec![ self . string. clone( ) . into_object( ) ] ) ,
@@ -322,13 +297,20 @@ impl SlotConstructor for PyStr {
322297}
323298
324299impl PyStr {
325- /// SAFETY: Given 's ' must be valid data for given 'kind'
326- unsafe fn new_str_unchecked ( s : String , kind : PyStrKind ) -> Self {
327- Self {
328- bytes : Box :: from_raw ( Box :: into_raw ( s . into_boxed_str ( ) ) as _ ) ,
300+ /// SAFETY: the given 'bytes' must be valid data for the given 'kind'; debug builds additionally assert that a non-ASCII kind is not paired with all-ASCII data.
301+ pub ( crate ) unsafe fn new_str_unchecked ( bytes : Vec < u8 > , kind : PyStrKind ) -> Self {
302+ let s = Self {
303+ bytes : bytes . into_boxed_slice ( ) ,
329304 kind : kind. new_data ( ) ,
330305 hash : Radium :: new ( hash:: SENTINEL ) ,
331- }
306+ } ;
307+ debug_assert ! ( matches!( s. kind, PyStrKindData :: Ascii ) || !s. as_str( ) . is_ascii( ) ) ;
308+ s
309+ }
310+
311+ /// SAFETY: every byte of the given 'bytes' must be ASCII; this forwards to `new_str_unchecked` with `PyStrKind::Ascii`.
312+ unsafe fn new_ascii_unchecked ( bytes : Vec < u8 > ) -> Self {
313+ Self :: new_str_unchecked ( bytes, PyStrKind :: Ascii )
332314 }
333315
334316 fn new_substr ( & self , s : String ) -> Self {
@@ -338,8 +320,8 @@ impl PyStr {
338320 PyStrKind :: Utf8
339321 } ;
340322 unsafe {
341- // SAFETY: kind is safely calculated for substring
342- Self :: new_str_unchecked ( s, kind)
323+ // SAFETY: kind is properly decided for substring
324+ Self :: new_str_unchecked ( s. into_bytes ( ) , kind)
343325 }
344326 }
345327
@@ -367,11 +349,11 @@ impl PyStr {
367349 #[ pymethod( magic) ]
368350 fn add ( zelf : PyRef < Self > , other : PyObjectRef , vm : & VirtualMachine ) -> PyResult {
369351 if let Some ( other) = other. payload :: < PyStr > ( ) {
370- let kind = zelf. kind . kind ( ) | other. kind . kind ( ) ;
371352 let bytes = zelf. as_str ( ) . py_add ( other. as_ref ( ) ) ;
372353 Ok ( unsafe {
373- // SAFETY: safe kind operation
374- Self :: new_str_unchecked ( bytes, kind)
354+ // SAFETY: `kind` is safely decided
355+ let kind = zelf. kind . kind ( ) | other. kind . kind ( ) ;
356+ Self :: new_str_unchecked ( bytes. into_bytes ( ) , kind)
375357 }
376358 . into_pyobject ( vm) )
377359 } else if let Some ( radd) = vm. get_method ( other. clone ( ) , "__radd__" ) {
@@ -625,18 +607,23 @@ impl PyStr {
625607 |v, s, vm| {
626608 v. as_bytes ( )
627609 . split_str ( s)
628- . map ( |s| vm. ctx . new_ascii_str ( s) )
610+ . map ( |s| {
611+ unsafe { PyStr :: new_ascii_unchecked ( s. to_owned ( ) ) } . into_pyobject ( vm)
612+ } )
629613 . collect ( )
630614 } ,
631615 |v, s, n, vm| {
632616 v. as_bytes ( )
633617 . splitn_str ( n, s)
634- . map ( |s| vm. ctx . new_ascii_str ( s) )
618+ . map ( |s| {
619+ unsafe { PyStr :: new_ascii_unchecked ( s. to_owned ( ) ) } . into_pyobject ( vm)
620+ } )
635621 . collect ( )
636622 } ,
637623 |v, n, vm| {
638- v. as_bytes ( )
639- . py_split_whitespace ( n, |s| vm. ctx . new_ascii_str ( s) )
624+ v. as_bytes ( ) . py_split_whitespace ( n, |s| {
625+ unsafe { PyStr :: new_ascii_unchecked ( s. to_owned ( ) ) } . into_pyobject ( vm)
626+ } )
640627 } ,
641628 ) ,
642629 PyStrKind :: Utf8 => self . as_str ( ) . py_split (
@@ -993,7 +980,7 @@ impl PyStr {
993980 if has_mid {
994981 sep. into_object ( )
995982 } else {
996- vm. ctx . new_ascii_str ( b"" )
983+ vm. ctx . new_ascii_literal ( crate :: utils :: ascii! ( "" ) )
997984 } ,
998985 self . new_substr ( back) ,
999986 )
@@ -1012,7 +999,7 @@ impl PyStr {
1012999 if has_mid {
10131000 sep. into_object ( )
10141001 } else {
1015- vm. ctx . new_ascii_str ( b"" )
1002+ vm. ctx . new_ascii_literal ( crate :: utils :: ascii! ( "" ) )
10161003 } ,
10171004 self . new_substr ( back) ,
10181005 )
@@ -1410,8 +1397,10 @@ impl PySliceableSequence for PyStr {
14101397 // this is an ascii string
14111398 let mut v = self . bytes [ range] . to_vec ( ) ;
14121399 v. reverse ( ) ;
1413- // TODO: from_utf8_unchecked?
1414- String :: from_utf8 ( v) . unwrap ( )
1400+ unsafe {
1401+ // SAFETY: an ascii string is always utf8
1402+ String :: from_utf8_unchecked ( v)
1403+ }
14151404 } else {
14161405 let mut s = String :: with_capacity ( self . bytes . len ( ) ) ;
14171406 s. extend (
@@ -1556,7 +1545,11 @@ mod tests {
15561545 table. set_item ( "a" , vm. ctx . new_utf8_str ( "🎅" ) , & vm) . unwrap ( ) ;
15571546 table. set_item ( "b" , vm. ctx . none ( ) , & vm) . unwrap ( ) ;
15581547 table
1559- . set_item ( "c" , vm. ctx . new_ascii_str ( b"xda" ) , & vm)
1548+ . set_item (
1549+ "c" ,
1550+ vm. ctx . new_ascii_literal ( crate :: utils:: ascii!( "xda" ) ) ,
1551+ & vm,
1552+ )
15601553 . unwrap ( ) ;
15611554 let translated = PyStr :: maketrans (
15621555 table. into_object ( ) ,
0 commit comments