1- extern crate unicode_categories;
2- extern crate unicode_xid;
3-
41use std:: cell:: Cell ;
52use std:: char;
63use std:: fmt;
@@ -10,10 +7,10 @@ use std::str::FromStr;
107use std:: string:: ToString ;
118
129use num_traits:: ToPrimitive ;
10+ use unic:: ucd:: category:: GeneralCategory ;
11+ use unic:: ucd:: ident:: { is_xid_continue, is_xid_start} ;
1312use unic:: ucd:: is_cased;
1413use unicode_casing:: CharExt ;
15- use unicode_categories:: UnicodeCategories ;
16- use unicode_xid:: UnicodeXID ;
1714
1815use super :: objbytes:: { PyBytes , PyBytesRef } ;
1916use super :: objdict:: PyDict ;
@@ -366,16 +363,7 @@ impl PyString {
366363 formatted. push_str ( & format ! ( "\\ x{:02x}" , c as u32 ) ) ;
367364 } else if c. is_ascii ( ) {
368365 formatted. push ( c) ;
369- } else if c. is_other ( ) || c. is_separator ( ) {
370- // According to python following categories aren't printable:
371- // * Cc (Other, Control)
372- // * Cf (Other, Format)
373- // * Cs (Other, Surrogate)
374- // * Co (Other, Private Use)
375- // * Cn (Other, Not Assigned)
376- // * Zl Separator, Line ('\u2028', LINE SEPARATOR)
377- // * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
378- // * Zs (Separator, Space) other than ASCII space('\x20').
366+ } else if !char_is_printable ( c) {
379367 let code = c as u32 ;
380368 let escaped = if code < 0xff {
381369 format ! ( "\\ U{:02x}" , code)
@@ -742,10 +730,9 @@ impl PyString {
742730 /// * Zs (Separator, Space) other than ASCII space('\x20').
743731 #[ pymethod]
744732 fn isprintable ( & self , _vm : & VirtualMachine ) -> bool {
745- self . value . chars ( ) . all ( |c| match c {
746- '\u{0020}' => true ,
747- _ => !( c. is_other_control ( ) | c. is_separator ( ) ) ,
748- } )
733+ self . value
734+ . chars ( )
735+ . all ( |c| c == '\u{0020}' || char_is_printable ( c) )
749736 }
750737
751738 // cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
@@ -1094,13 +1081,9 @@ impl PyString {
10941081 #[ pymethod]
10951082 fn isidentifier ( & self , _vm : & VirtualMachine ) -> bool {
10961083 let mut chars = self . value . chars ( ) ;
1097- let is_identifier_start = match chars. next ( ) {
1098- Some ( '_' ) => true ,
1099- Some ( c) => UnicodeXID :: is_xid_start ( c) ,
1100- None => false ,
1101- } ;
1084+ let is_identifier_start = chars. next ( ) . map_or ( false , |c| c == '_' || is_xid_start ( c) ) ;
11021085 // a string is not an identifier if it has whitespace or starts with a number
1103- is_identifier_start && chars. all ( UnicodeXID :: is_xid_continue)
1086+ is_identifier_start && chars. all ( is_xid_continue)
11041087 }
11051088
11061089 // https://docs.python.org/3/library/stdtypes.html#str.translate
@@ -1706,6 +1689,20 @@ fn adjust_indices(
17061689 }
17071690}
17081691
1692+ // According to python following categories aren't printable:
1693+ // * Cc (Other, Control)
1694+ // * Cf (Other, Format)
1695+ // * Cs (Other, Surrogate)
1696+ // * Co (Other, Private Use)
1697+ // * Cn (Other, Not Assigned)
1698+ // * Zl Separator, Line ('\u2028', LINE SEPARATOR)
1699+ // * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
1700+ // * Zs (Separator, Space) other than ASCII space('\x20').
1701+ fn char_is_printable ( c : char ) -> bool {
1702+ let cat = GeneralCategory :: of ( c) ;
1703+ !( cat. is_other ( ) || cat. is_separator ( ) )
1704+ }
1705+
17091706#[ cfg( test) ]
17101707mod tests {
17111708 use super :: * ;
0 commit comments