forked from RustPython/RustPython
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathunicodedata.rs
More file actions
94 lines (80 loc) · 3.04 KB
/
unicodedata.rs
File metadata and controls
94 lines (80 loc) · 3.04 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
/* Access to the unicode database.
See also: https://docs.python.org/3/library/unicodedata.html
*/
use crate::function::OptionalArg;
use crate::obj::objstr::PyStringRef;
use crate::pyobject::{PyObjectRef, PyResult};
use crate::vm::VirtualMachine;
use unic::char::property::EnumeratedCharProperty;
use unic::ucd::category::GeneralCategory;
use unic::ucd::Name;
use unicode_names2;
pub fn make_module(vm: &VirtualMachine) -> PyObjectRef {
let ctx = &vm.ctx;
let unidata_version = unic::UNICODE_VERSION.to_string();
py_module!(vm, "unicodedata", {
"bidirectional" => ctx.new_rustfunc(bidirectional),
"category" => ctx.new_rustfunc(category),
"name" => ctx.new_rustfunc(name),
"lookup" => ctx.new_rustfunc(lookup),
"normalize" => ctx.new_rustfunc(normalize),
"unidata_version" => ctx.new_str(unidata_version),
})
}
/// Returns the abbreviated general category name (as reported by
/// `GeneralCategory::abbr_name`) of the single character held in `character`.
///
/// Any error produced by `extract_char` is returned unchanged.
fn category(character: PyStringRef, vm: &VirtualMachine) -> PyResult {
    extract_char(character, vm)
        .map(|ch| vm.new_str(GeneralCategory::of(ch).abbr_name().to_string()))
}
/// Looks up a character by its name and returns it as a one-character string.
///
/// The lookup is delegated to `unicode_names2::character`; when it finds
/// nothing, a KeyError carrying the requested name is returned.
fn lookup(name: PyStringRef, vm: &VirtualMachine) -> PyResult {
    // TODO: we might want to use unic_ucd instead of unicode_names2 for this too, if possible:
    match unicode_names2::character(&name.value) {
        Some(found) => Ok(vm.new_str(found.to_string())),
        None => {
            let message = format!("undefined character name '{}'", name);
            Err(vm.new_key_error(vm.new_str(message)))
        }
    }
}
/// Returns the name of the single character held in `character`, as reported
/// by `unic::ucd::Name`.
///
/// When the character has no name, `default` is returned if the caller
/// supplied one; otherwise a ValueError is returned. Any error produced by
/// `extract_char` is propagated unchanged.
fn name(
    character: PyStringRef,
    default: OptionalArg<PyObjectRef>,
    vm: &VirtualMachine,
) -> PyResult {
    let ch = extract_char(character, vm)?;
    match (Name::of(ch), default) {
        (Some(found), _) => Ok(vm.new_str(found.to_string())),
        (None, OptionalArg::Present(fallback)) => Ok(fallback),
        (None, OptionalArg::Missing) => {
            Err(vm.new_value_error("character name not found!".to_string()))
        }
    }
}
/// Returns the abbreviated bidirectional class name (as reported by
/// `BidiClass::abbr_name`) of the single character held in `character`.
///
/// Any error produced by `extract_char` is propagated unchanged.
fn bidirectional(character: PyStringRef, vm: &VirtualMachine) -> PyResult {
    use unic::bidi::BidiClass;

    let ch = extract_char(character, vm)?;
    let abbreviation = BidiClass::of(ch).abbr_name();
    Ok(vm.new_str(abbreviation.to_string()))
}
/// Returns `unistr` converted to the Unicode normalization form named by `form`.
///
/// `form` must be exactly one of `"NFC"`, `"NFKC"`, `"NFD"` or `"NFKD"`.
/// Any other value yields a ValueError.
fn normalize(form: PyStringRef, unistr: PyStringRef, vm: &VirtualMachine) -> PyResult {
    use unic::normal::StrNormalForm;

    let text = &unistr.value;
    let normalized_text = match form.value.as_ref() {
        "NFC" => text.nfc().collect::<String>(),
        "NFKC" => text.nfkc().collect::<String>(),
        "NFD" => text.nfd().collect::<String>(),
        "NFKD" => text.nfkd().collect::<String>(),
        _ => {
            // The rejected argument is `form`, not `unistr`, and four forms are
            // accepted; use the same wording CPython reports for this case.
            return Err(vm.new_value_error("invalid normalization form".to_string()));
        }
    };
    Ok(vm.new_str(normalized_text))
}
/// Extracts the single `char` contained in `character`.
///
/// Returns a TypeError when the string is empty or holds more than one
/// character.
fn extract_char(character: PyStringRef, vm: &VirtualMachine) -> PyResult<char> {
    // Count characters rather than bytes: `str::len` is the UTF-8 byte length,
    // so a length check would reject every single non-ASCII character.
    let mut chars = character.value.chars();
    match (chars.next(), chars.next()) {
        (Some(my_char), None) => Ok(my_char),
        _ => Err(vm.new_type_error("argument must be an unicode character, not str".to_string())),
    }
}