Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
lgtm,codescanning
* Improved modeling of the `PyYAML` PyPI package, so we now correctly treat `CSafeLoader` and `CBaseLoader` as being safe loaders that can not lead to code execution.
147 changes: 50 additions & 97 deletions python/ql/src/semmle/python/frameworks/Yaml.qll
Original file line number Diff line number Diff line change
@@ -1,119 +1,72 @@
/**
* Provides classes modeling security-relevant aspects of the PyYAML package
* https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`).
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/

private import python
private import semmle.python.dataflow.new.DataFlow
private import semmle.python.dataflow.new.RemoteFlowSources
private import semmle.python.Concepts
private import semmle.python.ApiGraphs

/**
* Provides classes modeling security-relevant aspects of the PyYAML package (obtained
* via `import yaml`)
*
* See
* - https://pyyaml.org/wiki/PyYAMLDocumentation
* - https://pyyaml.docsforge.com/master/documentation/
*/
private module Yaml {
/** Gets a reference to the `yaml` module. */
private DataFlow::Node yaml(DataFlow::TypeTracker t) {
t.start() and
result = DataFlow::importNode("yaml")
or
exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t))
}

/** Gets a reference to the `yaml` module. */
DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) }
/**
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode {
override CallNode node;
string func_name;

/** Provides models for the `yaml` module. */
module yaml {
/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
*/
private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) {
attr_name in [
// functions
YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all",
// Classes
"SafeLoader", "BaseLoader"
"safe_load", "safe_load_all"
] and
(
t.start() and
result = DataFlow::importNode("yaml." + attr_name)
or
t.startInAttr(attr_name) and
result = yaml()
)
or
// Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)`
// we have inlined that code and forced a join
exists(DataFlow::TypeTracker t2 |
exists(DataFlow::StepSummary summary |
yaml_attr_first_join(t2, attr_name, result, summary) and
t = t2.append(summary)
)
)
}

pragma[nomagic]
private predicate yaml_attr_first_join(
DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary
) {
DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary)
this = API::moduleImport("yaml").getMember(func_name).getACall()
}

/**
* Gets a reference to the attribute `attr_name` of the `yaml` module.
* WARNING: Only holds for a few predefined attributes.
*
* For example, using `attr_name = "load"` will get all uses of `yaml.load`.
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
*/
DataFlow::Node yaml_attr(string attr_name) {
result = yaml_attr(DataFlow::TypeTracker::end(), attr_name)
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg in [this.getArg(1), this.getArgByName("Loader")]
|
loader_arg =
API::moduleImport("yaml")
.getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"])
.getAUse()
)
}
}
}

/**
* A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`,
* `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`)
*
* See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down).
*/
private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode {
override CallNode node;
string func_name;
override DataFlow::Node getAnInput() { result = this.getArg(0) }

YamlLoadCall() {
func_name in [
"load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all",
"safe_load", "safe_load_all"
] and
node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode()
}
override DataFlow::Node getOutput() { result = this }

/**
* This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`.
* In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0
* (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389).
* Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution.
* See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details.
*/
override predicate mayExecuteInput() {
func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"]
or
func_name in ["load", "load_all"] and
// If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all,
// then the default loader will be used, which is not safe.
not exists(DataFlow::Node loader_arg |
loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")]
|
loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"])
)
override string getFormat() { result = "YAML" }
}

override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) }

override DataFlow::Node getOutput() { result = this }

override string getFormat() { result = "YAML" }
}
Original file line number Diff line number Diff line change
@@ -1,16 +1,37 @@
import yaml
from yaml import SafeLoader

# Unsafe:
yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML

yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML

################################################################################
# load_all variants
################################################################################

# Unsafe:
yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML

################################################################################
# C-based loaders with `libyaml`
################################################################################

# Unsafe:
yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput
yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput

# Safe:
yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML
yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML