From 45a1fc6a969f429859e8080afde1d1122394dc8d Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:20:22 +0100 Subject: [PATCH 1/7] Python: Add link to better PyYAML docs I found this randomly --- python/ql/src/semmle/python/frameworks/Yaml.qll | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Yaml.qll b/python/ql/src/semmle/python/frameworks/Yaml.qll index 7e9d7f100e9d..ec3a21379dac 100644 --- a/python/ql/src/semmle/python/frameworks/Yaml.qll +++ b/python/ql/src/semmle/python/frameworks/Yaml.qll @@ -1,6 +1,10 @@ /** - * Provides classes modeling security-relevant aspects of the PyYAML package - * https://pyyaml.org/wiki/PyYAMLDocumentation (obtained via `import yaml`). + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained + * via `import yaml`) + * + * See + * - https://pyyaml.org/wiki/PyYAMLDocumentation + * - https://pyyaml.docsforge.com/master/documentation/ */ private import python @@ -8,6 +12,14 @@ private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.Concepts +/** + * Provides classes modeling security-relevant aspects of the PyYAML package (obtained + * via `import yaml`) + * + * See + * - https://pyyaml.org/wiki/PyYAMLDocumentation + * - https://pyyaml.docsforge.com/master/documentation/ + */ private module Yaml { /** Gets a reference to the `yaml` module. */ private DataFlow::Node yaml(DataFlow::TypeTracker t) { From 14e9bda5de10ef5a1c6dd96692d083f4e0f16025 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:35:17 +0100 Subject: [PATCH 2/7] Python: Refactor PyYAML tests a bit --- .../library-tests/frameworks/yaml/Decoding.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py index 816d91d90689..7d5bcde15034 100644 --- a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py +++ b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py @@ -1,15 +1,18 @@ import yaml -from yaml import SafeLoader +# Unsafe: yaml.load(payload) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput -yaml.load(payload, SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML -yaml.load(payload, Loader=SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML -yaml.load(payload, Loader=yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML - -yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +yaml.load(payload, yaml.Loader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput +# Safe +yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML + +# load_all variants yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput From 5ec8511d50c96fac2909dfba39424fac79c9df89 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:47:46 +0100 Subject: [PATCH 3/7] Python: Port PyYAML model to API graphs --- .../ql/src/semmle/python/frameworks/Yaml.qll | 76 ++----------------- 1 file changed, 7 insertions(+), 69 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Yaml.qll b/python/ql/src/semmle/python/frameworks/Yaml.qll index ec3a21379dac..8240dd213787 100644 --- a/python/ql/src/semmle/python/frameworks/Yaml.qll +++ b/python/ql/src/semmle/python/frameworks/Yaml.qll @@ -11,6 +11,7 @@ private import python private import semmle.python.dataflow.new.DataFlow private import semmle.python.dataflow.new.RemoteFlowSources private import semmle.python.Concepts +private import semmle.python.ApiGraphs /** * Provides classes modeling security-relevant aspects of the PyYAML package (obtained @@ -20,70 +21,7 @@ private import semmle.python.Concepts * - https://pyyaml.org/wiki/PyYAMLDocumentation * - https://pyyaml.docsforge.com/master/documentation/ */ -private module Yaml { - /** Gets a reference to the `yaml` module. */ - private DataFlow::Node yaml(DataFlow::TypeTracker t) { - t.start() and - result = DataFlow::importNode("yaml") - or - exists(DataFlow::TypeTracker t2 | result = yaml(t2).track(t2, t)) - } - - /** Gets a reference to the `yaml` module. */ - DataFlow::Node yaml() { result = yaml(DataFlow::TypeTracker::end()) } - - /** Provides models for the `yaml` module. */ - module yaml { - /** - * Gets a reference to the attribute `attr_name` of the `yaml` module. - * WARNING: Only holds for a few predefined attributes. - * - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. - */ - private DataFlow::Node yaml_attr(DataFlow::TypeTracker t, string attr_name) { - attr_name in [ - // functions - "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", - "safe_load", "safe_load_all", - // Classes - "SafeLoader", "BaseLoader" - ] and - ( - t.start() and - result = DataFlow::importNode("yaml." + attr_name) - or - t.startInAttr(attr_name) and - result = yaml() - ) - or - // Due to bad performance when using normal setup with `yaml_attr(t2, attr_name).track(t2, t)` - // we have inlined that code and forced a join - exists(DataFlow::TypeTracker t2 | - exists(DataFlow::StepSummary summary | - yaml_attr_first_join(t2, attr_name, result, summary) and - t = t2.append(summary) - ) - ) - } - - pragma[nomagic] - private predicate yaml_attr_first_join( - DataFlow::TypeTracker t2, string attr_name, DataFlow::Node res, DataFlow::StepSummary summary - ) { - DataFlow::StepSummary::step(yaml_attr(t2, attr_name), res, summary) - } - - /** - * Gets a reference to the attribute `attr_name` of the `yaml` module. - * WARNING: Only holds for a few predefined attributes. - * - * For example, using `attr_name = "load"` will get all uses of `yaml.load`. - */ - DataFlow::Node yaml_attr(string attr_name) { - result = yaml_attr(DataFlow::TypeTracker::end(), attr_name) - } - } -} +private module Yaml { } /** * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, @@ -91,7 +29,7 @@ private module Yaml { * * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). */ -private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode { +private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { override CallNode node; string func_name; @@ -100,7 +38,7 @@ private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode { "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", "safe_load", "safe_load_all" ] and - node.getFunction() = Yaml::yaml::yaml_attr(func_name).asCfgNode() + this = API::moduleImport("yaml").getMember(func_name).getACall() } /** @@ -117,13 +55,13 @@ private class YamlLoadCall extends Decoding::Range, DataFlow::CfgNode { // If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all, // then the default loader will be used, which is not safe. not exists(DataFlow::Node loader_arg | - loader_arg.asCfgNode() in [node.getArg(1), node.getArgByName("Loader")] + loader_arg in [this.getArg(1), this.getArgByName("Loader")] | - loader_arg = Yaml::yaml::yaml_attr(["SafeLoader", "BaseLoader"]) + loader_arg = API::moduleImport("yaml").getMember(["SafeLoader", "BaseLoader"]).getAUse() ) } - override DataFlow::Node getAnInput() { result.asCfgNode() = node.getArg(0) } + override DataFlow::Node getAnInput() { result = this.getArg(0) } override DataFlow::Node getOutput() { result = this } From 25b15d74709e912f97ca7d461552eb5681c4ca50 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:48:30 +0100 Subject: [PATCH 4/7] Python: Move PyYAML modeling classes within module For now, this is how we're trying to structure things -- all in all it doesn't matter too much, since everything is still marked as private. --- .../ql/src/semmle/python/frameworks/Yaml.qll | 80 +++++++++---------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/python/ql/src/semmle/python/frameworks/Yaml.qll b/python/ql/src/semmle/python/frameworks/Yaml.qll index 8240dd213787..1bf5c517ec88 100644 --- a/python/ql/src/semmle/python/frameworks/Yaml.qll +++ b/python/ql/src/semmle/python/frameworks/Yaml.qll @@ -21,49 +21,49 @@ private import semmle.python.ApiGraphs * - https://pyyaml.org/wiki/PyYAMLDocumentation * - https://pyyaml.docsforge.com/master/documentation/ */ -private module Yaml { } - -/** - * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, - * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`) - * - * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). - */ -private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { - override CallNode node; - string func_name; - - YamlLoadCall() { - func_name in [ - "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", - "safe_load", "safe_load_all" - ] and - this = API::moduleImport("yaml").getMember(func_name).getACall() - } - +private module Yaml { /** - * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. - * In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0 - * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). - * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. - * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. + * A call to any of the loading functions in `yaml` (`load`, `load_all`, `full_load`, + * `full_load_all`, `unsafe_load`, `unsafe_load_all`, `safe_load`, `safe_load_all`) + * + * See https://pyyaml.org/wiki/PyYAMLDocumentation (you will have to scroll down). */ - override predicate mayExecuteInput() { - func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] - or - func_name in ["load", "load_all"] and - // If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all, - // then the default loader will be used, which is not safe. - not exists(DataFlow::Node loader_arg | - loader_arg in [this.getArg(1), this.getArgByName("Loader")] - | - loader_arg = API::moduleImport("yaml").getMember(["SafeLoader", "BaseLoader"]).getAUse() - ) - } + private class YamlLoadCall extends Decoding::Range, DataFlow::CallCfgNode { + override CallNode node; + string func_name; + + YamlLoadCall() { + func_name in [ + "load", "load_all", "full_load", "full_load_all", "unsafe_load", "unsafe_load_all", + "safe_load", "safe_load_all" + ] and + this = API::moduleImport("yaml").getMember(func_name).getACall() + } - override DataFlow::Node getAnInput() { result = this.getArg(0) } + /** + * This function was thought safe from the 5.1 release in 2017, when the default loader was changed to `FullLoader`. + * In 2020 new exploits were found, meaning it's not safe. The Current plan is to change the default to `SafeLoader` in release 6.0 + * (as explained in https://github.com/yaml/pyyaml/issues/420#issuecomment-696752389). + * Until 6.0 is released, we will mark `yaml.load` as possibly leading to arbitrary code execution. + * See https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation for more details. + */ + override predicate mayExecuteInput() { + func_name in ["full_load", "full_load_all", "unsafe_load", "unsafe_load_all"] + or + func_name in ["load", "load_all"] and + // If the `Loader` is not set to either `SafeLoader` or `BaseLoader` or not set at all, + // then the default loader will be used, which is not safe. + not exists(DataFlow::Node loader_arg | + loader_arg in [this.getArg(1), this.getArgByName("Loader")] + | + loader_arg = API::moduleImport("yaml").getMember(["SafeLoader", "BaseLoader"]).getAUse() + ) + } - override DataFlow::Node getOutput() { result = this } + override DataFlow::Node getAnInput() { result = this.getArg(0) } - override string getFormat() { result = "YAML" } + override DataFlow::Node getOutput() { result = this } + + override string getFormat() { result = "YAML" } + } } From 54e6f51512e99279ea249c6553168a65d604c618 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:49:33 +0100 Subject: [PATCH 5/7] Python: Add example of C-based PyYAML loaders ``` In [6]: yaml.load("!!python/object/new:os.system [echo EXPLOIT!]", yaml.CLoader) EXPLOIT! Out[6]: 0 ``` --- .../experimental/library-tests/frameworks/yaml/Decoding.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py index 7d5bcde15034..80360648391f 100644 --- a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py +++ b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py @@ -17,3 +17,9 @@ yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput + +# C-based loaders with `libyaml` +yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput +yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput +yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML SPURIOUS: decodeMayExecuteInput +yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML SPURIOUS: decodeMayExecuteInput From 42b2c3ed52e529c72f815844d8aaf01f937320d7 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Thu, 18 Mar 2021 11:55:01 +0100 Subject: [PATCH 6/7] Python: Model C-based loaders for PyYAML Not really that important. But easy to do while I was working on this library. --- .../change-notes/2021-03-18-yaml-handle-C-based-loaders.md | 2 ++ python/ql/src/semmle/python/frameworks/Yaml.qll | 5 ++++- .../experimental/library-tests/frameworks/yaml/Decoding.py | 4 ++-- 3 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 python/change-notes/2021-03-18-yaml-handle-C-based-loaders.md diff --git a/python/change-notes/2021-03-18-yaml-handle-C-based-loaders.md b/python/change-notes/2021-03-18-yaml-handle-C-based-loaders.md new file mode 100644 index 000000000000..054d10dbb0bd --- /dev/null +++ b/python/change-notes/2021-03-18-yaml-handle-C-based-loaders.md @@ -0,0 +1,2 @@ +lgtm,codescanning +* Improved modeling of the `PyYAML` PyPI package, so we now correctly treat `CSafeLoader` and `CBaseLoader` as being safe loaders that can not lead to code execution. diff --git a/python/ql/src/semmle/python/frameworks/Yaml.qll b/python/ql/src/semmle/python/frameworks/Yaml.qll index 1bf5c517ec88..f7ac0a9fda74 100644 --- a/python/ql/src/semmle/python/frameworks/Yaml.qll +++ b/python/ql/src/semmle/python/frameworks/Yaml.qll @@ -56,7 +56,10 @@ private module Yaml { not exists(DataFlow::Node loader_arg | loader_arg in [this.getArg(1), this.getArgByName("Loader")] | - loader_arg = API::moduleImport("yaml").getMember(["SafeLoader", "BaseLoader"]).getAUse() + loader_arg = + API::moduleImport("yaml") + .getMember(["SafeLoader", "BaseLoader", "CSafeLoader", "CBaseLoader"]) + .getAUse() ) } diff --git a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py index 80360648391f..a9369d6ec3df 100644 --- a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py +++ b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py @@ -21,5 +21,5 @@ # C-based loaders with `libyaml` yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput -yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML SPURIOUS: decodeMayExecuteInput -yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML SPURIOUS: decodeMayExecuteInput +yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML From 7543f10593dfe5bd3aaea1572d1c9135aff49b31 Mon Sep 17 00:00:00 2001 From: Rasmus Wriedt Larsen Date: Fri, 19 Mar 2021 09:53:25 +0100 Subject: [PATCH 7/7] Python: Reorganize PyYAML tests a bit --- .../library-tests/frameworks/yaml/Decoding.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py index a9369d6ec3df..987ea76552a7 100644 --- a/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py +++ b/python/ql/test/experimental/library-tests/frameworks/yaml/Decoding.py @@ -6,20 +6,32 @@ yaml.unsafe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.full_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput -# Safe +# Safe: yaml.load(payload, yaml.SafeLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.load(payload, Loader=yaml.SafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.load(payload, yaml.BaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.safe_load(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML +################################################################################ # load_all variants +################################################################################ + +# Unsafe: yaml.load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput -yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.unsafe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.full_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput +# Safe: +yaml.safe_load_all(payload) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML + +################################################################################ # C-based loaders with `libyaml` +################################################################################ + +# Unsafe: yaml.load(payload, yaml.CLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput yaml.load(payload, yaml.CFullLoader) # $ decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML decodeMayExecuteInput + +# Safe: yaml.load(payload, yaml.CSafeLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML yaml.load(payload, yaml.CBaseLoader) # $decodeInput=payload decodeOutput=Attribute() decodeFormat=YAML