diff --git a/docs/codeql/reusables/supported-frameworks.rst b/docs/codeql/reusables/supported-frameworks.rst index 5ab3b6058343..ebc17dd90cc5 100644 --- a/docs/codeql/reusables/supported-frameworks.rst +++ b/docs/codeql/reusables/supported-frameworks.rst @@ -202,6 +202,7 @@ and the CodeQL library pack ``codeql/python-all`` (`changelog <https://github.co Flask-Admin, Web framework Tornado, Web framework Twisted, Web framework + Gradio, Web framework starlette, Asynchronous Server Gateway Interface (ASGI) ldap3, Lightweight Directory Access Protocol (LDAP) python-ldap, Lightweight Directory Access Protocol (LDAP) diff --git a/python/ql/lib/semmle/python/Frameworks.qll b/python/ql/lib/semmle/python/Frameworks.qll index a6288dadc111..83d78b579868 100644 --- a/python/ql/lib/semmle/python/Frameworks.qll +++ b/python/ql/lib/semmle/python/Frameworks.qll @@ -29,6 +29,7 @@ private import semmle.python.frameworks.FastApi private import semmle.python.frameworks.Flask private import semmle.python.frameworks.FlaskAdmin private import semmle.python.frameworks.FlaskSqlAlchemy +private import semmle.python.frameworks.Gradio private import semmle.python.frameworks.Httpx private import semmle.python.frameworks.Idna private import semmle.python.frameworks.Invoke diff --git a/python/ql/lib/semmle/python/frameworks/Gradio.qll b/python/ql/lib/semmle/python/frameworks/Gradio.qll new file mode 100644 index 000000000000..11109e150bfd --- /dev/null +++ b/python/ql/lib/semmle/python/frameworks/Gradio.qll @@ -0,0 +1,123 @@ +/** + * Provides classes modeling security-relevant aspects of the `gradio` PyPI package. + * See https://pypi.org/project/gradio/. + */ + +import python +import semmle.python.dataflow.new.RemoteFlowSources +import semmle.python.dataflow.new.TaintTracking +import semmle.python.ApiGraphs + +/** + * Provides models for the `gradio` PyPI package. + * See https://pypi.org/project/gradio/. 
+ */
+module Gradio {
+  /**
+   * A call that registers a Gradio callback handling untrusted data: either an event
+   * listener (such as `click` or `submit`) on a Gradio component, or a call to
+   * `gradio.Interface` or `gradio.ChatInterface`.
+   *
+   * The callback is the `fn` argument (index 0) and the components that provide its
+   * arguments are the `inputs` argument (index 1).
+   */
+  private class GradioInput extends API::CallNode {
+    GradioInput() {
+      this =
+        API::moduleImport("gradio")
+            .getMember([
+                "Button", "Textbox", "UploadButton", "Slider", "JSON", "HTML", "Markdown", "File",
+                "AnnotatedImage", "Audio", "BarPlot", "Chatbot", "Checkbox", "CheckboxGroup",
+                "ClearButton", "Code", "ColorPicker", "Dataframe", "Dataset", "DownloadButton",
+                "Dropdown", "DuplicateButton", "FileExplorer", "Gallery", "HighlightedText",
+                "Image", "ImageEditor", "Label", "LinePlot", "LoginButton", "LogoutButton",
+                "Model3D", "Number", "ParamViewer", "Plot", "Radio", "ScatterPlot", "SimpleImage",
+                "State", "Video"
+              ])
+            .getReturn()
+            .getMember([
+                "change", "input", "click", "submit", "edit", "clear", "play", "pause", "stop",
+                "end", "start_recording", "pause_recording", "stop_recording", "focus", "blur",
+                "upload", "release", "select", "stream", "like", "load", "key_up",
+              ])
+            .getACall()
+      or
+      this = API::moduleImport("gradio").getMember(["Interface", "ChatInterface"]).getACall()
+    }
+
+    /** Gets the `fn` argument, given by position (index 0) or by keyword. */
+    API::Node getCallbackParameter() { result = this.getParameter(0, "fn") }
+
+    /** Gets the `inputs` argument, given by position (index 1) or by keyword. */
+    API::Node getInputsParameter() { result = this.getParameter(1, "inputs") }
+
+    /** Gets the list literal passed directly as the `inputs` argument, if any. */
+    ListNode getInputsListLiteral() { result = this.getInputsParameter().asSink().asCfgNode() }
+  }
+
+  /**
+   * An element of a list literal passed as the `inputs` argument of a Gradio callback
+   * registration, considered as a source of untrusted data.
+   *
+   * Modeling the elements themselves allows each value to be tracked back to the
+   * component that produced it, e.g. `gr.Textbox(...)`.
+   */
+  private class GradioInputList extends RemoteFlowSource::Range {
+    GradioInputList() {
+      exists(GradioInput call |
+        // only applies when `inputs` is given as a list literal
+        exists(call.getInputsListLiteral()) and
+        this = call.getInputsParameter().getASubscript().getAValueReachingSink()
+      )
+    }
+
+    override string getSourceType() { result = "Gradio untrusted input" }
+  }
+
+  /**
+   * A parameter of the callback passed as `fn`, considered as a source of untrusted data
+   * when `inputs` is not given as a list literal.
+   */
+  private class GradioInputParameter extends RemoteFlowSource::Range {
+    GradioInputParameter() {
+      exists(GradioInput call |
+        this = call.getCallbackParameter().getParameter(_).asSource() and
+        // list-valued `inputs` are covered by `GradioInputList` and `ListTaintStep`
+        not exists(call.getInputsListLiteral())
+      )
+    }
+
+    override string getSourceType() { result = "Gradio untrusted input" }
+  }
+
+  /**
+   * A parameter of a function passed to the result of a Gradio callback registration, as
+   * happens when the registration is used as a decorator, e.g. `@btn.click(inputs=name)`.
+   */
+  private class GradioInputDecorator extends RemoteFlowSource::Range {
+    GradioInputDecorator() {
+      exists(GradioInput call |
+        this = call.getReturn().getACall().getParameter(0).getParameter(_).asSource()
+      )
+    }
+
+    override string getSourceType() { result = "Gradio untrusted input" }
+  }
+
+  /**
+   * Extra taint propagation from the `i`th element of a list literal passed as `inputs`
+   * to the `i`th parameter of the callback passed as `fn`.
+   *
+   * For example, in `btn.click(fn=f, inputs=[a, b])` taint flows from `a` to the first
+   * parameter of `f` and from `b` to the second.
+   */
+  private class ListTaintStep extends TaintTracking::AdditionalTaintStep {
+    override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) {
+      exists(GradioInput node, int i |
+        // pair list elements with callback parameters by position
+        nodeFrom.asCfgNode() = node.getInputsListLiteral().getElement(i) and
+        nodeTo = node.getCallbackParameter().getParameter(i).asSource()
+      )
+    }
+  }
+}
diff --git a/python/ql/src/change-notes/2024-04-05-gradio-models.md b/python/ql/src/change-notes/2024-04-05-gradio-models.md
new file mode 100644
index 000000000000..261c25f98865
--- /dev/null
+++ b/python/ql/src/change-notes/2024-04-05-gradio-models.md
@@ -0,0 +1,4 @@
+---
+category: minorAnalysis
+---
+* Added models of `gradio` PyPI package.
diff --git a/python/ql/test/library-tests/frameworks/gradio/source_test.expected b/python/ql/test/library-tests/frameworks/gradio/source_test.expected
new file mode 100644
index 000000000000..8ec8033d086e
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/source_test.expected
@@ -0,0 +1,2 @@
+testFailures
+failures
diff --git a/python/ql/test/library-tests/frameworks/gradio/source_test.py b/python/ql/test/library-tests/frameworks/gradio/source_test.py
new file mode 100644
index 000000000000..3191f7c03aca
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/source_test.py
@@ -0,0 +1,33 @@
+import gradio as gr
+
+
+with gr.Blocks() as demo:
+    name = gr.Textbox(label="Name")
+    output = gr.Textbox(label="Output Box")
+    # static block - not used as a source
+    static_block = gr.HTML("""
+    <div style='height: 100px; width: 800px; background-color: pink;'></div>
+    """)
+    greet_btn = gr.Button("Hello")
+
+    # decorator
+    @greet_btn.click(inputs=name, outputs=output)
+    def greet(name): # $ source=name
+        return "Hello " + name + "!"
+
+    # `click` event handler with keyword arguments
+    def greet1(name): # $ source=name
+        return "Hello " + name + "!"
+
+    greet1_btn = gr.Button("Hello")
+    greet1_btn.click(fn=greet1, inputs=name, outputs=output, api_name="greet")
+
+    # `click` event handler with positional arguments (`fn`, `inputs`, `outputs`)
+    def greet2(name): # $ source=name
+        return "Hello " + name + "!"
+
+    greet2_btn = gr.Button("Hello")
+    greet2_btn.click(greet2, name, output, api_name="greet")
+
+
+demo.launch()
diff --git a/python/ql/test/library-tests/frameworks/gradio/source_test.ql b/python/ql/test/library-tests/frameworks/gradio/source_test.ql
new file mode 100644
index 000000000000..65015afe4dba
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/source_test.ql
@@ -0,0 +1,20 @@
+import python
+import semmle.python.dataflow.new.RemoteFlowSources
+import TestUtilities.InlineExpectationsTest
+private import semmle.python.dataflow.new.internal.PrintNode
+
+module SourceTest implements TestSig {
+  string getARelevantTag() { result = "source" }
+
+  predicate hasActualResult(Location location, string element, string tag, string value) {
+    exists(location.getFile().getRelativePath()) and
+    exists(RemoteFlowSource rfs |
+      location = rfs.getLocation() and
+      element = rfs.toString() and
+      value = prettyNode(rfs) and
+      tag = "source"
+    )
+  }
+}
+
+import MakeTest<SourceTest>
diff --git a/python/ql/test/library-tests/frameworks/gradio/taint_step_test.expected b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.expected
new file mode 100644
index 000000000000..2ebf825a19b5
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.expected
@@ -0,0 +1,26 @@
+edges
+| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | taint_step_test.py:19:43:19:46 | ControlFlowNode for path | provenance | |
+| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:5:5:5:8 | ControlFlowNode for path | provenance | |
+| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | taint_step_test.py:19:48:19:51 | ControlFlowNode for file | provenance | |
+| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:6:5:6:8 | ControlFlowNode for file | provenance | |
+| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | |
+| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep |
+| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep |
+| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | provenance | |
+| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | taint_step_test.py:11:18:11:21 | ControlFlowNode for path | provenance | AdditionalTaintStep |
+| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | taint_step_test.py:11:24:11:27 | ControlFlowNode for file | provenance | AdditionalTaintStep |
+nodes
+| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
+| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
+| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
+| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
+| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
+| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath |
+| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath |
+| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | semmle.label | ControlFlowNode for path |
+| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | semmle.label | ControlFlowNode for file |
+subpaths
+#select
+| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | user-provided value |
+| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | user-provided value |
diff --git a/python/ql/test/library-tests/frameworks/gradio/taint_step_test.py b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.py
new file mode 100644
index 000000000000..eb1614e99b03
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.py
@@ -0,0 +1,22 @@
+import gradio as gr
+import os
+
+with gr.Blocks() as demo:
+    path = gr.Textbox(label="Path") # $ source=gr.Textbox(..)
+    file = gr.Textbox(label="File") # $ source=gr.Textbox(..)
+    output = gr.Textbox(label="Output Box")
+
+
+    # path injection sink
+    def fileread(path, file):
+        filepath = os.path.join(path, file)
+        with open(filepath, "r") as f:
+            return f.read()
+
+
+    # `click` event handler with `inputs` containing a list
+    greet1_btn = gr.Button("Path for the file to display")
+    greet1_btn.click(fn=fileread, inputs=[path,file], outputs=output, api_name="fileread")
+
+
+demo.launch()
diff --git a/python/ql/test/library-tests/frameworks/gradio/taint_step_test.qlref b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.qlref
new file mode 100644
index 000000000000..d43482cc509e
--- /dev/null
+++ b/python/ql/test/library-tests/frameworks/gradio/taint_step_test.qlref
@@ -0,0 +1 @@
+Security/CWE-022/PathInjection.ql