-
Notifications
You must be signed in to change notification settings - Fork 1.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Python: Add Gradio models #16135
Merged
Merged
Python: Add Gradio models #16135
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
bed0d56
Add Gradio models
sylwia-budzynska ca7789d
Fix QLdoc
sylwia-budzynska 84d6956
Fix decorator QLdoc
sylwia-budzynska 1129925
Add change note
sylwia-budzynska 5d94658
Add tests
sylwia-budzynska eaba798
Apply suggestions from code review
sylwia-budzynska 8bb4193
Put GradioInterface models into GradioInput
sylwia-budzynska 944f884
Change getASuccessor() to getASubscript()
sylwia-budzynska 52ceb7f
Apply suggestions from code review
sylwia-budzynska d6acea1
Fix tests
sylwia-budzynska f72afdc
Merge branch 'main' into gradio-model
sylwia-budzynska 34c4479
Update test results
sylwia-budzynska 72493a6
Change classes to private
sylwia-budzynska File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/** | ||
* Provides classes modeling security-relevant aspects of the `gradio` PyPI package. | ||
* See https://pypi.org/project/gradio/. | ||
*/ | ||
|
||
import python | ||
import semmle.python.dataflow.new.RemoteFlowSources | ||
import semmle.python.dataflow.new.TaintTracking | ||
import semmle.python.ApiGraphs | ||
|
||
/** | ||
* Provides models for the `gradio` PyPI package. | ||
* See https://pypi.org/project/gradio/. | ||
*/ | ||
module Gradio { | ||
/** | ||
 * A call that takes untrusted data: either an event listener invoked on an instance of a | ||
 * Gradio component class, or a call to `gradio.Interface` or `gradio.ChatInterface`. | ||
 */ | ||
private class GradioInput extends API::CallNode { | ||
  GradioInput() { | ||
    exists(API::Node gradio | gradio = API::moduleImport("gradio") | | ||
      // direct calls to `gradio.Interface(...)` and `gradio.ChatInterface(...)` | ||
      this = gradio.getMember(["Interface", "ChatInterface"]).getACall() | ||
      or | ||
      // event listeners called on the value returned by a component constructor | ||
      exists(API::Node component | | ||
        component = | ||
          gradio | ||
              .getMember([ | ||
                  "Button", "Textbox", "UploadButton", "Slider", "JSON", "HTML", "Markdown", | ||
                  "File", "AnnotatedImage", "Audio", "BarPlot", "Chatbot", "Checkbox", | ||
                  "CheckboxGroup", "ClearButton", "Code", "ColorPicker", "Dataframe", "Dataset", | ||
                  "DownloadButton", "Dropdown", "DuplicateButton", "FileExplorer", "Gallery", | ||
                  "HighlightedText", "Image", "ImageEditor", "Label", "LinePlot", "LoginButton", | ||
                  "LogoutButton", "Model3D", "Number", "ParamViewer", "Plot", "Radio", | ||
                  "ScatterPlot", "SimpleImage", "State", "Video" | ||
                ]) | ||
              .getReturn() | ||
      | | ||
        this = | ||
          component | ||
              .getMember([ | ||
                  "change", "input", "click", "submit", "edit", "clear", "play", "pause", "stop", | ||
                  "end", "start_recording", "pause_recording", "stop_recording", "focus", "blur", | ||
                  "upload", "release", "select", "stream", "like", "load", "key_up" | ||
                ]) | ||
              .getACall() | ||
      ) | ||
    ) | ||
  } | ||
} | ||
|
||
/** | ||
 * A value reaching an element of a list passed as the `inputs` argument (second positional | ||
 * argument or `inputs` keyword) of a Gradio input call, treated as a source of untrusted data. | ||
 * Modeling the individual elements allows each of them to be tracked back to where it was | ||
 * created, for example `gr.Textbox(...)`. | ||
 */ | ||
private class GradioInputList extends RemoteFlowSource::Range { | ||
  GradioInputList() { | ||
    exists(GradioInput call | | ||
      // only applies when the `inputs` argument is a list | ||
      call.getParameter(1, "inputs").asSink().asCfgNode() instanceof ListNode and | ||
      this = call.getParameter(1, "inputs").getASubscript().getAValueReachingSink() | ||
    ) | ||
  } | ||
|
||
  override string getSourceType() { result = "Gradio untrusted input" } | ||
} | ||
|
||
/** | ||
 * A parameter of the function passed as `fn` (first positional argument or `fn` keyword) to a | ||
 * Gradio input call whose `inputs` argument is not a list, treated as a source of untrusted data. | ||
 */ | ||
private class GradioInputParameter extends RemoteFlowSource::Range { | ||
  GradioInputParameter() { | ||
    exists(GradioInput call | | ||
      // calls that pass a list to `inputs` (by keyword or position) are excluded here | ||
      not call.getParameter(1, "inputs").asSink().asCfgNode() instanceof ListNode and | ||
      this = call.getParameter(0, "fn").getParameter(_).asSource() | ||
    ) | ||
  } | ||
|
||
  override string getSourceType() { result = "Gradio untrusted input" } | ||
} | ||
|
||
/** | ||
 * A parameter of a function that is passed as the first argument to the result of a Gradio | ||
 * input call, as happens when an event listener is used as a decorator. Such parameters are | ||
 * treated as sources of untrusted data. | ||
 */ | ||
private class GradioInputDecorator extends RemoteFlowSource::Range { | ||
  GradioInputDecorator() { | ||
    exists(GradioInput listener, API::CallNode decoration | | ||
      // `@btn.click(...)` above `def f(...)` calls the listener's result with `f` | ||
      decoration = listener.getReturn().getACall() | ||
    | | ||
      this = decoration.getParameter(0).getParameter(_).asSource() | ||
    ) | ||
  } | ||
|
||
  override string getSourceType() { result = "Gradio untrusted input" } | ||
} | ||
|
||
/** | ||
 * Extra taint propagation for Gradio input calls whose `inputs` argument is a list: the | ||
 * element at index `i` of that list flows to the parameter at index `i` of the function | ||
 * passed as `fn`. | ||
 */ | ||
private class ListTaintStep extends TaintTracking::AdditionalTaintStep { | ||
  override predicate step(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { | ||
    exists(GradioInput node, ListNode inputs, int i | | ||
      // `inputs` may be given by keyword or as the second positional argument | ||
      inputs = node.getParameter(1, "inputs").asSink().asCfgNode() and | ||
      nodeFrom.asCfgNode() = inputs.getElement(i) and | ||
      nodeTo = node.getParameter(0, "fn").getParameter(i).asSource() | ||
    ) | ||
  } | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
--- | ||
category: minorAnalysis | ||
--- | ||
* Added models of the `gradio` PyPI package. |
2 changes: 2 additions & 0 deletions
2
python/ql/test/library-tests/frameworks/gradio/source_test.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
testFailures | ||
failures |
33 changes: 33 additions & 0 deletions
33
python/ql/test/library-tests/frameworks/gradio/source_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
import gradio as gr | ||
|
||
|
||
# The trailing `source=name` annotations mark handler parameters expected to be reported as sources. | ||
with gr.Blocks() as demo: | ||
name = gr.Textbox(label="Name") | ||
output = gr.Textbox(label="Output Box") | ||
# static block: never passed as `inputs` below, so it is not expected to be a source | ||
static_block = gr.HTML(""" | ||
<div style='height: 100px; width: 800px; background-color: pink;'></div> | ||
""") | ||
greet_btn = gr.Button("Hello") | ||
|
||
# `click` event listener applied as a decorator | ||
@greet_btn.click(inputs=name, outputs=output) | ||
def greet(name): # $ source=name | ||
return "Hello " + name + "!" | ||
|
||
# `click` event handler registered with keyword arguments | ||
def greet1(name): # $ source=name | ||
return "Hello " + name + "!" | ||
|
||
greet1_btn = gr.Button("Hello") | ||
greet1_btn.click(fn=greet1, inputs=name, outputs=output, api_name="greet") | ||
|
||
# `click` event handler with positional arguments | ||
def greet2(name): # $ source=name | ||
return "Hello " + name + "!" | ||
|
||
greet2_btn = gr.Button("Hello") | ||
greet2_btn.click(fn=greet2, inputs=name, outputs=output, api_name="greet") | ||
|
||
|
||
demo.launch() |
20 changes: 20 additions & 0 deletions
20
python/ql/test/library-tests/frameworks/gradio/source_test.ql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
import python | ||
import semmle.python.dataflow.new.RemoteFlowSources | ||
import TestUtilities.InlineExpectationsTest | ||
private import semmle.python.dataflow.new.internal.PrintNode | ||
|
||
/** | ||
 * Inline-expectations test that reports every `RemoteFlowSource` located in a file with a | ||
 * relative path, under the `source` tag, using the pretty-printed node as the value. | ||
 */ | ||
module SourceTest implements TestSig { | ||
  string getARelevantTag() { result = "source" } | ||
|
||
  predicate hasActualResult(Location location, string element, string tag, string value) { | ||
    tag = "source" and | ||
    exists(RemoteFlowSource src | | ||
      src.getLocation() = location and | ||
      // restrict results to files that have a relative path | ||
      exists(location.getFile().getRelativePath()) and | ||
      src.toString() = element and | ||
      prettyNode(src) = value | ||
    ) | ||
  } | ||
} | ||
|
||
import MakeTest<SourceTest> |
26 changes: 26 additions & 0 deletions
26
python/ql/test/library-tests/frameworks/gradio/taint_step_test.expected
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,26 @@ | ||
edges | ||
| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | taint_step_test.py:19:43:19:46 | ControlFlowNode for path | provenance | | | ||
| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:5:5:5:8 | ControlFlowNode for path | provenance | | | ||
| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | taint_step_test.py:19:48:19:51 | ControlFlowNode for file | provenance | | | ||
| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:6:5:6:8 | ControlFlowNode for file | provenance | | | ||
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | | | ||
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep | | ||
| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | provenance | AdditionalTaintStep | | ||
| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | provenance | | | ||
| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | taint_step_test.py:11:18:11:21 | ControlFlowNode for path | provenance | AdditionalTaintStep | | ||
| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | taint_step_test.py:11:24:11:27 | ControlFlowNode for file | provenance | AdditionalTaintStep | | ||
nodes | ||
| taint_step_test.py:5:5:5:8 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | ||
| taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | ||
| taint_step_test.py:6:5:6:8 | ControlFlowNode for file | semmle.label | ControlFlowNode for file | | ||
| taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() | | ||
| taint_step_test.py:11:18:11:21 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | ||
| taint_step_test.py:11:24:11:27 | ControlFlowNode for file | semmle.label | ControlFlowNode for file | | ||
| taint_step_test.py:12:9:12:16 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath | | ||
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | semmle.label | ControlFlowNode for filepath | | ||
| taint_step_test.py:19:43:19:46 | ControlFlowNode for path | semmle.label | ControlFlowNode for path | | ||
| taint_step_test.py:19:48:19:51 | ControlFlowNode for file | semmle.label | ControlFlowNode for file | | ||
subpaths | ||
#select | ||
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:5:12:5:35 | ControlFlowNode for Attribute() | user-provided value | | ||
| taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | taint_step_test.py:13:19:13:26 | ControlFlowNode for filepath | This path depends on a $@. | taint_step_test.py:6:12:6:35 | ControlFlowNode for Attribute() | user-provided value | |
22 changes: 22 additions & 0 deletions
22
python/ql/test/library-tests/frameworks/gradio/taint_step_test.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
import gradio as gr | ||
import os | ||
|
||
with gr.Blocks() as demo: | ||
path = gr.Textbox(label="Path") # $ source=gr.Textbox(..) | ||
file = gr.Textbox(label="File") # $ source=gr.Textbox(..) | ||
output = gr.Textbox(label="Output Box") | ||
|
||
|
||
# handler containing the path injection sink: `open` is called on a path built from both parameters | ||
def fileread(path, file): | ||
filepath = os.path.join(path, file) | ||
with open(filepath, "r") as f: | ||
return f.read() | ||
|
||
|
||
# `click` event handler whose `inputs` is a list with one element per `fileread` parameter | ||
greet1_btn = gr.Button("Path for the file to display") | ||
greet1_btn.click(fn=fileread, inputs=[path,file], outputs=output, api_name="fileread") | ||
|
||
|
||
demo.launch() |
1 change: 1 addition & 0 deletions
1
python/ql/test/library-tests/frameworks/gradio/taint_step_test.qlref
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Security/CWE-022/PathInjection.ql |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Are all these vulnerable? I quickly checked a few of them and it looks like:
- `HighlightedText` only allows the user to specify substrings of a given text
- `ColorPicker` only passes on hex strings
- `AnnotatedImage` does not accept user input at all

We should probably only include actually vulnerable inputs here in order to avoid the query getting noisy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It’s the event listeners that take untrusted user input, so, for example, `gradio.AnnotatedImage.select(fn, inputs, outputs)` (see docs), and that's what we model here.

To clarify - with the above models, we look for any event listeners of the listed classes. For example, `gr.Button` has only one, `click()` (see this example, which asks for a user's name and displays it). Side note: if an event listener does not exist for a given class, CodeQL doesn't (can't) match it (for example, there is no `gr.Button.select()`). I found this way of modeling to be the most succinct, and it doesn't slow performance, but I'm open to feedback around it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it is fine to model situations that will never match; I was more worried about false positives. But on reflection, I think this is fine to start; we can refine it if we actually encounter any problems.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've already found a few vulnerabilities using these models, including in stable-diffusion-webui (120k stars):
https://securitylab.github.com/advisories/GHSL-2024-010_stable-diffusion-webui/
https://securitylab.github.com/advisories/GHSL-2024-019_GHSL-2024-024_kohya_ss/
To be fair, I tested the `AnnotatedImage.select()` event listener (and several other event listeners on the list), and it does take user input, but it's not very popular to use, so I haven't actually seen vulnerabilities around it. Most of the vulnerabilities happen with `Button.click()` (including the 7 vulns I linked to above). I think there shouldn't be many false positives here, but I'll be happy to help if anything needs refining.