diff --git a/python/ql/lib/change-notes/2026-05-19-add-shared-ssa.md b/python/ql/lib/change-notes/2026-05-19-add-shared-ssa.md new file mode 100644 index 000000000000..7e845df3a369 --- /dev/null +++ b/python/ql/lib/change-notes/2026-05-19-add-shared-ssa.md @@ -0,0 +1,4 @@ +--- +category: minorAnalysis +--- +* A new SSA adapter has been added under `semmle.python.dataflow.new.internal.SsaImpl`, built on the shared `codeql.ssa.Ssa` library and the new shared CFG (`semmle.python.controlflow.internal.Cfg`). It is not yet used by the dataflow library or any production query; the legacy ESSA SSA in `semmle/python/essa/*` remains the default. The new SSA adapter is exposed for tests and for the upcoming dataflow migration. diff --git a/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll new file mode 100644 index 000000000000..d9609599a583 --- /dev/null +++ b/python/ql/lib/semmle/python/dataflow/new/internal/SsaImpl.qll @@ -0,0 +1,547 @@ +/** + * Provides the Python SSA implementation built on the new (shared) CFG. + * + * Mirrors the Java SSA adapter at + * `java/ql/lib/semmle/code/java/dataflow/internal/SsaImpl.qll`: + * an `InputSig` is defined in terms of positional `(BasicBlock, int)` + * variable references, and the shared + * `codeql.ssa.Ssa::Make` module is then + * instantiated. + * + * `SourceVariable` is the AST-level `Py::Variable`. Variable references + * are looked up via the CFG facade's `NameNode.defines`/`uses`/`deletes` + * predicates, which themselves are one-line bridges to AST-level + * `Name.defines`/`uses`/`deletes`. + * + * Implicit-entry definitions are inserted for: + * - non-local / global / builtin variables that are read in the scope + * but never assigned (no enclosing CFG node defines them), + * - captured variables (variables defined in an enclosing scope that + * are read inside the scope), and + * - parameters, but only if the corresponding parameter name is *not* + * itself a CFG node. With the C#-style parameter wiring already + * installed in `AstNodeImpl.qll`, parameter names *are* CFG nodes, + * so the regular `variableWrite` path handles them — no `i = -1` + * entry is needed for ordinary parameters. + */ +overlay[local?] +module; + +private import python as Py +private import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +private import semmle.python.controlflow.internal.Cfg as Cfg +private import codeql.ssa.Ssa as SsaImplCommon +private import codeql.controlflow.BasicBlock as BB + +/** + * Adapts the Python `Cfg` facade to the shared SSA library's `CfgSig`. + * All members are inherited from `Cfg::ControlFlowNode` and + * `Cfg::BasicBlock`. + */ +private module CfgForSsa implements BB::CfgSig { + class ControlFlowNode = CfgImpl::ControlFlowNode; + + class BasicBlock = CfgImpl::BasicBlock; + + class EntryBasicBlock = CfgImpl::Cfg::EntryBasicBlock; + + predicate dominatingEdge = CfgImpl::Cfg::dominatingEdge/2; +} + +/** + * A source variable for SSA, wrapping a Python AST `Variable`. + * + * We only track variables that are read at least once in their scope — + * tracking write-only variables would be unnecessary work — *except* + * for module-scope globals, where the "read" can be external (e.g. + * `import mymodule; mymodule.x`). Such globals are tracked + * unconditionally so that import-resolution can find their defining + * write. + */ +private newtype TSsaSourceVariable = + TPyVar(Py::Variable v) { + // Has a use somewhere — read-relevant for SSA. + exists(Cfg::NameNode n | n.uses(v)) + or + // Or has a deletion (treated as a write that destroys the value). + exists(Cfg::NameNode n | n.deletes(v)) + or + // Or is a module-scope global written in this module — must be + // tracked even if never read locally, because importers may read + // it as an attribute on the module object. + v.getScope() instanceof Py::Module and + exists(Cfg::NameNode n | n.defines(v)) + or + // Or is a parameter — parameters must always have a + // `ParameterDefinition` for dataflow argument-routing to work, + // even if the parameter is never read in its scope. Mirrors + // legacy ESSA's `ParameterDefinition` (which fired for every + // parameter binding regardless of liveness). + exists(Py::Parameter p | p.asName() = v.getAStore()) + } + +/** + * A source variable for SSA, wrapping a Python AST `Variable`. + */ +class SsaSourceVariable extends TSsaSourceVariable { + /** Gets the underlying Python AST variable. */ + Py::Variable getVariable() { this = TPyVar(result) } + + /** Gets the (textual) name of this variable. */ + string getName() { result = this.getVariable().getId() } + + /** Gets a textual representation of this source variable. */ + string toString() { result = this.getVariable().toString() } + + /** Gets the location of this source variable. */ + Py::Location getLocation() { result = this.getVariable().getScope().getLocation() } + + /** Gets the scope in which this variable lives. */ + Py::Scope getScope() { result = this.getVariable().getScope() } + + /** + * Gets a use of this variable as it appears in the source — a `NameNode` + * that loads or deletes the variable. Mirrors legacy + * `SsaSourceVariable.getASourceUse()`. + */ + Cfg::ControlFlowNode getASourceUse() { + exists(Cfg::NameNode n | result = n | + n.uses(this.getVariable()) or n.deletes(this.getVariable()) + ) + } + + /** + * Gets an implicit use of this variable. The new SSA does not have + * implicit-use refinements, but we keep this for API parity — every + * normal-exit of the variable's scope counts as a sink, ensuring + * variables stay live to scope exit for taint-tracking. + */ + Cfg::ControlFlowNode getAnImplicitUse() { + result.isNormalExit() and result.getScope() = this.getScope() + } + + /** + * Gets a use of this variable — either an explicit source use or an + * implicit use at scope exit. Mirrors legacy `SsaSourceVariable.getAUse()`. + */ + Cfg::ControlFlowNode getAUse() { + result = this.getASourceUse() or result = this.getAnImplicitUse() + } +} + +/** + * Holds if `v` is a non-local read in scope `s`, in the sense that `s` + * uses `v` but does not write it within `s`. This includes globals, + * builtins, and variables captured from an enclosing function scope. + * + * The `Py::Variable` `v` lives in some defining scope (the module for + * globals, an outer function for closures, etc.); the reading scope + * `s` is the scope where the use of `v` occurs. + */ +private predicate nonLocalReadIn(Py::Variable v, Py::Scope s) { + exists(Cfg::NameNode n | + n.uses(v) and + n.getScope() = s and + not exists(Cfg::NameNode def | def.defines(v) and def.getScope() = s) + ) and + // Match legacy ESSA: only create entry defs for variables that have + // at least one defining store somewhere — otherwise the entry def + // represents "nothing reaches here", which is the default anyway and + // introduces no useful flow. (Legacy's `ModuleVariable` required a + // store; this is the closure-aware generalisation.) + exists(Cfg::NameNode store | store.defines(v)) +} + +/** + * Holds if `bb` is the entry basic block of a scope where `v` should + * have an implicit entry definition. This covers: + * - non-local / global / builtin variables read in `s`, and + * - captured variables (defined in an enclosing scope but read in `s`). + * + * Each reading scope gets its own entry def, so a closure variable can + * have multiple entry defs across all functions/methods that read it. + * + * Parameters are *not* included: their bound `Name` is itself a CFG + * node (per the C#-style parameter wiring), so `variableWrite` fires at + * the parameter's natural CFG index. + */ +private predicate hasEntryDefIn(SsaSourceVariable v, CfgImpl::BasicBlock bb) { + exists(Py::Scope s | + nonLocalReadIn(v.getVariable(), s) and + bb = entryBlock(s) + ) +} + +/** + * Gets the entry basic block of scope `s`, where implicit entry + * definitions are placed (at synthetic index `-1`). + */ +private CfgImpl::BasicBlock entryBlock(Py::Scope s) { + exists(CfgImpl::ControlFlowNode entry | + entry instanceof CfgImpl::ControlFlow::EntryNode and + entry.getEnclosingCallable().asScope() = s and + result = entry.getBasicBlock() + ) +} + +/** + * The SSA `InputSig` for Python. References are positional + * `(BasicBlock, int)` pairs into the new CFG. + */ +private module SsaImplInput implements SsaImplCommon::InputSig { + class SourceVariable = SsaSourceVariable; + + predicate variableWrite(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) { + // Explicit binding at a CFG node — includes assignments, + // parameter Names (wired in via the C# pattern), exception-handler + // `as`-bindings, import aliases, and match-pattern captures. + exists(Cfg::NameNode n | + bb.getNode(i) = n and + n.defines(v.getVariable()) and + certain = true + ) + or + // `del x` — removes the binding. Modelled as a certain write that + // makes any subsequent read invalid. + exists(Cfg::NameNode n | + bb.getNode(i) = n and + n.deletes(v.getVariable()) and + certain = true + ) + or + // Implicit entry definition for non-local / captured / global / + // builtin variables read in some scope. Each reading scope's entry + // block gets one such write, allowing closures: e.g. when `x` is a + // parameter of an outer function and read inside a nested + // function, both scopes get entry defs for `x`. + hasEntryDefIn(v, bb) and + i = -1 and + certain = true + or + // `from X import *` — possibly rebinds every name in the importing + // scope. Modelled as an uncertain write at the import-star's CFG + // position for every variable that lives in (or is referenced + // from) the same scope as the import-star. Mirrors legacy ESSA's + // `ImportStarRefinement` (see `essa/SsaDefinitions.qll`'s + // `import_star_refinement` predicate). The write is uncertain so + // that prior definitions of the variable remain available — the + // shared-SSA `SsaUncertainWrite` merges the new value with the + // immediately preceding definition. + exists(Cfg::ImportStarNode imp | + bb.getNode(i) = imp and + certain = false and + ( + v.getVariable().getScope() = imp.getScope() + or + // Variable is defined in some other scope but referenced in + // the same scope as the import-star (matches legacy clause 2: + // `other.uses(v) and def.getScope() = other.getScope()`). + exists(Cfg::NameNode other | + other.uses(v.getVariable()) and + imp.getScope() = other.getScope() + ) + ) + ) + } + + predicate variableRead(CfgImpl::BasicBlock bb, int i, SourceVariable v, boolean certain) { + // Explicit source use — a `Name` load or a `del x` of the variable. + exists(Cfg::NameNode n | + bb.getNode(i) = n and + n.uses(v.getVariable()) and + certain = true + ) + or + // Synthetic use at the normal exit of the variable's defining scope. + // This keeps every variable live to scope exit so that callers (e.g. + // `module_export` in ImportResolution.qll, or taint-tracking pass-through + // through unread locals) can ask "which definition reaches end of + // scope?". Mirrors legacy ESSA's `SsaSourceVariable.getAUse()` which + // included `getScope().getANormalExit()`. + exists(Cfg::ControlFlowNode exit | + exit.isNormalExit() and + exit.getScope() = v.getVariable().getScope() and + bb.getNode(i) = exit and + certain = true + ) + } +} + +/** + * The shared SSA instantiation for Python. + * + * Members: + * - `Definition` — the union of explicit, uncertain, and phi definitions + * - `WriteDefinition`, `UncertainWriteDefinition`, `PhiNode` + * - the standard SSA predicates (`getAUse`, `getAnUltimateDefinition`, ...). + */ +module Ssa = SsaImplCommon::Make; + +final class Definition = Ssa::Definition; + +final class WriteDefinition = Ssa::WriteDefinition; + +final class UncertainWriteDefinition = Ssa::UncertainWriteDefinition; + +final class PhiNode = Ssa::PhiNode; + +// =========================================================================== +// ESSA-shaped adapter layer +// +// The dataflow library (`python/ql/lib/semmle/python/dataflow/new/`) and +// related modules (`ApiGraphs.qll`, etc.) consume the legacy ESSA API +// (`EssaVariable`, `EssaDefinition`, `AssignmentDefinition`, +// `ScopeEntryDefinition`, `ParameterDefinition`, `WithDefinition`, +// `PhiFunction`, plus the `AdjacentUses` module). To migrate them off +// the legacy CFG, we expose the same API surface on top of the +// shared SSA built above. +// +// This adapter is intentionally narrow: it covers only the predicates +// that new dataflow consumes. The richer legacy ESSA — refinement +// nodes, attribute refinements, edge refinements — stays available +// via `semmle.python.essa.Essa` for points-to / legacy code. +// =========================================================================== +/** + * Gets the CFG node at which a write definition's binding takes place. + * + * For ordinary writes (assignment, deletion, parameter) this is the + * canonical CFG node of the bound Name. For implicit entry definitions + * (synthesised at position `-1` of a scope's entry BB) this is the + * scope's entry node. + */ +private Cfg::ControlFlowNode writeDefNode(Ssa::WriteDefinition def) { + exists(CfgImpl::BasicBlock bb, int i | def.definesAt(_, bb, i) | + i >= 0 and result = bb.getNode(i) + or + i = -1 and result = bb.getNode(0) + ) +} + +/** + * A write definition whose binding has a corresponding CFG node — i.e. + * everything that's not a phi node. Mirrors legacy ESSA's + * `EssaNodeDefinition`. + */ +class EssaNodeDefinition extends Ssa::WriteDefinition { + /** Gets the CFG node where this definition's binding takes place. */ + Cfg::ControlFlowNode getDefiningNode() { result = writeDefNode(this) } + + /** Gets the variable defined here (legacy name). */ + SsaSourceVariable getVariable() { result = this.getSourceVariable() } + + /** Gets the enclosing scope. */ + Py::Scope getScope() { + exists(Cfg::ControlFlowNode n | n = this.getDefiningNode() | result = n.getScope()) + } + + /** + * Holds if this definition defines source variable `v` at CFG node + * `defNode`. Flatter form of `getSourceVariable()` + + * `getDefiningNode()`, matching legacy ESSA's `definedBy`. + */ + predicate definedBy(SsaSourceVariable v, Cfg::ControlFlowNode defNode) { + v = this.getSourceVariable() and defNode = this.getDefiningNode() + } +} + +/** + * An assignment definition: any binding where the value being assigned + * is statically known via `Cfg::DefinitionNode.getValue()`. Includes + * plain assignments, walrus, annotated assignments, augmented + * assignments, import aliases (`import x` / `from m import x [as y]`), + * `with ... as x`, and for-target bindings (where `getValue()` returns + * the iter expression's CFG node). Excludes parameter bindings — + * those are modelled by `ParameterDefinition`. + */ +class AssignmentDefinition extends EssaNodeDefinition { + AssignmentDefinition() { + exists(Cfg::NameNode n | n = this.getDefiningNode() | + exists(n.(Cfg::DefinitionNode).getValue()) and + not n.(Cfg::ControlFlowNode).isParameter() + ) + } + + /** Gets the CFG node for the value being assigned, if statically known. */ + Cfg::ControlFlowNode getValue() { + result = this.getDefiningNode().(Cfg::DefinitionNode).getValue() + } +} + +/** + * A parameter definition — the binding of a parameter name in a + * function's scope. + */ +class ParameterDefinition extends EssaNodeDefinition { + ParameterDefinition() { this.getDefiningNode().isParameter() } + + /** Gets the AST `Parameter` (a `Py::Name` in param context). */ + Py::Name getParameter() { result = this.getDefiningNode().getNode() } +} + +/** + * A definition introduced by a `with ... as x:` clause. + */ +class WithDefinition extends EssaNodeDefinition { + WithDefinition() { + exists(Cfg::NameNode n, Py::With w | + n = this.getDefiningNode() and + w.getOptionalVars() = n.getNode() + ) + } +} + +/** + * An assignment where the LHS is a tuple/list and the RHS is unpacked: + * `a, b = (1, 2)` or `a, *rest = xs`. The SSA def lives at the inner + * `Name` CFG node, but for IterableUnpacking integration we expose + * the enclosing `StarredNode` as the `getDefiningNode()` for `*rest` + * patterns — mirroring legacy ESSA's `multi_assignment_definition`, + * which placed the def at the StarredNode CFG node. + */ +class MultiAssignmentDefinition extends EssaNodeDefinition { + MultiAssignmentDefinition() { + exists(Cfg::NameNode n | n = super.getDefiningNode() | + exists(Py::Assign a, Py::Expr lhs | + a.getATarget() = lhs and + (lhs instanceof Py::Tuple or lhs instanceof Py::List) and + lhs.getASubExpression+() = n.getNode() + ) + or + // For-loop with tuple/list target: `for a, b in xs:` — + // tuple-unpacking semantics applies to the for-target. + exists(Py::For f, Py::Expr lhs | + f.getTarget() = lhs and + (lhs instanceof Py::Tuple or lhs instanceof Py::List) and + lhs.getASubExpression+() = n.getNode() + ) + ) + } + + override Cfg::ControlFlowNode getDefiningNode() { + // Default: the underlying `Name` CFG node (where the SSA def lives). + not exists(Cfg::StarredNode s | + s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode() + ) and + result = super.getDefiningNode() + or + // Exception: for `*rest`, expose the enclosing `Starred` CFG node + // so that `IterableUnpacking::iterableUnpackingStarredElementStoreStep` + // can attach the rest-list to it. + exists(Cfg::StarredNode s | + s.getNode().(Py::Starred).getValue() = super.getDefiningNode().getNode() + | + result = s + ) + } +} + +/** + * An implicit entry definition for a non-local / captured / global / + * builtin variable read in a scope but not defined there. + * + * Inherits from `EssaNodeDefinition` and exposes the scope's entry node + * as its defining node (matching legacy ESSA semantics). + */ +class ScopeEntryDefinition extends EssaNodeDefinition { + ScopeEntryDefinition() { + exists(CfgImpl::BasicBlock bb | + this.definesAt(_, bb, -1) and + bb instanceof CfgImpl::Cfg::EntryBasicBlock + ) + } + + /** Gets the enclosing scope (the scope whose entry block this def is in). */ + override Py::Scope getScope() { + exists(CfgImpl::BasicBlock bb | + this.definesAt(_, bb, -1) and + result = bb.getNode(0).(Cfg::ControlFlowNode).getScope() + ) + } +} + +/** A phi node (alias matching legacy naming). */ +class PhiFunction extends PhiNode { + /** + * Gets an input to this phi function (a definition that flows into + * the phi from one of its predecessor blocks). Mirrors legacy + * ESSA's `PhiFunction.getAnInput()`. + */ + Ssa::Definition getAnInput() { Ssa::phiHasInputFromBlock(this, result, _) } +} + +/** Base class for all ESSA definitions (legacy-shaped). */ +class EssaDefinition = Ssa::Definition; + +/** + * An adapter representing a single SSA-defined "variable" — wrapping + * one `Ssa::Definition`. Mirrors legacy `EssaVariable` API. + */ +class EssaVariable extends Ssa::Definition { + /** Gets the underlying SSA definition (legacy name). */ + Ssa::Definition getDefinition() { result = this } + + /** + * Gets a CFG node where this definition is used. Includes regular + * `Name` reads as well as the synthetic scope-exit "use" registered + * via `SsaImplInput::variableRead` — mirrors legacy ESSA's + * `EssaVariable.getAUse()` which inherited the synthetic exit-use + * from `SsaSourceVariable`. + */ + Cfg::ControlFlowNode getAUse() { + exists(CfgImpl::BasicBlock bb, int i | + Ssa::ssaDefReachesRead(this.getSourceVariable(), this, bb, i) and + bb.getNode(i) = result + ) + } + + /** Gets the (textual) name of the underlying variable. */ + string getName() { result = this.getSourceVariable().getVariable().getId() } + + /** Gets the scope in which this variable lives. */ + Py::Scope getScope() { result = this.getSourceVariable().getVariable().getScope() } + + /** Gets an ultimate non-phi ancestor of this definition. */ + EssaVariable getAnUltimateDefinition() { + if this instanceof PhiNode + then + exists(Ssa::Definition input | + Ssa::phiHasInputFromBlock(this, input, _) and + result = input.(EssaVariable).getAnUltimateDefinition() + ) + else result = this + } +} + +/** + * Adjacent use-use and def-use relations exposed by the shared SSA + * library. Provides the same interface as legacy + * `semmle.python.essa.SsaCompute::AdjacentUses`. + */ +module AdjacentUses { + /** Holds if `nodeFrom` and `nodeTo` are adjacent uses of the same SSA variable. */ + predicate adjacentUseUse(Cfg::NameNode nodeFrom, Cfg::NameNode nodeTo) { + exists(SsaSourceVariable v, CfgImpl::BasicBlock bb1, int i1, CfgImpl::BasicBlock bb2, int i2 | + Ssa::adjacentUseUse(bb1, i1, bb2, i2, v, _) and + nodeFrom = bb1.getNode(i1) and + nodeTo = bb2.getNode(i2) + ) + } + + /** Holds if `use` is a first use of definition `def`. */ + predicate firstUse(Ssa::Definition def, Cfg::NameNode use) { + exists(CfgImpl::BasicBlock bb, int i | + Ssa::firstUse(def, bb, i, _) and + use = bb.getNode(i) + ) + } + + /** + * Holds if `use` is any reachable use of definition `def`. Combines + * `firstUse` with transitive use-use adjacency. + */ + predicate useOfDef(Ssa::Definition def, Cfg::NameNode use) { + firstUse(def, use) + or + exists(Cfg::NameNode mid | useOfDef(def, mid) and adjacentUseUse(mid, use)) + } +} diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected new file mode 100644 index 000000000000..3b5cd963eeab --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.expected @@ -0,0 +1,6 @@ +| def-only-old | $:0:0 | +| def-only-old | __name__:0:0 | +| def-only-old | __package__:0:0 | +| def-only-old | e:37:1 | +| def-only-old | e:40:25 | +| def-only-old | x:20:1 | diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql new file mode 100644 index 000000000000..590f5ebed47a --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/CmpTest.ql @@ -0,0 +1,59 @@ +/** + * Compares the new-CFG SSA against the legacy ESSA on the same Python + * sources. Reports definitions present in one implementation but not + * the other, identified by variable name + source position. + * + * The `.expected` file records the current diff as a snapshot: as the + * new SSA matures (closing captured-variable gap, exception bindings, + * etc.) and tracks more variables, the snapshot should monotonically + * shrink. + * + * Known categories of `def-only-old` mismatches: + * - Function / class / global definitions with no in-scope read + * (intentional: SSA is liveness-pruned, write-only variables are + * not tracked). + * - Captured / closure variables (gap: new SSA does not yet model + * closure captures). + * - Module variables `__name__`, `__package__`, `$` (legacy ESSA + * adds implicit bindings the new SSA does not). + * - Exception-handler `as` bindings (depend on raise modelling). + * + * `def-only-new` mismatches would indicate the new SSA produces spurious + * definitions; currently none are expected. + */ + +import python +import semmle.python.dataflow.new.internal.SsaImpl as NewSsa +import semmle.python.controlflow.internal.Cfg as Cfg +import semmle.python.essa.Essa + +string newDefSig(NewSsa::EssaNodeDefinition def) { + exists(Cfg::ControlFlowNode n | n = def.getDefiningNode() | + result = + def.getVariable().getVariable().getId() + ":" + n.getLocation().getStartLine() + ":" + + n.getLocation().getStartColumn() + ) +} + +string legacyDefSig(EssaNodeDefinition def) { + exists(ControlFlowNode n | n = def.getDefiningNode() | + result = + def.getSourceVariable().getName() + ":" + n.getLocation().getStartLine() + ":" + + n.getLocation().getStartColumn() + ) +} + +from string kind, string sig +where + kind = "def-only-new" and + exists(NewSsa::EssaNodeDefinition def | + sig = newDefSig(def) and + not exists(EssaNodeDefinition legacyDef | sig = legacyDefSig(legacyDef)) + ) + or + kind = "def-only-old" and + exists(EssaNodeDefinition legacyDef | + sig = legacyDefSig(legacyDef) and + not exists(NewSsa::EssaNodeDefinition def | sig = newDefSig(def)) + ) +select kind, sig diff --git a/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py new file mode 100644 index 000000000000..8b061109bf2c --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa-vs-legacy/test.py @@ -0,0 +1,53 @@ +def simple_assign(): + x = 1 + return x + + +def reassignment(): + x = 1 + x = 2 + return x + + +def if_else_branch(cond): + if cond: + x = 1 + else: + x = 2 + return x + + +def loop(xs): + total = 0 + for x in xs: + total = total + x + return total + + +def parameter(a, b=2, *args, **kwargs): + return a + b + sum(args) + + +def closure(x): + def inner(): + return x + return inner + + +def exception_binding(): + try: + compute() + except Exception as e: + return e + + +def with_binding(): + with open("file") as f: + return f.read() + + +GLOBAL = 1 + + +def read_global(): + return GLOBAL diff --git a/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.expected b/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.expected new file mode 100644 index 000000000000..d87d63fc11f1 --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.expected @@ -0,0 +1,6 @@ +| test.py:14:5:14:15 | basic_param | Unexpected result: def=basic_param | +| test.py:18:5:18:16 | basic_assign | Unexpected result: def=basic_assign | +| test.py:23:5:23:16 | reassignment | Unexpected result: def=reassignment | +| test.py:29:5:29:15 | if_else_phi | Unexpected result: def=if_else_phi | +| test.py:37:5:37:14 | use_global | Unexpected result: def=use_global | +| test.py:38:28:38:49 | Comment # $ use=some_undefined | Missing result: use=some_undefined | diff --git a/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql b/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql new file mode 100644 index 000000000000..0bebf4a637d0 --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa/SsaTest.ql @@ -0,0 +1,59 @@ +/** + * Inline-expectations test for the new-CFG SSA adapter + * (`semmle.python.dataflow.new.internal.SsaImpl`). + * + * Tags: + * - `def=`: there is an SSA write definition of `` at this + * line (parameter init, plain assignment, augmented assignment, + * exception-handler binding, deletion, etc.). + * - `use=`: `` is used at this line, and some SSA definition + * of `` reaches the read. + * - `phi=`: there is an SSA phi definition of `` whose BB + * starts on this line. + */ + +import python +import semmle.python.dataflow.new.internal.SsaImpl as SsaImpl +import semmle.python.controlflow.internal.AstNodeImpl as CfgImpl +import semmle.python.controlflow.internal.Cfg as Cfg +import utils.test.InlineExpectationsTest + +module SsaTest implements TestSig { + string getARelevantTag() { result = ["def", "use", "phi"] } + + predicate hasActualResult(Location location, string element, string tag, string value) { + // A `def=` fires when an SSA WriteDefinition is at a CFG node + // on the given line. + exists(SsaImpl::Ssa::WriteDefinition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n | + def.definesAt(_, bb, i) and + bb.getNode(i) = n and + tag = "def" and + location = n.getLocation() and + element = n.toString() and + value = n.getId() + ) + or + // A `use=` fires when an SSA Definition reaches a read at this + // CFG node. + exists(SsaImpl::Ssa::Definition def, CfgImpl::BasicBlock bb, int i, Cfg::NameNode n | + SsaImpl::Ssa::ssaDefReachesRead(_, def, bb, i) and + bb.getNode(i) = n and + tag = "use" and + location = n.getLocation() and + element = n.toString() and + value = n.getId() + ) + or + // A `phi=` fires when there is a phi node whose BB's first + // CFG node is on the given line. + exists(SsaImpl::Ssa::PhiNode phi, CfgImpl::BasicBlock bb | + phi.definesAt(_, bb, _) and + tag = "phi" and + location = bb.getNode(0).getLocation() and + element = bb.toString() and + value = phi.getSourceVariable().(SsaImpl::SsaSourceVariable).getVariable().getId() + ) + } +} + +import MakeTest diff --git a/python/ql/test/library-tests/dataflow-new-ssa/test.py b/python/ql/test/library-tests/dataflow-new-ssa/test.py new file mode 100644 index 000000000000..c6cdc22c3b36 --- /dev/null +++ b/python/ql/test/library-tests/dataflow-new-ssa/test.py @@ -0,0 +1,40 @@ +# Basic SSA tests for the new-CFG SSA adapter. +# +# The shared SSA implementation prunes its construction by liveness: +# definitions of variables that are not read are never materialised. +# This is by design — write-only variables would only bloat the SSA +# graph. Tests therefore must always include a read of each variable +# being verified. +# +# Annotations: +# def=: there is an SSA write definition of at this line +# use=: is used here and the read resolves to some def + + +def basic_param(x): # $ def=x + return x # $ use=x + + +def basic_assign(): + y = 1 # $ def=y + return y # $ use=y + + +def reassignment(): + x = 1 + x = 2 # $ def=x + return x # $ use=x + + +def if_else_phi(cond): # $ def=cond + if cond: # $ use=cond phi=x + x = 1 # $ def=x + else: + x = 2 # $ def=x + return x # $ use=x + + +def use_global(): + return some_undefined # $ use=some_undefined + +