mirror of
https://github.com/autc04/Retro68.git
synced 2024-06-25 21:29:51 +00:00
1166 lines
30 KiB
C++
1166 lines
30 KiB
C++
/* An experimental state machine, for tracking "taint": unsanitized uses
|
|
of data potentially under an attacker's control.
|
|
|
|
Copyright (C) 2019-2022 Free Software Foundation, Inc.
|
|
Contributed by David Malcolm <dmalcolm@redhat.com>.
|
|
|
|
This file is part of GCC.
|
|
|
|
GCC is free software; you can redistribute it and/or modify it
|
|
under the terms of the GNU General Public License as published by
|
|
the Free Software Foundation; either version 3, or (at your option)
|
|
any later version.
|
|
|
|
GCC is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
General Public License for more details.
|
|
|
|
You should have received a copy of the GNU General Public License
|
|
along with GCC; see the file COPYING3. If not see
|
|
<http://www.gnu.org/licenses/>. */
|
|
|
|
#include "config.h"
|
|
#include "system.h"
|
|
#include "coretypes.h"
|
|
#include "tree.h"
|
|
#include "function.h"
|
|
#include "basic-block.h"
|
|
#include "gimple.h"
|
|
#include "options.h"
|
|
#include "diagnostic-path.h"
|
|
#include "diagnostic-metadata.h"
|
|
#include "function.h"
|
|
#include "json.h"
|
|
#include "analyzer/analyzer.h"
|
|
#include "analyzer/analyzer-logging.h"
|
|
#include "gimple-iterator.h"
|
|
#include "tristate.h"
|
|
#include "selftest.h"
|
|
#include "ordered-hash-map.h"
|
|
#include "cgraph.h"
|
|
#include "cfg.h"
|
|
#include "digraph.h"
|
|
#include "stringpool.h"
|
|
#include "attribs.h"
|
|
#include "analyzer/supergraph.h"
|
|
#include "analyzer/call-string.h"
|
|
#include "analyzer/program-point.h"
|
|
#include "analyzer/store.h"
|
|
#include "analyzer/region-model.h"
|
|
#include "analyzer/sm.h"
|
|
#include "analyzer/program-state.h"
|
|
#include "analyzer/pending-diagnostic.h"
|
|
|
|
#if ENABLE_ANALYZER
|
|
|
|
namespace ana {
|
|
|
|
namespace {
|
|
|
|
/* An enum for describing tainted values. */
|
|
|
|
enum bounds
|
|
{
|
|
/* This tainted value has no upper or lower bound. */
|
|
BOUNDS_NONE,
|
|
|
|
/* This tainted value has an upper bound but not lower bound. */
|
|
BOUNDS_UPPER,
|
|
|
|
/* This tainted value has a lower bound but no upper bound. */
|
|
BOUNDS_LOWER
|
|
};
|
|
|
|
/* An experimental state machine, for tracking "taint": unsanitized uses
|
|
of data potentially under an attacker's control. */
|
|
|
|
class taint_state_machine : public state_machine
|
|
{
|
|
public:
|
|
taint_state_machine (logger *logger);
|
|
|
|
bool inherited_state_p () const FINAL OVERRIDE { return true; }
|
|
|
|
state_t alt_get_inherited_state (const sm_state_map &map,
|
|
const svalue *sval,
|
|
const extrinsic_state &ext_state)
|
|
const FINAL OVERRIDE;
|
|
|
|
bool on_stmt (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gimple *stmt) const FINAL OVERRIDE;
|
|
|
|
void on_condition (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gimple *stmt,
|
|
const svalue *lhs,
|
|
enum tree_code op,
|
|
const svalue *rhs) const FINAL OVERRIDE;
|
|
|
|
bool can_purge_p (state_t s) const FINAL OVERRIDE;
|
|
|
|
bool get_taint (state_t s, tree type, enum bounds *out) const;
|
|
|
|
state_t combine_states (state_t s0, state_t s1) const;
|
|
|
|
private:
|
|
void check_for_tainted_size_arg (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gcall *call,
|
|
tree callee_fndecl) const;
|
|
void check_for_tainted_divisor (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gassign *assign) const;
|
|
|
|
public:
|
|
/* State for a "tainted" value: unsanitized data potentially under an
|
|
attacker's control. */
|
|
state_t m_tainted;
|
|
|
|
/* State for a "tainted" value that has a lower bound. */
|
|
state_t m_has_lb;
|
|
|
|
/* State for a "tainted" value that has an upper bound. */
|
|
state_t m_has_ub;
|
|
|
|
/* Stop state, for a value we don't want to track any more. */
|
|
state_t m_stop;
|
|
};
|
|
|
|
/* Class for diagnostics relating to taint_state_machine. */
|
|
|
|
class taint_diagnostic : public pending_diagnostic
|
|
{
|
|
public:
|
|
taint_diagnostic (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds)
|
|
: m_sm (sm), m_arg (arg), m_has_bounds (has_bounds)
|
|
{}
|
|
|
|
bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
|
|
{
|
|
const taint_diagnostic &other = (const taint_diagnostic &)base_other;
|
|
return (same_tree_p (m_arg, other.m_arg)
|
|
&& m_has_bounds == other.m_has_bounds);
|
|
}
|
|
|
|
label_text describe_state_change (const evdesc::state_change &change)
|
|
FINAL OVERRIDE
|
|
{
|
|
if (change.m_new_state == m_sm.m_tainted)
|
|
{
|
|
if (change.m_origin)
|
|
return change.formatted_print ("%qE has an unchecked value here"
|
|
" (from %qE)",
|
|
change.m_expr, change.m_origin);
|
|
else
|
|
return change.formatted_print ("%qE gets an unchecked value here",
|
|
change.m_expr);
|
|
}
|
|
else if (change.m_new_state == m_sm.m_has_lb)
|
|
return change.formatted_print ("%qE has its lower bound checked here",
|
|
change.m_expr);
|
|
else if (change.m_new_state == m_sm.m_has_ub)
|
|
return change.formatted_print ("%qE has its upper bound checked here",
|
|
change.m_expr);
|
|
return label_text ();
|
|
}
|
|
protected:
|
|
const taint_state_machine &m_sm;
|
|
tree m_arg;
|
|
enum bounds m_has_bounds;
|
|
};
|
|
|
|
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
|
|
array index. */
|
|
|
|
class tainted_array_index : public taint_diagnostic
|
|
{
|
|
public:
|
|
tainted_array_index (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds)
|
|
: taint_diagnostic (sm, arg, has_bounds)
|
|
{}
|
|
|
|
const char *get_kind () const FINAL OVERRIDE { return "tainted_array_index"; }
|
|
|
|
int get_controlling_option () const FINAL OVERRIDE
|
|
{
|
|
return OPT_Wanalyzer_tainted_array_index;
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
{
|
|
diagnostic_metadata m;
|
|
/* CWE-129: "Improper Validation of Array Index". */
|
|
m.add_cwe (129);
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE"
|
|
" in array lookup without bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE"
|
|
" in array lookup without checking for negative",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE"
|
|
" in array lookup without upper-bounds checking",
|
|
m_arg);
|
|
break;
|
|
}
|
|
}
|
|
|
|
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
{
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE in array lookup"
|
|
" without bounds checking",
|
|
m_arg);
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE"
|
|
" in array lookup without checking for negative",
|
|
m_arg);
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE"
|
|
" in array lookup without upper-bounds checking",
|
|
m_arg);
|
|
}
|
|
}
|
|
};
|
|
|
|
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
|
|
pointer offset. */
|
|
|
|
class tainted_offset : public taint_diagnostic
|
|
{
|
|
public:
|
|
tainted_offset (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds)
|
|
: taint_diagnostic (sm, arg, has_bounds)
|
|
{}
|
|
|
|
const char *get_kind () const FINAL OVERRIDE { return "tainted_offset"; }
|
|
|
|
int get_controlling_option () const FINAL OVERRIDE
|
|
{
|
|
return OPT_Wanalyzer_tainted_offset;
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
{
|
|
diagnostic_metadata m;
|
|
/* CWE-823: "Use of Out-of-range Pointer Offset". */
|
|
m.add_cwe (823);
|
|
if (m_arg)
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as offset"
|
|
" without bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as offset"
|
|
" without lower-bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as offset"
|
|
" without upper-bounds checking",
|
|
m_arg);
|
|
break;
|
|
}
|
|
else
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as offset"
|
|
" without bounds checking");
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as offset"
|
|
" without lower-bounds checking");
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as offset"
|
|
" without upper-bounds checking");
|
|
break;
|
|
}
|
|
}
|
|
|
|
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
{
|
|
if (m_arg)
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as offset without bounds checking",
|
|
m_arg);
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as offset without lower-bounds checking",
|
|
m_arg);
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as offset without upper-bounds checking",
|
|
m_arg);
|
|
}
|
|
else
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print ("use of attacker-controlled value"
|
|
" as offset without bounds checking");
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print ("use of attacker-controlled value"
|
|
" as offset without lower-bounds"
|
|
" checking");
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print ("use of attacker-controlled value"
|
|
" as offset without upper-bounds"
|
|
" checking");
|
|
}
|
|
}
|
|
};
|
|
|
|
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
|
|
size. */
|
|
|
|
class tainted_size : public taint_diagnostic
|
|
{
|
|
public:
|
|
tainted_size (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds)
|
|
: taint_diagnostic (sm, arg, has_bounds)
|
|
{}
|
|
|
|
const char *get_kind () const OVERRIDE { return "tainted_size"; }
|
|
|
|
int get_controlling_option () const FINAL OVERRIDE
|
|
{
|
|
return OPT_Wanalyzer_tainted_size;
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) OVERRIDE
|
|
{
|
|
diagnostic_metadata m;
|
|
m.add_cwe (129);
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as size"
|
|
" without bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as size"
|
|
" without lower-bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as size"
|
|
" without upper-bounds checking",
|
|
m_arg);
|
|
break;
|
|
}
|
|
}
|
|
|
|
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
{
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as size without bounds checking",
|
|
m_arg);
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as size without lower-bounds checking",
|
|
m_arg);
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print ("use of attacker-controlled value %qE"
|
|
" as size without upper-bounds checking",
|
|
m_arg);
|
|
}
|
|
}
|
|
};
|
|
|
|
/* Subclass of tainted_size for reporting on tainted size values
|
|
passed to an external function annotated with attribute "access". */
|
|
|
|
class tainted_access_attrib_size : public tainted_size
|
|
{
|
|
public:
|
|
tainted_access_attrib_size (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds, tree callee_fndecl,
|
|
unsigned size_argno, const char *access_str)
|
|
: tainted_size (sm, arg, has_bounds),
|
|
m_callee_fndecl (callee_fndecl),
|
|
m_size_argno (size_argno), m_access_str (access_str)
|
|
{
|
|
}
|
|
|
|
const char *get_kind () const OVERRIDE
|
|
{
|
|
return "tainted_access_attrib_size";
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
{
|
|
bool warned = tainted_size::emit (rich_loc);
|
|
if (warned)
|
|
{
|
|
inform (DECL_SOURCE_LOCATION (m_callee_fndecl),
|
|
"parameter %i of %qD marked as a size via attribute %qs",
|
|
m_size_argno + 1, m_callee_fndecl, m_access_str);
|
|
}
|
|
return warned;
|
|
}
|
|
|
|
private:
|
|
tree m_callee_fndecl;
|
|
unsigned m_size_argno;
|
|
const char *m_access_str;
|
|
};
|
|
|
|
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
|
|
divisor (so that an attacker can trigger a divide by zero). */
|
|
|
|
class tainted_divisor : public taint_diagnostic
|
|
{
|
|
public:
|
|
tainted_divisor (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds)
|
|
: taint_diagnostic (sm, arg, has_bounds)
|
|
{}
|
|
|
|
const char *get_kind () const FINAL OVERRIDE { return "tainted_divisor"; }
|
|
|
|
int get_controlling_option () const FINAL OVERRIDE
|
|
{
|
|
return OPT_Wanalyzer_tainted_divisor;
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
{
|
|
diagnostic_metadata m;
|
|
/* CWE-369: "Divide By Zero". */
|
|
m.add_cwe (369);
|
|
if (m_arg)
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as divisor"
|
|
" without checking for zero",
|
|
m_arg);
|
|
else
|
|
return warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as divisor"
|
|
" without checking for zero");
|
|
}
|
|
|
|
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
{
|
|
if (m_arg)
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE as divisor"
|
|
" without checking for zero",
|
|
m_arg);
|
|
else
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value as divisor"
|
|
" without checking for zero");
|
|
}
|
|
};
|
|
|
|
/* Concrete taint_diagnostic subclass for reporting attacker-controlled
|
|
size of a dynamic allocation. */
|
|
|
|
class tainted_allocation_size : public taint_diagnostic
|
|
{
|
|
public:
|
|
tainted_allocation_size (const taint_state_machine &sm, tree arg,
|
|
enum bounds has_bounds, enum memory_space mem_space)
|
|
: taint_diagnostic (sm, arg, has_bounds),
|
|
m_mem_space (mem_space)
|
|
{
|
|
}
|
|
|
|
const char *get_kind () const FINAL OVERRIDE
|
|
{
|
|
return "tainted_allocation_size";
|
|
}
|
|
|
|
bool subclass_equal_p (const pending_diagnostic &base_other) const OVERRIDE
|
|
{
|
|
if (!taint_diagnostic::subclass_equal_p (base_other))
|
|
return false;
|
|
const tainted_allocation_size &other
|
|
= (const tainted_allocation_size &)base_other;
|
|
return m_mem_space == other.m_mem_space;
|
|
}
|
|
|
|
int get_controlling_option () const FINAL OVERRIDE
|
|
{
|
|
return OPT_Wanalyzer_tainted_allocation_size;
|
|
}
|
|
|
|
bool emit (rich_location *rich_loc) FINAL OVERRIDE
|
|
{
|
|
diagnostic_metadata m;
|
|
/* "CWE-789: Memory Allocation with Excessive Size Value". */
|
|
m.add_cwe (789);
|
|
|
|
bool warned;
|
|
if (m_arg)
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as"
|
|
" allocation size without bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as"
|
|
" allocation size without"
|
|
" lower-bounds checking",
|
|
m_arg);
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value %qE as"
|
|
" allocation size without"
|
|
" upper-bounds checking",
|
|
m_arg);
|
|
break;
|
|
}
|
|
else
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as"
|
|
" allocation size without bounds"
|
|
" checking");
|
|
break;
|
|
case BOUNDS_UPPER:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as"
|
|
" allocation size without"
|
|
" lower-bounds checking");
|
|
break;
|
|
case BOUNDS_LOWER:
|
|
warned = warning_meta (rich_loc, m, get_controlling_option (),
|
|
"use of attacker-controlled value as"
|
|
" allocation size without"
|
|
" upper-bounds checking");
|
|
break;
|
|
}
|
|
if (warned)
|
|
{
|
|
location_t loc = rich_loc->get_loc ();
|
|
switch (m_mem_space)
|
|
{
|
|
default:
|
|
break;
|
|
case MEMSPACE_STACK:
|
|
inform (loc, "stack-based allocation");
|
|
break;
|
|
case MEMSPACE_HEAP:
|
|
inform (loc, "heap-based allocation");
|
|
break;
|
|
}
|
|
}
|
|
return warned;
|
|
}
|
|
|
|
label_text describe_final_event (const evdesc::final_event &ev) FINAL OVERRIDE
|
|
{
|
|
if (m_arg)
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE as allocation size"
|
|
" without bounds checking",
|
|
m_arg);
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE as allocation size"
|
|
" without lower-bounds checking",
|
|
m_arg);
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value %qE as allocation size"
|
|
" without upper-bounds checking",
|
|
m_arg);
|
|
}
|
|
else
|
|
switch (m_has_bounds)
|
|
{
|
|
default:
|
|
gcc_unreachable ();
|
|
case BOUNDS_NONE:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value as allocation size"
|
|
" without bounds checking");
|
|
case BOUNDS_UPPER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value as allocation size"
|
|
" without lower-bounds checking");
|
|
case BOUNDS_LOWER:
|
|
return ev.formatted_print
|
|
("use of attacker-controlled value as allocation size"
|
|
" without upper-bounds checking");
|
|
}
|
|
}
|
|
|
|
private:
|
|
enum memory_space m_mem_space;
|
|
};
|
|
|
|
/* taint_state_machine's ctor. */
|
|
|
|
taint_state_machine::taint_state_machine (logger *logger)
|
|
: state_machine ("taint", logger)
|
|
{
|
|
m_tainted = add_state ("tainted");
|
|
m_has_lb = add_state ("has_lb");
|
|
m_has_ub = add_state ("has_ub");
|
|
m_stop = add_state ("stop");
|
|
}
|
|
|
|
state_machine::state_t
|
|
taint_state_machine::alt_get_inherited_state (const sm_state_map &map,
|
|
const svalue *sval,
|
|
const extrinsic_state &ext_state)
|
|
const
|
|
{
|
|
switch (sval->get_kind ())
|
|
{
|
|
default:
|
|
break;
|
|
case SK_UNARYOP:
|
|
{
|
|
const unaryop_svalue *unaryop_sval
|
|
= as_a <const unaryop_svalue *> (sval);
|
|
enum tree_code op = unaryop_sval->get_op ();
|
|
const svalue *arg = unaryop_sval->get_arg ();
|
|
switch (op)
|
|
{
|
|
case NOP_EXPR:
|
|
{
|
|
state_t arg_state = map.get_state (arg, ext_state);
|
|
return arg_state;
|
|
}
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case SK_BINOP:
|
|
{
|
|
const binop_svalue *binop_sval = as_a <const binop_svalue *> (sval);
|
|
enum tree_code op = binop_sval->get_op ();
|
|
const svalue *arg0 = binop_sval->get_arg0 ();
|
|
const svalue *arg1 = binop_sval->get_arg1 ();
|
|
switch (op)
|
|
{
|
|
default:
|
|
break;
|
|
case PLUS_EXPR:
|
|
case MINUS_EXPR:
|
|
case MULT_EXPR:
|
|
case POINTER_PLUS_EXPR:
|
|
case TRUNC_DIV_EXPR:
|
|
case TRUNC_MOD_EXPR:
|
|
{
|
|
state_t arg0_state = map.get_state (arg0, ext_state);
|
|
state_t arg1_state = map.get_state (arg1, ext_state);
|
|
return combine_states (arg0_state, arg1_state);
|
|
}
|
|
break;
|
|
|
|
case EQ_EXPR:
|
|
case GE_EXPR:
|
|
case LE_EXPR:
|
|
case NE_EXPR:
|
|
case GT_EXPR:
|
|
case LT_EXPR:
|
|
case UNORDERED_EXPR:
|
|
case ORDERED_EXPR:
|
|
/* Comparisons are just booleans. */
|
|
return m_start;
|
|
|
|
case BIT_AND_EXPR:
|
|
case RSHIFT_EXPR:
|
|
return NULL;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
/* Implementation of state_machine::on_stmt vfunc for taint_state_machine. */
|
|
|
|
bool
|
|
taint_state_machine::on_stmt (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gimple *stmt) const
|
|
{
|
|
if (const gcall *call = dyn_cast <const gcall *> (stmt))
|
|
if (tree callee_fndecl = sm_ctxt->get_fndecl_for_call (call))
|
|
{
|
|
if (is_named_call_p (callee_fndecl, "fread", call, 4))
|
|
{
|
|
tree arg = gimple_call_arg (call, 0);
|
|
|
|
sm_ctxt->on_transition (node, stmt, arg, m_start, m_tainted);
|
|
|
|
/* Dereference an ADDR_EXPR. */
|
|
// TODO: should the engine do this?
|
|
if (TREE_CODE (arg) == ADDR_EXPR)
|
|
sm_ctxt->on_transition (node, stmt, TREE_OPERAND (arg, 0),
|
|
m_start, m_tainted);
|
|
return true;
|
|
}
|
|
|
|
/* External function with "access" attribute. */
|
|
if (sm_ctxt->unknown_side_effects_p ())
|
|
check_for_tainted_size_arg (sm_ctxt, node, call, callee_fndecl);
|
|
}
|
|
// TODO: ...etc; many other sources of untrusted data
|
|
|
|
if (const gassign *assign = dyn_cast <const gassign *> (stmt))
|
|
{
|
|
enum tree_code op = gimple_assign_rhs_code (assign);
|
|
|
|
switch (op)
|
|
{
|
|
default:
|
|
break;
|
|
case TRUNC_DIV_EXPR:
|
|
case CEIL_DIV_EXPR:
|
|
case FLOOR_DIV_EXPR:
|
|
case ROUND_DIV_EXPR:
|
|
case TRUNC_MOD_EXPR:
|
|
case CEIL_MOD_EXPR:
|
|
case FLOOR_MOD_EXPR:
|
|
case ROUND_MOD_EXPR:
|
|
case RDIV_EXPR:
|
|
case EXACT_DIV_EXPR:
|
|
check_for_tainted_divisor (sm_ctxt, node, assign);
|
|
break;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/* Implementation of state_machine::on_condition vfunc for taint_state_machine.
|
|
Potentially transition state 'tainted' to 'has_ub' or 'has_lb',
|
|
and states 'has_ub' and 'has_lb' to 'stop'. */
|
|
|
|
void
|
|
taint_state_machine::on_condition (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gimple *stmt,
|
|
const svalue *lhs,
|
|
enum tree_code op,
|
|
const svalue *rhs ATTRIBUTE_UNUSED) const
|
|
{
|
|
if (stmt == NULL)
|
|
return;
|
|
|
|
// TODO: this doesn't use the RHS; should we make it symmetric?
|
|
|
|
// TODO
|
|
switch (op)
|
|
{
|
|
//case NE_EXPR:
|
|
//case EQ_EXPR:
|
|
case GE_EXPR:
|
|
case GT_EXPR:
|
|
{
|
|
sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
|
|
m_has_lb);
|
|
sm_ctxt->on_transition (node, stmt, lhs, m_has_ub,
|
|
m_stop);
|
|
}
|
|
break;
|
|
case LE_EXPR:
|
|
case LT_EXPR:
|
|
{
|
|
sm_ctxt->on_transition (node, stmt, lhs, m_tainted,
|
|
m_has_ub);
|
|
sm_ctxt->on_transition (node, stmt, lhs, m_has_lb,
|
|
m_stop);
|
|
}
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool
|
|
taint_state_machine::can_purge_p (state_t s ATTRIBUTE_UNUSED) const
|
|
{
|
|
return true;
|
|
}
|
|
|
|
/* If STATE is a tainted state, write the bounds to *OUT and return true.
|
|
Otherwise return false.
|
|
Use the signedness of TYPE to determine if "has_ub" is tainted. */
|
|
|
|
bool
|
|
taint_state_machine::get_taint (state_t state, tree type,
|
|
enum bounds *out) const
|
|
{
|
|
/* Unsigned types have an implicit lower bound. */
|
|
bool is_unsigned = false;
|
|
if (type)
|
|
if (INTEGRAL_TYPE_P (type))
|
|
is_unsigned = TYPE_UNSIGNED (type);
|
|
|
|
/* Can't use a switch as the states are non-const. */
|
|
if (state == m_tainted)
|
|
{
|
|
*out = is_unsigned ? BOUNDS_LOWER : BOUNDS_NONE;
|
|
return true;
|
|
}
|
|
else if (state == m_has_lb)
|
|
{
|
|
*out = BOUNDS_LOWER;
|
|
return true;
|
|
}
|
|
else if (state == m_has_ub && !is_unsigned)
|
|
{
|
|
/* Missing lower bound. */
|
|
*out = BOUNDS_UPPER;
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/* Find the most tainted state of S0 and S1. */
|
|
|
|
state_machine::state_t
|
|
taint_state_machine::combine_states (state_t s0, state_t s1) const
|
|
{
|
|
gcc_assert (s0);
|
|
gcc_assert (s1);
|
|
if (s0 == s1)
|
|
return s0;
|
|
if (s0 == m_tainted || s1 == m_tainted)
|
|
return m_tainted;
|
|
if (s0 == m_start)
|
|
return s1;
|
|
if (s1 == m_start)
|
|
return s0;
|
|
if (s0 == m_stop)
|
|
return s1;
|
|
if (s1 == m_stop)
|
|
return s0;
|
|
/* The only remaining combinations are one of has_ub and has_lb
|
|
(in either order). */
|
|
gcc_assert ((s0 == m_has_lb && s1 == m_has_ub)
|
|
|| (s0 == m_has_ub && s1 == m_has_lb));
|
|
return m_tainted;
|
|
}
|
|
|
|
/* Check for calls to external functions marked with
|
|
__attribute__((access)) with a size-index: complain about
|
|
tainted values passed as a size to such a function. */
|
|
|
|
void
|
|
taint_state_machine::check_for_tainted_size_arg (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gcall *call,
|
|
tree callee_fndecl) const
|
|
{
|
|
tree fntype = TREE_TYPE (callee_fndecl);
|
|
if (!fntype)
|
|
return;
|
|
|
|
if (!TYPE_ATTRIBUTES (fntype))
|
|
return;
|
|
|
|
/* Initialize a map of attribute access specifications for arguments
|
|
to the function call. */
|
|
rdwr_map rdwr_idx;
|
|
init_attr_rdwr_indices (&rdwr_idx, TYPE_ATTRIBUTES (fntype));
|
|
|
|
unsigned argno = 0;
|
|
|
|
for (tree iter = TYPE_ARG_TYPES (fntype); iter;
|
|
iter = TREE_CHAIN (iter), ++argno)
|
|
{
|
|
const attr_access* access = rdwr_idx.get (argno);
|
|
if (!access)
|
|
continue;
|
|
|
|
/* Ignore any duplicate entry in the map for the size argument. */
|
|
if (access->ptrarg != argno)
|
|
continue;
|
|
|
|
if (access->sizarg == UINT_MAX)
|
|
continue;
|
|
|
|
tree size_arg = gimple_call_arg (call, access->sizarg);
|
|
|
|
state_t state = sm_ctxt->get_state (call, size_arg);
|
|
enum bounds b;
|
|
if (get_taint (state, TREE_TYPE (size_arg), &b))
|
|
{
|
|
const char* const access_str =
|
|
TREE_STRING_POINTER (access->to_external_string ());
|
|
tree diag_size = sm_ctxt->get_diagnostic_tree (size_arg);
|
|
sm_ctxt->warn (node, call, size_arg,
|
|
new tainted_access_attrib_size (*this, diag_size, b,
|
|
callee_fndecl,
|
|
access->sizarg,
|
|
access_str));
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Complain if ASSIGN (a division operation) has a tainted divisor
|
|
that could be zero. */
|
|
|
|
void
|
|
taint_state_machine::check_for_tainted_divisor (sm_context *sm_ctxt,
|
|
const supernode *node,
|
|
const gassign *assign) const
|
|
{
|
|
const region_model *old_model = sm_ctxt->get_old_region_model ();
|
|
if (!old_model)
|
|
return;
|
|
|
|
tree divisor_expr = gimple_assign_rhs2 (assign);;
|
|
const svalue *divisor_sval = old_model->get_rvalue (divisor_expr, NULL);
|
|
|
|
state_t state = sm_ctxt->get_state (assign, divisor_sval);
|
|
enum bounds b;
|
|
if (get_taint (state, TREE_TYPE (divisor_expr), &b))
|
|
{
|
|
const svalue *zero_sval
|
|
= old_model->get_manager ()->get_or_create_int_cst
|
|
(TREE_TYPE (divisor_expr), 0);
|
|
tristate ts
|
|
= old_model->eval_condition (divisor_sval, NE_EXPR, zero_sval);
|
|
if (ts.is_true ())
|
|
/* The divisor is known to not equal 0: don't warn. */
|
|
return;
|
|
|
|
tree diag_divisor = sm_ctxt->get_diagnostic_tree (divisor_expr);
|
|
sm_ctxt->warn (node, assign, divisor_expr,
|
|
new tainted_divisor (*this, diag_divisor, b));
|
|
sm_ctxt->set_next_state (assign, divisor_sval, m_stop);
|
|
}
|
|
}
|
|
|
|
} // anonymous namespace
|
|
|
|
/* Internal interface to this file. */
|
|
|
|
state_machine *
|
|
make_taint_state_machine (logger *logger)
|
|
{
|
|
return new taint_state_machine (logger);
|
|
}
|
|
|
|
/* Complain to CTXT if accessing REG leads could lead to arbitrary
|
|
memory access under an attacker's control (due to taint). */
|
|
|
|
void
|
|
region_model::check_region_for_taint (const region *reg,
|
|
enum access_direction,
|
|
region_model_context *ctxt) const
|
|
{
|
|
gcc_assert (reg);
|
|
gcc_assert (ctxt);
|
|
|
|
LOG_SCOPE (ctxt->get_logger ());
|
|
|
|
sm_state_map *smap;
|
|
const state_machine *sm;
|
|
unsigned sm_idx;
|
|
if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
|
|
return;
|
|
|
|
gcc_assert (smap);
|
|
gcc_assert (sm);
|
|
|
|
const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
|
|
|
|
const extrinsic_state *ext_state = ctxt->get_ext_state ();
|
|
if (!ext_state)
|
|
return;
|
|
|
|
const region *iter_region = reg;
|
|
while (iter_region)
|
|
{
|
|
switch (iter_region->get_kind ())
|
|
{
|
|
default:
|
|
break;
|
|
|
|
case RK_ELEMENT:
|
|
{
|
|
const element_region *element_reg
|
|
= (const element_region *)iter_region;
|
|
const svalue *index = element_reg->get_index ();
|
|
const state_machine::state_t
|
|
state = smap->get_state (index, *ext_state);
|
|
gcc_assert (state);
|
|
enum bounds b;
|
|
if (taint_sm.get_taint (state, index->get_type (), &b))
|
|
{
|
|
tree arg = get_representative_tree (index);
|
|
ctxt->warn (new tainted_array_index (taint_sm, arg, b));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case RK_OFFSET:
|
|
{
|
|
const offset_region *offset_reg
|
|
= (const offset_region *)iter_region;
|
|
const svalue *offset = offset_reg->get_byte_offset ();
|
|
const state_machine::state_t
|
|
state = smap->get_state (offset, *ext_state);
|
|
gcc_assert (state);
|
|
/* Handle implicit cast to sizetype. */
|
|
tree effective_type = offset->get_type ();
|
|
if (const svalue *cast = offset->maybe_undo_cast ())
|
|
if (cast->get_type ())
|
|
effective_type = cast->get_type ();
|
|
enum bounds b;
|
|
if (taint_sm.get_taint (state, effective_type, &b))
|
|
{
|
|
tree arg = get_representative_tree (offset);
|
|
ctxt->warn (new tainted_offset (taint_sm, arg, b));
|
|
}
|
|
}
|
|
break;
|
|
|
|
case RK_CAST:
|
|
{
|
|
const cast_region *cast_reg
|
|
= as_a <const cast_region *> (iter_region);
|
|
iter_region = cast_reg->get_original_region ();
|
|
continue;
|
|
}
|
|
|
|
case RK_SIZED:
|
|
{
|
|
const sized_region *sized_reg
|
|
= (const sized_region *)iter_region;
|
|
const svalue *size_sval = sized_reg->get_byte_size_sval (m_mgr);
|
|
const state_machine::state_t
|
|
state = smap->get_state (size_sval, *ext_state);
|
|
gcc_assert (state);
|
|
enum bounds b;
|
|
if (taint_sm.get_taint (state, size_sval->get_type (), &b))
|
|
{
|
|
tree arg = get_representative_tree (size_sval);
|
|
ctxt->warn (new tainted_size (taint_sm, arg, b));
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
|
|
iter_region = iter_region->get_parent_region ();
|
|
}
|
|
}
|
|
|
|
/* Complain to CTXT about a tainted allocation size if SIZE_IN_BYTES is
|
|
under an attacker's control (due to taint), where the allocation
|
|
is happening within MEM_SPACE. */
|
|
|
|
void
|
|
region_model::check_dynamic_size_for_taint (enum memory_space mem_space,
|
|
const svalue *size_in_bytes,
|
|
region_model_context *ctxt) const
|
|
{
|
|
gcc_assert (size_in_bytes);
|
|
gcc_assert (ctxt);
|
|
|
|
LOG_SCOPE (ctxt->get_logger ());
|
|
|
|
sm_state_map *smap;
|
|
const state_machine *sm;
|
|
unsigned sm_idx;
|
|
if (!ctxt->get_taint_map (&smap, &sm, &sm_idx))
|
|
return;
|
|
|
|
gcc_assert (smap);
|
|
gcc_assert (sm);
|
|
|
|
const taint_state_machine &taint_sm = (const taint_state_machine &)*sm;
|
|
|
|
const extrinsic_state *ext_state = ctxt->get_ext_state ();
|
|
if (!ext_state)
|
|
return;
|
|
|
|
const state_machine::state_t
|
|
state = smap->get_state (size_in_bytes, *ext_state);
|
|
gcc_assert (state);
|
|
enum bounds b;
|
|
if (taint_sm.get_taint (state, size_in_bytes->get_type (), &b))
|
|
{
|
|
tree arg = get_representative_tree (size_in_bytes);
|
|
ctxt->warn (new tainted_allocation_size (taint_sm, arg, b, mem_space));
|
|
}
|
|
}
|
|
|
|
} // namespace ana
|
|
|
|
#endif /* #if ENABLE_ANALYZER */
|