Feature/add memap cstack usage ports (#661)
* Added memap, avstack, and checkstackusage tools to STM32F4xx Makefile and CMake builds to calculate CSTACK depth and RAM usage * Added memap, cstack, and ram-usage recipes to stm32f10x port Makefile. Added Cmake build. * Removed local dlmstp.c module from stm32f10x port, and used the common datalink dlmstp.c module with MS/TP extended frames and zero-config support. * Added .nm and .su to .gitignore to skip the analysis file residue.
This commit is contained in:
Executable
+251
@@ -0,0 +1,251 @@
|
||||
#!/usr/bin/perl -w
|
||||
# avstack.pl: GCC C stack checker
|
||||
# Copyright (C) 2013 Daniel Beer <dlbeer@gmail.com>
|
||||
#
|
||||
# Permission to use, copy, modify, and/or distribute this software for
|
||||
# any purpose with or without fee is hereby granted, provided that the
|
||||
# above copyright notice and this permission notice appear in all
|
||||
# copies.
|
||||
#
|
||||
# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL
|
||||
# WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED
|
||||
# WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE
|
||||
# AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
|
||||
# DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
|
||||
# PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
|
||||
# TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
|
||||
# PERFORMANCE OF THIS SOFTWARE.
|
||||
#
|
||||
# Usage
|
||||
# -----
|
||||
#
|
||||
# This script requires that you compile your code with -fstack-usage.
|
||||
# This results in GCC generating a .su file for each .o file. Once you
|
||||
# have these, do:
|
||||
#
|
||||
# ./avstack.pl <object files>
|
||||
#
|
||||
# This will disassemble .o files to construct a call graph, and read
|
||||
# frame size information from .su. The call graph is traced to find, for
|
||||
# each function:
|
||||
#
|
||||
# - Call height: the maximum call height of any callee, plus 1
|
||||
# (defined to be 1 for any function which has no callees).
|
||||
#
|
||||
# - Inherited frame: the maximum *inherited* frame of any callee, plus
|
||||
# the GCC-calculated frame size of the function in question.
|
||||
#
|
||||
# Using these two pieces of information, we calculate a cost (estimated
|
||||
# peak stack usage) for calling the function. Functions are then listed
|
||||
# on stdout in decreasing order of cost.
|
||||
#
|
||||
# Functions which are recursive are marked with an 'R' to the left of
|
||||
# them. Their cost is calculated for a single level of recursion.
|
||||
#
|
||||
# The peak stack usage of your entire program can usually be estimated
|
||||
# as the stack cost of "main", plus the maximum stack cost of any
|
||||
# interrupt handler which might execute.
|
||||
|
||||
use strict;
|
||||
|
||||
# Configuration: set these as appropriate for your architecture/project.
|
||||
|
||||
my $objdump = "arm-none-eabi-objdump";
|
||||
my $call_cost = 4;
|
||||
|
||||
# First, we need to read all object and corresponding .su files. We're
|
||||
# gathering a mapping of functions to callees and functions to frame
|
||||
# sizes. We're just parsing at this stage -- callee name resolution
|
||||
# comes later.
|
||||
|
||||
my %frame_size; # "func@file" -> size
|
||||
my %call_graph; # "func@file" -> {callees}
|
||||
my %addresses; # "addr@file" -> "func@file"
|
||||
|
||||
my %global_name; # "func" -> "func@file"
|
||||
my %ambiguous; # "func" -> 1
|
||||
|
||||
foreach (@ARGV) {
|
||||
# Disassemble this object file to obtain a callees. Sources in the
|
||||
# call graph are named "func@file". Targets in the call graph are
|
||||
# named either "offset@file" or "funcname". We also keep a list of
|
||||
# the addresses and names of each function we encounter.
|
||||
my $objfile = $_;
|
||||
my $source;
|
||||
|
||||
open(DISASSEMBLY, "$objdump -dr $objfile|") ||
|
||||
die "Can't disassemble $objfile";
|
||||
while (<DISASSEMBLY>) {
|
||||
chomp;
|
||||
|
||||
if (/^([0-9a-fA-F]+) <(.*)>:/) {
|
||||
my $a = $1;
|
||||
my $name = $2;
|
||||
|
||||
$source = "$name\@$objfile";
|
||||
$call_graph{$source} = {};
|
||||
$ambiguous{$name} = 1 if defined($global_name{$name});
|
||||
$global_name{$name} = "$name\@$objfile";
|
||||
|
||||
$a =~ s/^0*//;
|
||||
$addresses{"$a\@$objfile"} = "$name\@$objfile";
|
||||
}
|
||||
|
||||
if (/: R_[A-Za-z0-9_]+_CALL[ \t]+(.*)/) {
|
||||
my $t = $1;
|
||||
|
||||
if ($t eq ".text") {
|
||||
$t = "\@$objfile";
|
||||
} elsif ($t =~ /^\.text\+0x(.*)$/) {
|
||||
$t = "$1\@$objfile";
|
||||
}
|
||||
|
||||
$call_graph{$source}->{$t} = 1;
|
||||
}
|
||||
}
|
||||
close(DISASSEMBLY);
|
||||
|
||||
# Extract frame sizes from the corresponding .su file.
|
||||
if ($objfile =~ /^(.*).o$/) {
|
||||
my $sufile = "$1.su";
|
||||
|
||||
open(SUFILE, "<$sufile") || die "Can't open $sufile";
|
||||
while (<SUFILE>) {
|
||||
$frame_size{"$1\@$objfile"} = $2 + $call_cost
|
||||
if /^.*:([^\t ]+)[ \t]+([0-9]+)/;
|
||||
}
|
||||
close(SUFILE);
|
||||
}
|
||||
}
|
||||
|
||||
# In this step, we enumerate each list of callees in the call graph and
|
||||
# try to resolve the symbols. We omit ones we can't resolve, but keep a
|
||||
# set of them anyway.
|
||||
|
||||
my %unresolved;
|
||||
|
||||
foreach (keys %call_graph) {
|
||||
my $from = $_;
|
||||
my $callees = $call_graph{$from};
|
||||
my %resolved;
|
||||
|
||||
foreach (keys %$callees) {
|
||||
my $t = $_;
|
||||
|
||||
if (defined($addresses{$t})) {
|
||||
$resolved{$addresses{$t}} = 1;
|
||||
} elsif (defined($global_name{$t})) {
|
||||
$resolved{$global_name{$t}} = 1;
|
||||
warn "Ambiguous resolution: $t" if defined ($ambiguous{$t});
|
||||
} elsif (defined($call_graph{$t})) {
|
||||
$resolved{$t} = 1;
|
||||
} else {
|
||||
$unresolved{$t} = 1;
|
||||
}
|
||||
}
|
||||
|
||||
$call_graph{$from} = \%resolved;
|
||||
}
|
||||
|
||||
# Create fake edges and nodes to account for dynamic behaviour.
|
||||
$call_graph{"INTERRUPT"} = {};
|
||||
|
||||
foreach (keys %call_graph) {
|
||||
$call_graph{"INTERRUPT"}->{$_} = 1 if /^__vector_/;
|
||||
}
|
||||
|
||||
# Trace the call graph and calculate, for each function:
|
||||
#
|
||||
# - inherited frames: maximum inherited frame of callees, plus own
|
||||
# frame size.
|
||||
# - height: maximum height of callees, plus one.
|
||||
# - recursion: is the function called recursively (including indirect
|
||||
# recursion)?
|
||||
|
||||
my %has_caller;
|
||||
my %visited;
|
||||
my %total_cost;
|
||||
my %call_depth;
|
||||
|
||||
sub trace {
|
||||
my $f = shift;
|
||||
|
||||
if ($visited{$f}) {
|
||||
$visited{$f} = "R" if $visited{$f} eq "?";
|
||||
return;
|
||||
}
|
||||
|
||||
$visited{$f} = "?";
|
||||
|
||||
my $max_depth = 0;
|
||||
my $max_frame = 0;
|
||||
|
||||
my $targets = $call_graph{$f} || die "Unknown function: $f";
|
||||
if (defined($targets)) {
|
||||
foreach (keys %$targets) {
|
||||
my $t = $_;
|
||||
|
||||
$has_caller{$t} = 1;
|
||||
trace($t);
|
||||
|
||||
my $is = $total_cost{$t};
|
||||
my $d = $call_depth{$t};
|
||||
|
||||
$max_frame = $is if $is > $max_frame;
|
||||
$max_depth = $d if $d > $max_depth;
|
||||
}
|
||||
}
|
||||
|
||||
$call_depth{$f} = $max_depth + 1;
|
||||
$total_cost{$f} = $max_frame + ($frame_size{$f} || 0);
|
||||
$visited{$f} = " " if $visited{$f} eq "?";
|
||||
}
|
||||
|
||||
foreach (keys %call_graph) { trace $_; }
|
||||
|
||||
# Now, print results in a nice table.
|
||||
printf " %-40s %8s %8s %8s\n",
|
||||
"Function Name", "Cost", "Frame", "Height";
|
||||
print "------------------------------------";
|
||||
print "------------------------------------\n";
|
||||
|
||||
my $max_iv = 0;
|
||||
my $main = 0;
|
||||
|
||||
foreach (sort { $total_cost{$b} <=> $total_cost{$a} } keys %visited) {
|
||||
my $name = $_;
|
||||
|
||||
if (/^(.*)@(.*)$/) {
|
||||
$name = $1 unless $ambiguous{$name};
|
||||
}
|
||||
|
||||
my $tag = $visited{$_};
|
||||
my $cost = $total_cost{$_};
|
||||
|
||||
$name = $_ if $ambiguous{$name};
|
||||
$tag = ">" unless $has_caller{$_};
|
||||
|
||||
if (/^__vector_/) {
|
||||
$max_iv = $cost if $cost > $max_iv;
|
||||
} elsif (/^main@/) {
|
||||
$main = $cost;
|
||||
}
|
||||
|
||||
if ($ambiguous{$name}) { $name = $_; }
|
||||
|
||||
printf "%s %-40s %8d %8d %8d\n", $tag, $name, $cost,
|
||||
$frame_size{$_} || 0, $call_depth{$_};
|
||||
}
|
||||
|
||||
print "\n";
|
||||
|
||||
print "Peak execution estimate (main + worst-case IV):\n";
|
||||
printf " main = %d, worst IV = %d, total = %d\n",
|
||||
$total_cost{$global_name{"main"}},
|
||||
$total_cost{"INTERRUPT"},
|
||||
$total_cost{$global_name{"main"}} + $total_cost{"INTERRUPT"};
|
||||
|
||||
print "\n";
|
||||
|
||||
print "The following functions were not resolved:\n";
|
||||
foreach (keys %unresolved) { print " $_\n"; }
|
||||
@@ -0,0 +1,120 @@
|
||||
# avstack.pl
|
||||
|
||||
dlbeer@gmail.com
|
||||
31 May 2013 (updated 24 Aug 2015)
|
||||
|
||||
When developing for memory constrained systems, it's useful to know at compile-time that the available memory is sufficient for your application firmware. Dynamic allocation is usually avoided, and the size of statically allocated memory in the `.data` and `.bss` sections can be easily inspected with GNU `size` or a similary tool. This leaves one problem: stack use. The stack grows and shrinks dynamically at runtime, but under some circumstances, the peak stack use can be statically analyzed.
|
||||
|
||||
* [avstack.pl](avstack.pl)
|
||||
|
||||
The script linked above is a static stack checker, intended for use with a recent-ish version of [AVR-GCC](http://gcc.gnu.org/wiki/avr-gcc). In order to use it, you must ensure that your object files are compiled with `-fstack-usage`. This causes GCC to generate a `.su` file for every `.o` file. [These files](http://gcc.gnu.org/onlinedocs/gnat_ugn_unw/Static-Stack-Usage-Analysis.html) contain information on the size of the stack frame for each function compiled in the `.o` file. For example:
|
||||
|
||||
sha1.c:43:13:mix_w 0 static
|
||||
sha1.c:54:13:rot_left 0 static
|
||||
sha1.c:69:13:rot_right 0 static
|
||||
sha1.c:178:6:sha1_init 0 static
|
||||
sha1.c:183:6:sha1_next 65 static
|
||||
sha1.c:192:6:sha1_final 6 static
|
||||
|
||||
Once you have these files, invoke `avstack.pl`, passing as arguments the names of all `.o` files that will be linked into your binary. The `.su` files are assumed to be located in the same directories as their corresponding `.o` files.
|
||||
|
||||
The script reads all `.su` files, and disassembles all `.o` files, including relocation data. The disassemblies are parsed and used to construct a call graph. Multiple functions in different translation units with the same name don't cause problems, provided there are no global namespace collisions. Information will appear on any unresolvable or ambiguous references.
|
||||
|
||||
Next, the call graph is traced to find the peak stack usage of all functions. This is calculated for each function as the maximum stack usage of any of its callees, plus its own stack frame, plus some call-cost constant (not included in GCC's analysis).
|
||||
|
||||
Here's an example of the output produced:
|
||||
|
||||
$ ./avstack.pl */*.o
|
||||
Func Cost Frame Height
|
||||
------------------------------------------------------------------------
|
||||
> main 235 90 4
|
||||
prng_next 145 72 3
|
||||
sha1_final 83 10 3
|
||||
sha1_next 73 69 2
|
||||
__vector_16 28 20 3
|
||||
> INTERRUPT 28 0 4
|
||||
clock_signal_tick 8 4 2
|
||||
clock_iterate 8 4 2
|
||||
clock_init 4 4 1
|
||||
led_init 4 4 1
|
||||
led_set 4 4 1
|
||||
> led_get 4 4 1
|
||||
rot_right 4 4 1
|
||||
mix_w 4 4 1
|
||||
prng_init 4 4 1
|
||||
clock_signal_pps 4 4 1
|
||||
clock_get_raw 4 4 1
|
||||
clock_get 4 4 1
|
||||
prng_stir 4 4 1
|
||||
rot_left 4 4 1
|
||||
sha1_init 4 4 1
|
||||
led_slot_clock_tick 4 4 1
|
||||
|
||||
The following functions were not resolved:
|
||||
memcpy_P
|
||||
|
||||
The columns are:
|
||||
|
||||
* **Cost**: peak stack usage during a call to the function.
|
||||
* **Frame**: stack frame size, obtained from the `.su` file, plus the call-cost constant.
|
||||
* **Height**: height in call graph -- calculated as maximum height of any callee, plus one.
|
||||
|
||||
Indicators to the left of the function name indicate features of the call graph. A `>` indicates that the function has no callee. This could be because it's an entry point (like `main` or an interrupt vector), because the function is called dynamically through a function pointer, or simply that it's unused. An `R` indicates that the function is recursive, and the cost estimate is for a single level of recursion. Multiple functions with the same name are distinguished in the listing by appending a suffix of the form `@filename.o`.
|
||||
|
||||
You can customize this script by altering two variables near the beginning:
|
||||
|
||||
my $objdump = "avr-objdump";
|
||||
my $call_cost = 4;
|
||||
|
||||
Note that making sense of the output of the stack analysis still requires you to know something about how your program runs. For example, in many programs, the actual peak stack use would be the cost of `main`, plus the maximum cost of any interrupt handler that might execute. You will also need to take into account dynamically invoked functions, if you have any.
|
||||
|
||||
To make things easier, there is a processing section in which fake nodes and edges can be added to the call graph to account for dynamic behaviour. For example, the script currently contains in this section, to represent interrupt execution:
|
||||
|
||||
$call_graph{"INTERRUPT"} = {};
|
||||
|
||||
foreach (keys %call_graph) {
|
||||
$call_graph{"INTERRUPT"}->{$_} = 1 if /^__vector_/;
|
||||
}
|
||||
|
||||
## Use with C++
|
||||
|
||||
Updated: 24 Aug 2015
|
||||
|
||||
Unfortunately, GCC uses "printable" names in the output for `-fstack-usage`. For example, the function:
|
||||
|
||||
namespace test {
|
||||
|
||||
uint32_t crc32_ieee802_3(const uint8_t *data, size_t len,
|
||||
uint32_t crc = 0)
|
||||
{
|
||||
...
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
Gets listed as:
|
||||
|
||||
foo.cpp:6:10:uint32_t test::crc32_ieee802_3(const uint8_t*, size_t, uint32_t) 32 static
|
||||
|
||||
This is difficult to match up with the disassembler output. Using `c++filt` doesn't help, because it doesn't know anything about typedefs (`uint8_t` becomes `unsigned char`, for example).
|
||||
|
||||
I've prepared the following patches for two versions of GCC. They're independent of language and target:
|
||||
|
||||
* [gcc-4.7.2-mangled-stack-usage.patch](../downloads/gcc-4.7.2-mangled-stack-usage.patch)
|
||||
* [gcc-4.9.2-mangled-stack-usage.patch](../downloads/gcc-4.9.2-mangled-stack-usage.patch)
|
||||
|
||||
These patches cause GCC to use names in the output for `-fstack-usage` which match the disassembler output:
|
||||
|
||||
foo.cpp:6:10:_ZN4test15crc32_ieee802_3EPKhyj 32 static
|
||||
|
||||
Nothing changes for C code. With these patches, `avstack.pl` works fine for C++ (although you probably want to run the final output through `c++filt`).
|
||||
|
||||
## Copyright
|
||||
|
||||
Copyright © 2013 Daniel Beer dlbeer@gmail.com
|
||||
|
||||
>
|
||||
|
||||
Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||
Reference in New Issue
Block a user