145 lines
3.1 KiB
C
145 lines
3.1 KiB
C
|
//
|
||
|
// This module allows performance monitoring by looking at
|
||
|
// the PC at regular intervals and building a histogram
|
||
|
//
|
||
|
// perf.start(start, end, nbins[, pc offset on stack])
|
||
|
// perf.stop() -> total sample, samples outside range, table { addr -> count , .. }
|
||
|
|
||
|
|
||
|
#include "ets_sys.h"
|
||
|
#include "os_type.h"
|
||
|
#include "osapi.h"
|
||
|
#include <stdlib.h>
|
||
|
|
||
|
#include "module.h"
|
||
|
#include "lauxlib.h"
|
||
|
#include "platform.h"
|
||
|
#include "hw_timer.h"
|
||
|
#include "cpu_esp8266.h"
|
||
|
|
||
|
typedef struct {
|
||
|
int ref;
|
||
|
uint32_t start;
|
||
|
uint32_t bucket_shift;
|
||
|
uint32_t bucket_count;
|
||
|
uint32_t total_samples;
|
||
|
uint32_t outside_samples;
|
||
|
uint32_t bucket[1];
|
||
|
} DATA;
|
||
|
|
||
|
static DATA *data;
|
||
|
extern char _flash_used_end[];
|
||
|
|
||
|
#define TIMER_OWNER ((os_param_t) 'p')
|
||
|
|
||
|
static void ICACHE_RAM_ATTR hw_timer_cb(os_param_t p)
|
||
|
{
|
||
|
(void) p;
|
||
|
uint32_t stackaddr;
|
||
|
|
||
|
if (data) {
|
||
|
uint32_t pc;
|
||
|
asm (
|
||
|
"rsr %0, EPC1;" /* read out the EPC */
|
||
|
:"=r"(pc)
|
||
|
);
|
||
|
|
||
|
uint32_t bucket_number = (pc - data->start) >> data->bucket_shift;
|
||
|
if (bucket_number < data->bucket_count) {
|
||
|
data->bucket[bucket_number]++;
|
||
|
} else {
|
||
|
data->outside_samples++;
|
||
|
}
|
||
|
data->total_samples++;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
static int perf_start(lua_State *L)
|
||
|
{
|
||
|
uint32_t start = luaL_optinteger(L, 1, 0x40000000);
|
||
|
uint32_t end = luaL_optinteger(L, 2, (uint32_t) _flash_used_end);
|
||
|
uint32_t bins = luaL_optinteger(L, 3, 1024);
|
||
|
|
||
|
if (end <= start) {
|
||
|
luaL_error(L, "end must be larger than start");
|
||
|
}
|
||
|
|
||
|
uint32_t binsize = (end - start + bins - 1) / bins;
|
||
|
|
||
|
// Round up to a power of two
|
||
|
int shift;
|
||
|
binsize = binsize - 1;
|
||
|
for (shift = 0; binsize > 0; shift++) {
|
||
|
binsize >>= 1;
|
||
|
}
|
||
|
|
||
|
bins = (end - start + (1 << shift) - 1) / (1 << shift);
|
||
|
|
||
|
size_t data_size = sizeof(DATA) + bins * sizeof(uint32_t);
|
||
|
DATA *d = (DATA *) lua_newuserdata(L, data_size);
|
||
|
memset(d, 0, data_size);
|
||
|
d->ref = luaL_ref(L, LUA_REGISTRYINDEX);
|
||
|
d->start = start;
|
||
|
d->bucket_shift = shift;
|
||
|
d->bucket_count = bins;
|
||
|
|
||
|
if (data) {
|
||
|
luaL_unref(L, LUA_REGISTRYINDEX, data->ref);
|
||
|
}
|
||
|
|
||
|
data = d;
|
||
|
|
||
|
// Start the timer
|
||
|
if (!platform_hw_timer_init(TIMER_OWNER, FRC1_SOURCE, TRUE)) {
|
||
|
// Failed to init the timer
|
||
|
data = NULL;
|
||
|
luaL_unref(L, LUA_REGISTRYINDEX, d->ref);
|
||
|
luaL_error(L, "Unable to initialize timer");
|
||
|
}
|
||
|
|
||
|
platform_hw_timer_set_func(TIMER_OWNER, hw_timer_cb, 0);
|
||
|
platform_hw_timer_arm_us(TIMER_OWNER, 50);
|
||
|
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
static int perf_stop(lua_State *L)
|
||
|
{
|
||
|
if (!data) {
|
||
|
return 0;
|
||
|
}
|
||
|
|
||
|
// stop the timer
|
||
|
platform_hw_timer_close(TIMER_OWNER);
|
||
|
|
||
|
DATA *d = data;
|
||
|
data = NULL;
|
||
|
|
||
|
lua_pushunsigned(L, d->total_samples);
|
||
|
lua_pushunsigned(L, d->outside_samples);
|
||
|
lua_newtable(L);
|
||
|
int i;
|
||
|
uint32_t addr = d->start;
|
||
|
for (i = 0; i < d->bucket_count; i++, addr += (1 << d->bucket_shift)) {
|
||
|
if (d->bucket[i]) {
|
||
|
lua_pushunsigned(L, addr);
|
||
|
lua_pushunsigned(L, d->bucket[i]);
|
||
|
lua_settable(L, -3);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
lua_pushunsigned(L, 1 << d->bucket_shift);
|
||
|
|
||
|
luaL_unref(L, LUA_REGISTRYINDEX, d->ref);
|
||
|
|
||
|
return 4;
|
||
|
}
|
||
|
|
||
|
LROT_BEGIN(perf, NULL, 0)
|
||
|
LROT_FUNCENTRY( start, perf_start )
|
||
|
LROT_FUNCENTRY( stop, perf_stop )
|
||
|
LROT_END(perf, NULL, 0)
|
||
|
|
||
|
|
||
|
NODEMCU_MODULE(PERF, "perf", perf, NULL);
|