CS ETM maintains its own circular array for last-branch entries, with local helpers to update, copy and reset the branch stack. This duplicates logic already provided by the common thread-stack code.
Record each branch with thread_stack__event() and synthesize the branch stack with thread_stack__br_sample(). This removes the local last_branch_rb buffer and its position tracking. Keep the buffer number updated via thread_stack__set_trace_nr(), which is used when exporting samples to Python scripts.
The output should remain the same, except that be->flags.predicted is no longer set. Since CoreSight trace does not provide branch prediction information, clearing the flag avoids confusion.
Signed-off-by: Leo Yan leo.yan@arm.com --- tools/perf/util/cs-etm.c | 152 +++++++++++++---------------------------------- 1 file changed, 41 insertions(+), 111 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 5bff8811d61e423463b7bd4e20d599d5b5307a1a..398ab3b7a429d402cc8e5f6cccb35c0b7c253732 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -83,14 +83,13 @@ struct cs_etm_auxtrace { struct cs_etm_traceid_queue { u8 trace_chan_id; u64 period_instructions; - size_t last_branch_pos; union perf_event *event_buf; struct thread *thread; struct thread *prev_packet_thread; ocsd_ex_level prev_packet_el; ocsd_ex_level el; + unsigned int br_stack_sz; struct branch_stack *last_branch; - struct branch_stack *last_branch_rb; struct cs_etm_packet *prev_packet; struct cs_etm_packet *packet; struct cs_etm_packet_queue packet_queue; @@ -635,9 +634,8 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, tidq->last_branch = zalloc(sz); if (!tidq->last_branch) goto out_free; - tidq->last_branch_rb = zalloc(sz); - if (!tidq->last_branch_rb) - goto out_free; + + tidq->br_stack_sz = etm->synth_opts.last_branch_sz; }
tidq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); @@ -647,7 +645,6 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq, return 0;
out_free: - zfree(&tidq->last_branch_rb); zfree(&tidq->last_branch); zfree(&tidq->prev_packet); zfree(&tidq->packet); @@ -941,7 +938,6 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq) thread__zput(tidq->prev_packet_thread); zfree(&tidq->event_buf); zfree(&tidq->last_branch); - zfree(&tidq->last_branch_rb); zfree(&tidq->prev_packet); zfree(&tidq->packet); zfree(&tidq); @@ -1281,57 +1277,6 @@ static int cs_etm__queue_first_cs_timestamp(struct cs_etm_auxtrace *etm, return ret; }
-static inline -void cs_etm__copy_last_branch_rb(struct cs_etm_queue *etmq, - struct cs_etm_traceid_queue *tidq) -{ - struct branch_stack *bs_src = tidq->last_branch_rb; - struct branch_stack *bs_dst = tidq->last_branch; - size_t nr = 0; - - /* - * Set the number of records before early exit: ->nr is used to - * determine how many branches to copy from ->entries. - */ - bs_dst->nr = bs_src->nr; - - /* - * Early exit when there is nothing to copy. - */ - if (!bs_src->nr) - return; - - /* - * As bs_src->entries is a circular buffer, we need to copy from it in - * two steps. First, copy the branches from the most recently inserted - * branch ->last_branch_pos until the end of bs_src->entries buffer. - */ - nr = etmq->etm->synth_opts.last_branch_sz - tidq->last_branch_pos; - memcpy(&bs_dst->entries[0], - &bs_src->entries[tidq->last_branch_pos], - sizeof(struct branch_entry) * nr); - - /* - * If we wrapped around at least once, the branches from the beginning - * of the bs_src->entries buffer and until the ->last_branch_pos element - * are older valid branches: copy them over. The total number of - * branches copied over will be equal to the number of branches asked by - * the user in last_branch_sz. - */ - if (bs_src->nr >= etmq->etm->synth_opts.last_branch_sz) { - memcpy(&bs_dst->entries[nr], - &bs_src->entries[0], - sizeof(struct branch_entry) * tidq->last_branch_pos); - } -} - -static inline -void cs_etm__reset_last_branch_rb(struct cs_etm_traceid_queue *tidq) -{ - tidq->last_branch_pos = 0; - tidq->last_branch_rb->nr = 0; -} - static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 addr) { @@ -1400,38 +1345,6 @@ static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq, return addr; }
-static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq, - struct cs_etm_traceid_queue *tidq) -{ - struct branch_stack *bs = tidq->last_branch_rb; - struct branch_entry *be; - - /* - * The branches are recorded in a circular buffer in reverse - * chronological order: we start recording from the last element of the - * buffer down. After writing the first element of the stack, move the - * insert position back to the end of the buffer. - */ - if (!tidq->last_branch_pos) - tidq->last_branch_pos = etmq->etm->synth_opts.last_branch_sz; - - tidq->last_branch_pos -= 1; - - be = &bs->entries[tidq->last_branch_pos]; - be->from = cs_etm__last_executed_instr(tidq->prev_packet); - be->to = cs_etm__first_executed_instr(tidq->packet); - /* No support for mispredict */ - be->flags.mispred = 0; - be->flags.predicted = 1; - - /* - * Increment bs->nr until reaching the number of last branches asked by - * the user on the command line. - */ - if (bs->nr < etmq->etm->synth_opts.last_branch_sz) - bs->nr += 1; -} - static int cs_etm__inject_event(struct cs_etm_auxtrace *etm, union perf_event *event, struct perf_sample *sample, u64 type) { @@ -1579,6 +1492,37 @@ static inline u64 cs_etm__resolve_sample_time(struct cs_etm_queue *etmq, return etm->latest_kernel_timestamp; }
+static void cs_etm__add_stack_event(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) +{ + u64 from, to; + int size; + + if (!tidq->prev_packet->last_instr_taken_branch) + return; + + if (tidq->prev_packet->sample_type != CS_ETM_RANGE || + tidq->packet->sample_type != CS_ETM_RANGE) + return; + + if (etmq->etm->synth_opts.last_branch) { + from = cs_etm__last_executed_instr(tidq->prev_packet); + to = cs_etm__first_executed_instr(tidq->packet); + + size = cs_etm__instr_size(etmq, tidq->trace_chan_id, + tidq->prev_packet->isa, from); + + /* Enable callchain so thread stack entry can be allocated */ + thread_stack__event(tidq->thread, tidq->prev_packet->cpu, + tidq->prev_packet->flags, from, to, size, + etmq->buffer->buffer_nr + 1, true, + tidq->br_stack_sz, 0); + } else { + thread_stack__set_trace_nr(tidq->thread, tidq->prev_packet->cpu, + etmq->buffer->buffer_nr + 1); + } +} + static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq, struct cs_etm_traceid_queue *tidq, u64 addr, u64 period) @@ -1608,8 +1552,12 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
cs_etm__copy_insn(etmq, tidq->trace_chan_id, tidq->packet, &sample);
- if (etm->synth_opts.last_branch) + if (etm->synth_opts.last_branch) { + thread_stack__br_sample(tidq->thread, tidq->packet->cpu, + tidq->last_branch, + tidq->br_stack_sz); sample.branch_stack = tidq->last_branch; + }
if (etm->synth_opts.inject) { ret = cs_etm__inject_event(etm, event, &sample, @@ -1798,14 +1746,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq,
tidq->period_instructions += tidq->packet->instr_count;
- /* - * Record a branch when the last instruction in - * PREV_PACKET is a branch. - */ - if (etm->synth_opts.last_branch && - tidq->prev_packet->sample_type == CS_ETM_RANGE && - tidq->prev_packet->last_instr_taken_branch) - cs_etm__update_last_branch_rb(etmq, tidq); + cs_etm__add_stack_event(etmq, tidq);
if (etm->synth_opts.instructions && tidq->period_instructions >= etm->instructions_sample_period) { @@ -1864,10 +1805,6 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, u64 offset = etm->instructions_sample_period - instrs_prev; u64 addr;
- /* Prepare last branches for instruction sample */ - if (etm->synth_opts.last_branch) - cs_etm__copy_last_branch_rb(etmq, tidq); - while (tidq->period_instructions >= etm->instructions_sample_period) { /* @@ -1947,10 +1884,6 @@ static int cs_etm__flush(struct cs_etm_queue *etmq, etmq->etm->synth_opts.instructions && tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr; - - /* Prepare last branches for instruction sample */ - cs_etm__copy_last_branch_rb(etmq, tidq); - /* * Generate a last branch event for the branches left in the * circular buffer at the end of the trace. @@ -1982,7 +1915,7 @@ static int cs_etm__flush(struct cs_etm_queue *etmq,
/* Reset last branches after flush the trace */ if (etm->synth_opts.last_branch) - cs_etm__reset_last_branch_rb(tidq); + thread_stack__flush(tidq->thread);
return err; } @@ -2006,9 +1939,6 @@ static int cs_etm__end_block(struct cs_etm_queue *etmq, tidq->prev_packet->sample_type == CS_ETM_RANGE) { u64 addr;
- /* Prepare last branches for instruction sample */ - cs_etm__copy_last_branch_rb(etmq, tidq); - /* * Use the address of the end of the last reported execution * range.