Fix thread tracking when decoding Coresight trace and add a new test for it.
Unfortunately the test has to be added as a separate binary like the other existing Coresight workloads which needs a bit of makefile boilerplate. Ideally it would be a builtin Perf test workload, but Perf has a lot of dependencies and generates a lot of trace when starting up which makes tracing tests very slow because all that has to be decoded.
Hopefully I can find a generic way to enable Perf events at the beginning of the workload and then I can migrate everything from tools/perf/tests/shell/coresight to Perf test workloads, but that will have to be done as a separate change.
Signed-off-by: James Clark james.clark@linaro.org --- James Clark (1): perf test cs-etm: Test thread attribution
Leo Yan (1): perf cs-etm: Queue context packets for frontend
tools/perf/tests/shell/coresight/Makefile | 1 + .../shell/coresight/context_switch_loop/.gitignore | 1 + .../shell/coresight/context_switch_loop/Makefile | 29 ++++ .../context_switch_loop/context_switch_loop.c | 83 ++++++++++ .../tests/shell/coresight/context_switch_thread.sh | 48 ++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 18 +- tools/perf/util/cs-etm.c | 181 +++++++++++++-------- tools/perf/util/cs-etm.h | 5 +- 8 files changed, 295 insertions(+), 71 deletions(-) --- base-commit: 09d355618f7ccc27ffc7fc668b2e232872962079 change-id: 20260515-james-cs-context-tracking-fix-754998bae7ed
Best regards,
From: Leo Yan leo.yan@arm.com
PE_CONTEXT elements update the context ID and exception level, but the decoder may still have prior packets cached for frontend processing. Updating the context immediately in the decoder backend can make those cached packets get consumed with the wrong thread or EL state.
Add a CS_ETM_CONTEXT packet carrying the TID and EL to the frontend, this keeps context changes ordered with the rest of the packet stream and avoids mismatches when synthesizing samples from cached packets.
Separate the memory access function into one for the frontend and one for decoding. The frontend also needs memory access to attach the instruction to samples.
Treat context packets as a boundary for branch sample generation and remove tidq->prev_packet_thread because it's not possible to branch to a different thread, so only tracking the current thread is required for sample generation.
Reported-by: Amir Ayupov aaupov@meta.com Closes: https://lore.kernel.org/linux-perf-users/20260515021135.1729028-1-aaupov@met... Co-authored-by: James Clark james.clark@linaro.org Signed-off-by: Leo Yan leo.yan@arm.com --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 18 ++- tools/perf/util/cs-etm.c | 181 +++++++++++++++--------- tools/perf/util/cs-etm.h | 5 +- 3 files changed, 133 insertions(+), 71 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index dee3020ceaa9..0b9e316b3bf4 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -525,6 +525,8 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, const ocsd_generic_trace_elem *elem, const uint8_t trace_chan_id) { + struct cs_etm_packet *packet; + int ret; pid_t tid = -1;
/* @@ -546,12 +548,18 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, break; }
- if (cs_etm__etmq_set_tid_el(etmq, tid, trace_chan_id, - elem->context.exception_level)) + if (cs_etm__etmq_set_decode_tid_el(etmq, tid, trace_chan_id, + elem->context.exception_level)) return OCSD_RESP_FATAL_SYS_ERR;
- if (tid == -1) - return OCSD_RESP_CONT; + ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id, + CS_ETM_CONTEXT); + if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT) + return ret; + + packet = &packet_queue->packet_buffer[packet_queue->tail]; + packet->tid = tid; + packet->el = elem->context.exception_level;
/* * A timestamp is generated after a PE_CONTEXT element so make sure @@ -559,7 +567,7 @@ cs_etm_decoder__set_tid(struct cs_etm_queue *etmq, */ cs_etm_decoder__reset_timestamp(packet_queue);
- return OCSD_RESP_CONT; + return ret; }
static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 8a639d2e51a4..e22196418e8a 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -85,10 +85,11 @@ struct cs_etm_traceid_queue { u64 period_instructions; size_t last_branch_pos; union perf_event *event_buf; - struct thread *thread; - struct thread *prev_packet_thread; - ocsd_ex_level prev_packet_el; - ocsd_ex_level el; + struct thread *frontend_thread; + struct thread *decode_thread; + ocsd_ex_level prev_frontend_el; + ocsd_ex_level frontend_el; + ocsd_ex_level decode_el; struct branch_stack *last_branch; struct branch_stack *last_branch_rb; struct cs_etm_packet *prev_packet; @@ -614,10 +615,12 @@ static int cs_etm__init_traceid_queue(struct cs_etm_queue *etmq,
queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; tidq->trace_chan_id = trace_chan_id; - tidq->el = tidq->prev_packet_el = ocsd_EL_unknown; - tidq->thread = machine__findnew_thread(&etm->session->machines.host, -1, + tidq->frontend_el = tidq->decode_el = ocsd_EL_unknown; + tidq->frontend_thread = machine__findnew_thread(&etm->session->machines.host, -1, queue->tid); - tidq->prev_packet_thread = machine__idle_thread(&etm->session->machines.host); + tidq->decode_thread = machine__findnew_thread(&etm->session->machines.host, -1, + queue->tid); +
tidq->packet = zalloc(sizeof(struct cs_etm_packet)); if (!tidq->packet) @@ -751,20 +754,17 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm, * Swap PACKET with PREV_PACKET: PACKET becomes PREV_PACKET for * the next incoming packet. * - * Threads and exception levels are also tracked for both the - * previous and current packets. This is because the previous - * packet is used for the 'from' IP for branch samples, so the - * thread at that time must also be assigned to that sample. - * Across discontinuity packets the thread can change, so by - * tracking the thread for the previous packet the branch sample - * will have the correct info. + * Track Exception levels for both the previous and current + * packets. This is because the previous packet's address is + * used for the 'from' IP for branch samples, so the previous EL + * must also be used so that sample shows it originates from the + * correct EL. Branches can't branch to a different thread, so + * no need to track the previous thread. */ tmp = tidq->packet; tidq->packet = tidq->prev_packet; tidq->prev_packet = tmp; - tidq->prev_packet_el = tidq->el; - thread__put(tidq->prev_packet_thread); - tidq->prev_packet_thread = thread__get(tidq->thread); + tidq->prev_frontend_el = tidq->frontend_el; } }
@@ -937,8 +937,8 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
/* Free this traceid_queue from the array */ tidq = etmq->traceid_queues[idx]; - thread__zput(tidq->thread); - thread__zput(tidq->prev_packet_thread); + thread__zput(tidq->frontend_thread); + thread__zput(tidq->decode_thread); zfree(&tidq->event_buf); zfree(&tidq->last_branch); zfree(&tidq->last_branch_rb); @@ -1083,9 +1083,10 @@ static u8 cs_etm__cpu_mode(struct cs_etm_queue *etmq, u64 address, } }
-static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, - u64 address, size_t size, u8 *buffer, - const ocsd_mem_space_acc_t mem_space) +static u32 __cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, + u64 address, size_t size, u8 *buffer, + const ocsd_mem_space_acc_t mem_space, + bool is_decoder) { u8 cpumode; u64 offset; @@ -1094,6 +1095,8 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, struct dso *dso; struct cs_etm_traceid_queue *tidq; int ret = 0; + ocsd_ex_level el; + struct thread *thread;
if (!etmq) return 0; @@ -1103,27 +1106,35 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, if (!tidq) goto out;
+ if (is_decoder) { + el = tidq->decode_el; + thread = tidq->decode_thread; + } else { + el = tidq->frontend_el; + thread = tidq->frontend_thread; + } + /* - * We've already tracked EL along side the PID in cs_etm__set_thread() - * so double check that it matches what OpenCSD thinks as well. It - * doesn't distinguish between EL0 and EL1 for this mem access callback - * so we had to do the extra tracking. Skip validation if it's any of - * the 'any' values. + * We've already tracked EL along side the PID in + * cs_etm__etmq_set_[decode/frontend]_tid_el() so double check that it + * matches what OpenCSD thinks as well. OpenCSD doesn't distinguish + * between EL0 and EL1 for this mem access callback so we had to do the + * extra tracking. Skip validation if it's any of the 'any' values. */ if (!(mem_space == OCSD_MEM_SPACE_ANY || mem_space == OCSD_MEM_SPACE_N || mem_space == OCSD_MEM_SPACE_S)) { if (mem_space & OCSD_MEM_SPACE_EL1N) { /* Includes both non secure EL1 and EL0 */ - assert(tidq->el == ocsd_EL1 || tidq->el == ocsd_EL0); + assert(el == ocsd_EL1 || el == ocsd_EL0); } else if (mem_space & OCSD_MEM_SPACE_EL2) - assert(tidq->el == ocsd_EL2); + assert(el == ocsd_EL2); else if (mem_space & OCSD_MEM_SPACE_EL3) - assert(tidq->el == ocsd_EL3); + assert(el == ocsd_EL3); }
- cpumode = cs_etm__cpu_mode(etmq, address, tidq->el); + cpumode = cs_etm__cpu_mode(etmq, address, el);
- if (!thread__find_map(tidq->thread, cpumode, address, &al)) + if (!thread__find_map(thread, cpumode, address, &al)) goto out;
dso = map__dso(al.map); @@ -1138,7 +1149,7 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
map__load(al.map);
- len = dso__data_read_offset(dso, maps__machine(thread__maps(tidq->thread)), + len = dso__data_read_offset(dso, maps__machine(thread__maps(thread)), offset, buffer, size);
if (len <= 0) { @@ -1158,6 +1169,20 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, return ret; }
+static u32 cs_etm__frontend_mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, + u64 address, size_t size, u8 *buffer) +{ + return __cs_etm__mem_access(etmq, trace_chan_id, address, size, buffer, + OCSD_MEM_SPACE_NONE, false); +} +static u32 cs_etm__decoder_mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id, + u64 address, size_t size, u8 *buffer, + const ocsd_mem_space_acc_t mem_space) +{ + return __cs_etm__mem_access(etmq, trace_chan_id, address, size, buffer, + mem_space, true); +} + static struct cs_etm_queue *cs_etm__alloc_queue(void) { struct cs_etm_queue *etmq = zalloc(sizeof(*etmq)); @@ -1337,8 +1362,8 @@ static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq, { u8 instrBytes[2];
- cs_etm__mem_access(etmq, trace_chan_id, addr, ARRAY_SIZE(instrBytes), - instrBytes, 0); + cs_etm__frontend_mem_access(etmq, trace_chan_id, addr, + ARRAY_SIZE(instrBytes), instrBytes); /* * T32 instruction size is indicated by bits[15:11] of the first * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111 @@ -1472,34 +1497,55 @@ cs_etm__get_trace(struct cs_etm_queue *etmq) return etmq->buf_len; }
-static void cs_etm__set_thread(struct cs_etm_queue *etmq, - struct cs_etm_traceid_queue *tidq, pid_t tid, - ocsd_ex_level el) +/* + * Set the TID and EL from the latest packet pulled out of the + * cs_etm_packet_queue. Used for generating samples by the frontend. + */ +static int cs_etm__etmq_set_frontend_tid_el(struct cs_etm_queue *etmq, + struct cs_etm_traceid_queue *tidq) { - struct machine *machine = cs_etm__get_machine(etmq, el); + struct cs_etm_packet *packet = tidq->packet;
- if (tid != -1) { - thread__zput(tidq->thread); - tidq->thread = machine__find_thread(machine, -1, tid); + struct machine *machine = cs_etm__get_machine(etmq, packet->el); + + if (packet->tid != -1) { + thread__zput(tidq->frontend_thread); + tidq->frontend_thread = machine__find_thread(machine, -1, packet->tid); }
/* Couldn't find a known thread */ - if (!tidq->thread) - tidq->thread = machine__idle_thread(machine); + if (!tidq->frontend_thread) + tidq->frontend_thread = machine__idle_thread(machine);
- tidq->el = el; + tidq->frontend_el = packet->el; + + return 0; }
-int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, - u8 trace_chan_id, ocsd_ex_level el) +/* + * Set the TID and EL used to access memory for decoding. This is ahead + * of the TID and EL for generating samples + */ +int cs_etm__etmq_set_decode_tid_el(struct cs_etm_queue *etmq, pid_t tid, + u8 trace_chan_id, ocsd_ex_level el) { struct cs_etm_traceid_queue *tidq; + struct machine *machine = cs_etm__get_machine(etmq, el);
tidq = cs_etm__etmq_get_traceid_queue(etmq, trace_chan_id); if (!tidq) return -EINVAL;
- cs_etm__set_thread(etmq, tidq, tid, el); + if (tid != -1) { + thread__zput(tidq->decode_thread); + tidq->decode_thread = machine__find_thread(machine, -1, tid); + } + + /* Couldn't find a known thread */ + if (!tidq->decode_thread) + tidq->decode_thread = machine__idle_thread(machine); + + tidq->decode_el = el; return 0; }
@@ -1533,8 +1579,8 @@ static void cs_etm__copy_insn(struct cs_etm_queue *etmq, else sample->insn_len = 4;
- cs_etm__mem_access(etmq, trace_chan_id, sample->ip, sample->insn_len, - (void *)sample->insn, 0); + cs_etm__frontend_mem_access(etmq, trace_chan_id, sample->ip, + sample->insn_len, (void *)sample->insn); }
u64 cs_etm__convert_sample_time(struct cs_etm_queue *etmq, u64 cs_timestamp) @@ -1570,15 +1616,15 @@ static int cs_etm__synth_instruction_sample(struct cs_etm_queue *etmq,
perf_sample__init(&sample, /*all=*/true); event->sample.header.type = PERF_RECORD_SAMPLE; - event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->el); + event->sample.header.misc = cs_etm__cpu_mode(etmq, addr, tidq->frontend_el); event->sample.header.size = sizeof(struct perf_event_header);
/* Set time field based on etm auxtrace config. */ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
sample.ip = addr; - sample.pid = thread__pid(tidq->thread); - sample.tid = thread__tid(tidq->thread); + sample.pid = thread__pid(tidq->frontend_thread); + sample.tid = thread__tid(tidq->frontend_thread); sample.id = etmq->etm->instructions_id; sample.stream_id = etmq->etm->instructions_id; sample.period = period; @@ -1631,15 +1677,15 @@ static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq,
event->sample.header.type = PERF_RECORD_SAMPLE; event->sample.header.misc = cs_etm__cpu_mode(etmq, ip, - tidq->prev_packet_el); + tidq->prev_frontend_el); event->sample.header.size = sizeof(struct perf_event_header);
/* Set time field based on etm auxtrace config. */ sample.time = cs_etm__resolve_sample_time(etmq, tidq);
sample.ip = ip; - sample.pid = thread__pid(tidq->prev_packet_thread); - sample.tid = thread__tid(tidq->prev_packet_thread); + sample.pid = thread__pid(tidq->frontend_thread); + sample.tid = thread__tid(tidq->frontend_thread); sample.addr = cs_etm__first_executed_instr(tidq->packet); sample.id = etmq->etm->branches_id; sample.stream_id = etmq->etm->branches_id; @@ -1874,7 +1920,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq, bool generate_sample = false;
/* Generate sample for tracing on packet */ - if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY) + if (tidq->prev_packet->sample_type == CS_ETM_DISCONTINUITY || + tidq->prev_packet->sample_type == CS_ETM_CONTEXT) generate_sample = true;
/* Generate sample for branch taken packet */ @@ -2057,8 +2104,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * so below only read 2 bytes as instruction size for T32. */ addr = end_addr - 2; - cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr16), - (u8 *)&instr16, 0); + cs_etm__frontend_mem_access(etmq, trace_chan_id, addr, + sizeof(instr16), (u8 *)&instr16); if ((instr16 & 0xFF00) == 0xDF00) return true;
@@ -2073,8 +2120,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * +---------+---------+-------------------------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), - (u8 *)&instr32, 0); + cs_etm__frontend_mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0x0F000000) == 0x0F000000 && (instr32 & 0xF0000000) != 0xF0000000) return true; @@ -2090,8 +2137,8 @@ static bool cs_etm__is_svc_instr(struct cs_etm_queue *etmq, u8 trace_chan_id, * +-----------------------+---------+-----------+ */ addr = end_addr - 4; - cs_etm__mem_access(etmq, trace_chan_id, addr, sizeof(instr32), - (u8 *)&instr32, 0); + cs_etm__frontend_mem_access(etmq, trace_chan_id, addr, + sizeof(instr32), (u8 *)&instr32); if ((instr32 & 0xFFE0001F) == 0xd4000001) return true;
@@ -2376,6 +2423,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq, PERF_IP_FLAG_RETURN | PERF_IP_FLAG_INTERRUPT; break; + case CS_ETM_CONTEXT: case CS_ETM_EMPTY: default: break; @@ -2451,6 +2499,9 @@ static int cs_etm__process_traceid_queue(struct cs_etm_queue *etmq, */ cs_etm__sample(etmq, tidq); break; + case CS_ETM_CONTEXT: + cs_etm__etmq_set_frontend_tid_el(etmq, tidq); + break; case CS_ETM_EXCEPTION: case CS_ETM_EXCEPTION_RET: /* @@ -2602,7 +2653,7 @@ static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, if (!tidq) continue;
- if (tid == -1 || thread__tid(tidq->thread) == tid) + if (tid == -1 || thread__tid(tidq->frontend_thread) == tid) cs_etm__run_per_thread_timeless_decoder(etmq); } else cs_etm__run_per_cpu_timeless_decoder(etmq); @@ -3310,7 +3361,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq) */ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, 0x0L, ((u64) -1L), - cs_etm__mem_access)) + cs_etm__decoder_mem_access)) goto out_free_decoder;
zfree(&t_params); diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index aa9bb4a32eca..0c6dac9ebd11 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -158,6 +158,7 @@ enum cs_etm_sample_type { CS_ETM_DISCONTINUITY, CS_ETM_EXCEPTION, CS_ETM_EXCEPTION_RET, + CS_ETM_CONTEXT, };
enum cs_etm_isa { @@ -184,6 +185,8 @@ struct cs_etm_packet { u8 last_instr_size; u8 trace_chan_id; int cpu; + pid_t tid; + int el; };
#define CS_ETM_PACKET_MAX_BUFFER 1024 @@ -259,7 +262,7 @@ enum cs_etm_pid_fmt { #include <opencsd/ocsd_if_types.h> int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu); enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq); -int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid, +int cs_etm__etmq_set_decode_tid_el(struct cs_etm_queue *etmq, pid_t tid, u8 trace_chan_id, ocsd_ex_level el); bool cs_etm__etmq_is_timeless(struct cs_etm_queue *etmq); void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
Add a workload that creates two threads that read and write to a pipe so that every read/write results in the other thread being scheduled. If this is run on one CPU then the threads will alternate and we can test that processor trace always attributes symbols to the correct thread and that the attribution changes at the exact point that the thread switch happened.
Signed-off-by: James Clark james.clark@linaro.org --- tools/perf/tests/shell/coresight/Makefile | 1 + .../shell/coresight/context_switch_loop/.gitignore | 1 + .../shell/coresight/context_switch_loop/Makefile | 29 ++++++++ .../context_switch_loop/context_switch_loop.c | 83 ++++++++++++++++++++++ .../tests/shell/coresight/context_switch_thread.sh | 48 +++++++++++++ 5 files changed, 162 insertions(+)
diff --git a/tools/perf/tests/shell/coresight/Makefile b/tools/perf/tests/shell/coresight/Makefile index fa08fd9a5991..710e742d9def 100644 --- a/tools/perf/tests/shell/coresight/Makefile +++ b/tools/perf/tests/shell/coresight/Makefile @@ -6,6 +6,7 @@ include ../../../../../tools/scripts/utilities.mak
SUBDIRS = \ asm_pure_loop \ + context_switch_loop \ memcpy_thread \ thread_loop \ unroll_loop_thread diff --git a/tools/perf/tests/shell/coresight/context_switch_loop/.gitignore b/tools/perf/tests/shell/coresight/context_switch_loop/.gitignore new file mode 100644 index 000000000000..86a7becebba4 --- /dev/null +++ b/tools/perf/tests/shell/coresight/context_switch_loop/.gitignore @@ -0,0 +1 @@ +context_switch_loop diff --git a/tools/perf/tests/shell/coresight/context_switch_loop/Makefile b/tools/perf/tests/shell/coresight/context_switch_loop/Makefile new file mode 100644 index 000000000000..722afc993cb0 --- /dev/null +++ b/tools/perf/tests/shell/coresight/context_switch_loop/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 + +include ../Makefile.miniconfig + +BIN=context_switch_loop +LIB= + +all: $(BIN) + +$(BIN): $(BIN).c +ifdef CORESIGHT +ifeq ($(ARCH),arm64) + $(Q)$(CC) $(CFLAGS) $(BIN).c -O0 -o $(BIN) $(LIB) +endif +endif + +install-tests: all +ifdef CORESIGHT +ifeq ($(ARCH),arm64) + $(call QUIET_INSTALL, tests) \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)'; \ + $(INSTALL) $(BIN) '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/$(INSTDIR_SUB)/$(BIN)/$(BIN)' +endif +endif + +clean: + $(Q)$(RM) -f $(BIN) + +.PHONY: all clean install-tests diff --git a/tools/perf/tests/shell/coresight/context_switch_loop/context_switch_loop.c b/tools/perf/tests/shell/coresight/context_switch_loop/context_switch_loop.c new file mode 100644 index 000000000000..6b7cd11952a2 --- /dev/null +++ b/tools/perf/tests/shell/coresight/context_switch_loop/context_switch_loop.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/compiler.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/prctl.h> +#include <sys/wait.h> +#include <unistd.h> + +static int loops = 100; +static char buf; +int work = 1234; + +#define write_block(fd) \ + do { \ + if (write(fd, &buf, 1) <= 0) \ + exit(1); \ + } while (0) + +#define read_block(fd) \ + do { \ + if (read(fd, &buf, 1) <= 0) \ + exit(1); \ + } while (0) + +static noinline void thread1(int in_fd, int out_fd) +{ + for (int i = 0; i < loops; i++) { + read_block(in_fd); + work += i * 3; + write_block(out_fd); + } +} + +static noinline void thread2(int in_fd, int out_fd) +{ + for (int i = 0; i < loops; i++) { + write_block(out_fd); + work += i * 7; + read_block(in_fd); + } +} + +int main(int argc, char **argv) +{ + int a_to_b[2], b_to_a[2]; + pid_t thread1_pid; + int status; + + if (argc > 1) { + loops = atoi(argv[1]); + if (loops < 0) { + fprintf(stderr, "Invalid number of loops: %s\n", argv[1]); + return 1; + } + } + + if (pipe(a_to_b) || pipe(b_to_a)) { + perror("Pipe error"); + return 1; + } + + thread1_pid = fork(); + if (thread1_pid < 0) { + perror("Fork error"); + return 1; + } + + if (!thread1_pid) { + prctl(PR_SET_NAME, "thread1", 0, 0, 0); + thread1(b_to_a[0], a_to_b[1]); + exit(0); + } + + prctl(PR_SET_NAME, "thread2", 0, 0, 0); + thread2(a_to_b[0], b_to_a[1]); + + if (waitpid(thread1_pid, &status, 0) != thread1_pid || !WIFEXITED(status) || + WEXITSTATUS(status)) + return 1; + + return 0; +} diff --git a/tools/perf/tests/shell/coresight/context_switch_thread.sh b/tools/perf/tests/shell/coresight/context_switch_thread.sh new file mode 100755 index 000000000000..10b4118dff9b --- /dev/null +++ b/tools/perf/tests/shell/coresight/context_switch_thread.sh @@ -0,0 +1,48 @@ +#!/bin/bash -e +# CoreSight / Context switch thread attribution (exclusive) + +# SPDX-License-Identifier: GPL-2.0 + +TEST="context_switch_loop" + +if [ "$(id -u)" != 0 ]; then + # Requires root for "-C 0" in record command + echo "[Skip] No root permission" + exit 2 +fi + +# shellcheck source=../lib/coresight.sh +. "$(dirname $0)"/../lib/coresight.sh + +DATA="$DATD/perf-$TEST.data" +SCRIPT="$DATD/perf-$TEST.script" + +check_samples() { + owner_samples=$(grep -c "thread1.*thread1" "$SCRIPT" || true) + next_samples=$(grep -c "thread2.*thread2" "$SCRIPT" || true) + + if [ "$owner_samples" -eq 0 ] || [ "$next_samples" -eq 0 ]; then + err "No samples found" + fi + + if grep "thread2.*thread1" "$SCRIPT"; then + err "Thread1 symbol was attributed to thread2" + fi + + if grep "thread1.*thread2" "$SCRIPT"; then + err "Thread2 symbol was attributed to thread1" + fi +} + +# Pin to one CPU so the two threads alternate running but record into the +# same trace buffer. +perf record -o "$DATA" -e cs_etm/timestamp=0/u -C 0 \ + -- taskset --cpu-list 0 "$BIN" 20 > /dev/null 2>&1 + +# Test both instruction and branch sample generation modes. +perf script -i "$DATA" --itrace=i4 -F comm,pid,tid,ip,sym > "$SCRIPT" 2>/dev/null +check_samples +perf script -i "$DATA" --itrace=b -F comm,pid,tid,ip,sym > "$SCRIPT" 2>/dev/null +check_samples + +exit 0