From 6b1a27520f990700d0dd23976afd8992bf659698 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Tue, 14 Apr 2015 13:49:33 -0400
Subject: [PATCH 001/305] perf kmem: Fix compiles on RHEL6/OL6
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

0d68bc92c48 breaks compiles on RHEL6/OL6:
    cc1: warnings being treated as errors
    builtin-kmem.c: In function ‘search_page_alloc_stat’:
    builtin-kmem.c:322: error: declaration of ‘stat’ shadows a global declaration
                            node = &parent->rb_left;
    /usr/include/sys/stat.h:455: error: shadowed declaration is here
    builtin-kmem.c: In function ‘perf_evsel__process_page_alloc_event’:
    builtin-kmem.c:378: error: declaration of ‘stat’ shadows a global declaration
    /usr/include/sys/stat.h:455: error: shadowed declaration is here
    builtin-kmem.c: In function ‘perf_evsel__process_page_free_event’:
    builtin-kmem.c:431: error: declaration of ‘stat’ shadows a global declaration
    /usr/include/sys/stat.h:455: error: shadowed declaration is here

Rename local variable to pstat to avoid the name conflict.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Link: http://lkml.kernel.org/r/1429033773-31383-1-git-send-email-david.ahern@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 54 +++++++++++++++++++--------------------
 1 file changed, 27 insertions(+), 27 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 63ea01349b6e..4f0f38462d97 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -319,7 +319,7 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
 	return 0;
 }
 
-static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool create)
+static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
 {
 	struct rb_node **node = &page_alloc_tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -331,7 +331,7 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 		parent = *node;
 		data = rb_entry(*node, struct page_stat, node);
 
-		cmp = page_stat_cmp(data, stat);
+		cmp = page_stat_cmp(data, pstat);
 		if (cmp < 0)
 			node = &parent->rb_left;
 		else if (cmp > 0)
@@ -345,10 +345,10 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *stat, bool cre
 
 	data = zalloc(sizeof(*data));
 	if (data != NULL) {
-		data->page = stat->page;
-		data->order = stat->order;
-		data->gfp_flags = stat->gfp_flags;
-		data->migrate_type = stat->migrate_type;
+		data->page = pstat->page;
+		data->order = pstat->order;
+		data->gfp_flags = pstat->gfp_flags;
+		data->migrate_type = pstat->migrate_type;
 
 		rb_link_node(&data->node, parent, node);
 		rb_insert_color(&data->node, &page_alloc_tree);
@@ -375,7 +375,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 						       "migratetype");
 	u64 bytes = kmem_page_size << order;
-	struct page_stat *stat;
+	struct page_stat *pstat;
 	struct page_stat this = {
 		.order = order,
 		.gfp_flags = gfp_flags,
@@ -401,21 +401,21 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	 * This is to find the current page (with correct gfp flags and
 	 * migrate type) at free event.
 	 */
-	stat = search_page(page, true);
-	if (stat == NULL)
+	pstat = search_page(page, true);
+	if (pstat == NULL)
 		return -ENOMEM;
 
-	stat->order = order;
-	stat->gfp_flags = gfp_flags;
-	stat->migrate_type = migrate_type;
+	pstat->order = order;
+	pstat->gfp_flags = gfp_flags;
+	pstat->migrate_type = migrate_type;
 
 	this.page = page;
-	stat = search_page_alloc_stat(&this, true);
-	if (stat == NULL)
+	pstat = search_page_alloc_stat(&this, true);
+	if (pstat == NULL)
 		return -ENOMEM;
 
-	stat->nr_alloc++;
-	stat->alloc_bytes += bytes;
+	pstat->nr_alloc++;
+	pstat->alloc_bytes += bytes;
 
 	order_stats[order][migrate_type]++;
 
@@ -428,7 +428,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	u64 page;
 	unsigned int order = perf_evsel__intval(evsel, sample, "order");
 	u64 bytes = kmem_page_size << order;
-	struct page_stat *stat;
+	struct page_stat *pstat;
 	struct page_stat this = {
 		.order = order,
 	};
@@ -441,8 +441,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	nr_page_frees++;
 	total_page_free_bytes += bytes;
 
-	stat = search_page(page, false);
-	if (stat == NULL) {
+	pstat = search_page(page, false);
+	if (pstat == NULL) {
 		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 			  page, order);
 
@@ -453,18 +453,18 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	}
 
 	this.page = page;
-	this.gfp_flags = stat->gfp_flags;
-	this.migrate_type = stat->migrate_type;
+	this.gfp_flags = pstat->gfp_flags;
+	this.migrate_type = pstat->migrate_type;
 
-	rb_erase(&stat->node, &page_tree);
-	free(stat);
+	rb_erase(&pstat->node, &page_tree);
+	free(pstat);
 
-	stat = search_page_alloc_stat(&this, false);
-	if (stat == NULL)
+	pstat = search_page_alloc_stat(&this, false);
+	if (pstat == NULL)
 		return -ENOENT;
 
-	stat->nr_free++;
-	stat->free_bytes += bytes;
+	pstat->nr_free++;
+	pstat->free_bytes += bytes;
 
 	return 0;
 }

From cc5e461ae8e071a95bf6ee917b50998dc31cae17 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Sun, 19 Apr 2015 13:04:09 +0900
Subject: [PATCH 002/305] perf hists: Get rid of position field from struct
 hist_entry

It's not used anywhere, let's get rid of it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429416255-12070-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 846036a921dc..af192f172fa2 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -106,7 +106,6 @@ struct hist_entry {
 	u8			filtered;
 	char			*srcline;
 	struct symbol		*parent;
-	unsigned long		position;
 	struct rb_root		sorted_chain;
 	struct branch_info	*branch_info;
 	struct hists		*hists;

From a0b404f4c0820a934ae1b6ce39d8a4a0f01a7a20 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Sun, 19 Apr 2015 13:04:10 +0900
Subject: [PATCH 003/305] perf diff: Make hist_entry_diff fields union

The period_ratio_delta, period_ratio and wdiff are never by used at the
same time.  Instead, Just one of them is accessed according to a
comparison method.  So make it union to reduce memory footprint.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429416255-12070-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index af192f172fa2..de3303fe726d 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -58,15 +58,16 @@ struct he_stat {
 
 struct hist_entry_diff {
 	bool	computed;
+	union {
+		/* PERF_HPP__DELTA */
+		double	period_ratio_delta;
 
-	/* PERF_HPP__DELTA */
-	double	period_ratio_delta;
+		/* PERF_HPP__RATIO */
+		double	period_ratio;
 
-	/* PERF_HPP__RATIO */
-	double	period_ratio;
-
-	/* HISTC_WEIGHTED_DIFF */
-	s64	wdiff;
+		/* HISTC_WEIGHTED_DIFF */
+		s64	wdiff;
+	};
 };
 
 /**

From c21410554ee1a8e12fbc99f578d720b641cd1c74 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Sat, 18 Apr 2015 17:50:14 +0200
Subject: [PATCH 004/305] perf data: Show error message when conversion failed

Show message when errors occurred during conversion setup and conversion
process.

Before this patch:
  $ ./perf data convert --to-ctf=ctf
  $ echo $?
  255

After this patch:
  $ ./perf data convert --to-ctf=ctf
  Error during conversion setup.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-xvhr1vf7zav9kkeo9w1hv4uk@git.kernel.org
Link: http://lkml.kernel.org/r/1429372220-6406-2-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index dd17c9a32fbc..db4f709af18d 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -837,6 +837,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 	err = perf_session__process_events(session);
 	if (!err)
 		err = bt_ctf_stream_flush(cw->stream);
+	else
+		pr_err("Error during conversion.\n");
 
 	fprintf(stderr,
 		"[ perf data convert: Converted '%s' into CTF data '%s' ]\n",
@@ -847,11 +849,15 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 		(double) c.events_size / 1024.0 / 1024.0,
 		c.events_count);
 
-	/* its all good */
+	perf_session__delete(session);
+	ctf_writer__cleanup(cw);
+
+	return err;
+
 free_session:
 	perf_session__delete(session);
-
 free_writer:
 	ctf_writer__cleanup(cw);
+	pr_err("Error during conversion setup.\n");
 	return err;
 }

From d3542436c9a8d114bf81c0742e67ff8ce8be9324 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 17:50:18 +0200
Subject: [PATCH 005/305] tools lib traceevent: Add alias field to struct
 format_field

Introduce an 'alias' field to 'struct format_field' to be able
to use alternative name for the field.

It is initialized with same string pointer as 'name' field.
The free logic checks the 'alias' pointer being reset by user
and frees it.

This will be handy when converting data into CTF, where each
field within event needs to have a unique name (while this
is not required for tracepoint). Converter can easily assign
unique name into the format_field struct.

Suggested-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/n/tip-qwyq8blnfkg6s5vlbrvn1en3@git.kernel.org
Link: http://lkml.kernel.org/r/1429372220-6406-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/event-parse.c | 6 ++++--
 tools/lib/traceevent/event-parse.h | 1 +
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c
index 12a7e2a40c89..1ec56b459bc0 100644
--- a/tools/lib/traceevent/event-parse.c
+++ b/tools/lib/traceevent/event-parse.c
@@ -1387,7 +1387,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 			do_warning_event(event, "%s: no type found", __func__);
 			goto fail;
 		}
-		field->name = last_token;
+		field->name = field->alias = last_token;
 
 		if (test_type(type, EVENT_OP))
 			goto fail;
@@ -1469,7 +1469,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f
 				size_dynamic = type_size(field->name);
 				free_token(field->name);
 				strcat(field->type, brackets);
-				field->name = token;
+				field->name = field->alias = token;
 				type = read_token(&token);
 			} else {
 				char *new_type;
@@ -6444,6 +6444,8 @@ void pevent_ref(struct pevent *pevent)
 void pevent_free_format_field(struct format_field *field)
 {
 	free(field->type);
+	if (field->alias != field->name)
+		free(field->alias);
 	free(field->name);
 	free(field);
 }
diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h
index 86a5839fb048..063b1971eb35 100644
--- a/tools/lib/traceevent/event-parse.h
+++ b/tools/lib/traceevent/event-parse.h
@@ -191,6 +191,7 @@ struct format_field {
 	struct event_format	*event;
 	char			*type;
 	char			*name;
+	char			*alias;
 	int			offset;
 	int			size;
 	unsigned int		arraylen;

From 379a9a28a1b4364ab08fec35c84d39cf4995c509 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 22:34:38 +0200
Subject: [PATCH 006/305] tools build: No need to make libapi for perf
 explicitly

The perf build handles its dependencies by itself.

Also renaming libapi libapikfs to libapi as it got
changed just recently.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1429389280-18720-2-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/Makefile b/tools/Makefile
index 9a617adc6675..01995cc8ac44 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -47,10 +47,10 @@ cgroup firewire hv guest usb virtio vm net: FORCE
 liblockdep: FORCE
 	$(call descend,lib/lockdep)
 
-libapikfs: FORCE
+libapi: FORCE
 	$(call descend,lib/api)
 
-perf: libapikfs FORCE
+perf: FORCE
 	$(call descend,$@)
 
 selftests: FORCE
@@ -97,10 +97,10 @@ cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clea
 liblockdep_clean:
 	$(call descend,lib/lockdep,clean)
 
-libapikfs_clean:
+libapi_clean:
 	$(call descend,lib/api,clean)
 
-perf_clean: libapikfs_clean
+perf_clean:
 	$(call descend,$(@:_clean=),clean)
 
 selftests_clean:

From 16671c1e1cac2dbb995feb4bc55082fd939b5c2c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 22:34:39 +0200
Subject: [PATCH 007/305] tools build: Fix Makefile(s) to properly invoke tools
 build

Several fixes were needed to allow following builds:
  $ make tools/tmon
  $ make -C <kernelsrc> tools/perf
  $ make -C <kernelsrc>/tools perf

 - some of the tools (perf) use same make variables as in
   kernel build, unsetting srctree and objtree
 - using original $(O) for O variable
 - perf build does not follow the descend function setup
   invoking it via it's own make rule

I tried the rest of the tools/Makefile targets and they
seem to work now.

Reported-by: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1429389280-18720-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 Makefile       |  5 ++---
 tools/Makefile | 12 +++++++++++-
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/Makefile b/Makefile
index da36a3be7969..b1ca0607ed2e 100644
--- a/Makefile
+++ b/Makefile
@@ -214,7 +214,6 @@ VPATH		:= $(srctree)$(if $(KBUILD_EXTMOD),:$(KBUILD_EXTMOD))
 
 export srctree objtree VPATH
 
-
 # SUBARCH tells the usermode build what the underlying arch is.  That is set
 # first, and if a usermode build is happening, the "ARCH=um" on the command
 # line overrides the setting of ARCH below.  If a native build is happening,
@@ -1504,11 +1503,11 @@ image_name:
 # Clear a bunch of variables before executing the submake
 tools/: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/
 
 tools/%: FORCE
 	$(Q)mkdir -p $(objtree)/tools
-	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(objtree) subdir=tools -C $(src)/tools/ $*
+	$(Q)$(MAKE) LDFLAGS= MAKEFLAGS="$(filter --j% -j,$(MAKEFLAGS))" O=$(O) subdir=tools -C $(src)/tools/ $*
 
 # Single targets
 # ---------------------------------------------------------------------------
diff --git a/tools/Makefile b/tools/Makefile
index 01995cc8ac44..b35102721cbb 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -1,3 +1,8 @@
+# Some of the tools (perf) use same make variables
+# as in kernel build.
+export srctree=
+export objtree=
+
 include scripts/Makefile.include
 
 help:
@@ -50,8 +55,13 @@ liblockdep: FORCE
 libapi: FORCE
 	$(call descend,lib/api)
 
+# The perf build does not follow the descend function setup,
+# invoking it via it's own make rule.
+PERF_O   = $(if $(O),$(O)/tools/perf,)
+
 perf: FORCE
-	$(call descend,$@)
+	$(Q)mkdir -p $(PERF_O) .
+	$(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir=
 
 selftests: FORCE
 	$(call descend,testing/$@)

From c41c6647e515c4eb6f754e83e15c1660692c6ce9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 22:34:40 +0200
Subject: [PATCH 008/305] perf tests: Add build tests for building perf from
 kernel source root and tools

Adding build tests for following make commands:
  $ make -C <kernelsrc> tools/perf
  $ make -C <kernelsrc>/tools perf

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Borislav Petkov <bp@suse.de>
Cc: Brian Norris <computersforpeace@gmail.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: David Howells <dhowells@redhat.com>
Cc: Michal Marek <mmarek@suse.cz>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sam Ravnborg <sam@ravnborg.org>
Cc: linux-kbuild@vger.kernel.org
Link: http://lkml.kernel.org/r/1429389280-18720-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/make | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index bff85324f799..901ec9191359 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -223,7 +223,19 @@ tarpkg:
 	echo "- $@: $$cmd" && echo $$cmd > $@ && \
 	( eval $$cmd ) >> $@ 2>&1
 
-all: $(run) $(run_O) tarpkg
+make_kernelsrc:
+	@echo " - make -C <kernelsrc> tools/perf"
+	$(call clean); \
+	(make -C ../.. tools/perf) > $@ 2>&1 && \
+	test -x perf && rm -f $@ || (cat $@ ; false)
+
+make_kernelsrc_tools:
+	@echo " - make -C <kernelsrc>/tools perf"
+	$(call clean); \
+	(make -C ../../tools perf) > $@ 2>&1 && \
+	test -x perf && rm -f $@ || (cat $@ ; false)
+
+all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools
 	@echo OK
 
 out: $(run_O)

From 90e129ff602672c4d5a631b2564411e19de36e15 Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Sat, 18 Apr 2015 17:50:15 +0200
Subject: [PATCH 009/305] perf data: Switch to multiple cpu stream files

Currently we store the data into single data strea/file. The cpu if data
is stored within the event sample. The lttng puts the CPU number that
belongs to the event into the packet context instead into the event.

This patch makes sure that the trace produce by perf does look the same
way. We now use one stream per-CPU. Having it all in one stream
increased the total size of the resulting file. The test went from
416KiB (with perf_cpu event member) to 24MiB due to the required (and
pointless) flush. With the per-cpu streams the total size went up to
588KiB.

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1429372220-6406-3-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 206 ++++++++++++++++++++++++++----
 1 file changed, 182 insertions(+), 24 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index db4f709af18d..0a2d7a5b70f9 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -38,12 +38,20 @@ struct evsel_priv {
 	struct bt_ctf_event_class *event_class;
 };
 
+#define MAX_CPUS	4096
+
+struct ctf_stream {
+	struct bt_ctf_stream *stream;
+	int cpu;
+};
+
 struct ctf_writer {
 	/* writer primitives */
-	struct bt_ctf_writer		*writer;
-	struct bt_ctf_stream		*stream;
-	struct bt_ctf_stream_class	*stream_class;
-	struct bt_ctf_clock		*clock;
+	struct bt_ctf_writer		 *writer;
+	struct ctf_stream		**stream;
+	int				  stream_cnt;
+	struct bt_ctf_stream_class	 *stream_class;
+	struct bt_ctf_clock		 *clock;
 
 	/* data types */
 	union {
@@ -346,12 +354,6 @@ static int add_generic_values(struct ctf_writer *cw,
 			return -1;
 	}
 
-	if (type & PERF_SAMPLE_CPU) {
-		ret = value_set_u32(cw, event, "perf_cpu", sample->cpu);
-		if (ret)
-			return -1;
-	}
-
 	if (type & PERF_SAMPLE_PERIOD) {
 		ret = value_set_u64(cw, event, "perf_period", sample->period);
 		if (ret)
@@ -381,6 +383,113 @@ static int add_generic_values(struct ctf_writer *cw,
 	return 0;
 }
 
+static int ctf_stream__flush(struct ctf_stream *cs)
+{
+	int err = 0;
+
+	if (cs) {
+		err = bt_ctf_stream_flush(cs->stream);
+		if (err)
+			pr_err("CTF stream %d flush failed\n", cs->cpu);
+
+		pr("Flush stream for cpu %d\n", cs->cpu);
+	}
+
+	return err;
+}
+
+static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream *cs;
+	struct bt_ctf_field *pkt_ctx   = NULL;
+	struct bt_ctf_field *cpu_field = NULL;
+	struct bt_ctf_stream *stream   = NULL;
+	int ret;
+
+	cs = zalloc(sizeof(*cs));
+	if (!cs) {
+		pr_err("Failed to allocate ctf stream\n");
+		return NULL;
+	}
+
+	stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class);
+	if (!stream) {
+		pr_err("Failed to create CTF stream\n");
+		goto out;
+	}
+
+	pkt_ctx = bt_ctf_stream_get_packet_context(stream);
+	if (!pkt_ctx) {
+		pr_err("Failed to obtain packet context\n");
+		goto out;
+	}
+
+	cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id");
+	bt_ctf_field_put(pkt_ctx);
+	if (!cpu_field) {
+		pr_err("Failed to obtain cpu field\n");
+		goto out;
+	}
+
+	ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu);
+	if (ret) {
+		pr_err("Failed to update CPU number\n");
+		goto out;
+	}
+
+	bt_ctf_field_put(cpu_field);
+
+	cs->cpu    = cpu;
+	cs->stream = stream;
+	return cs;
+
+out:
+	if (cpu_field)
+		bt_ctf_field_put(cpu_field);
+	if (stream)
+		bt_ctf_stream_put(stream);
+
+	free(cs);
+	return NULL;
+}
+
+static void ctf_stream__delete(struct ctf_stream *cs)
+{
+	if (cs) {
+		bt_ctf_stream_put(cs->stream);
+		free(cs);
+	}
+}
+
+static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu)
+{
+	struct ctf_stream *cs = cw->stream[cpu];
+
+	if (!cs) {
+		cs = ctf_stream__create(cw, cpu);
+		cw->stream[cpu] = cs;
+	}
+
+	return cs;
+}
+
+static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
+			  struct perf_evsel *evsel)
+{
+	int cpu = 0;
+
+	if (evsel->attr.sample_type & PERF_SAMPLE_CPU)
+		cpu = sample->cpu;
+
+	if (cpu > cw->stream_cnt) {
+		pr_err("Event was recorded for CPU %d, limit is at %d.\n",
+			cpu, cw->stream_cnt);
+		cpu = 0;
+	}
+
+	return cpu;
+}
+
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *_event __maybe_unused,
 				struct perf_sample *sample,
@@ -390,6 +499,7 @@ static int process_sample_event(struct perf_tool *tool,
 	struct convert *c = container_of(tool, struct convert, tool);
 	struct evsel_priv *priv = evsel->priv;
 	struct ctf_writer *cw = &c->writer;
+	struct ctf_stream *cs;
 	struct bt_ctf_event_class *event_class;
 	struct bt_ctf_event *event;
 	int ret;
@@ -424,9 +534,12 @@ static int process_sample_event(struct perf_tool *tool,
 			return -1;
 	}
 
-	bt_ctf_stream_append_event(cw->stream, event);
+	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
+	if (cs)
+		bt_ctf_stream_append_event(cs->stream, event);
+
 	bt_ctf_event_put(event);
-	return 0;
+	return cs ? 0 : -1;
 }
 
 static int add_tracepoint_fields_types(struct ctf_writer *cw,
@@ -528,9 +641,6 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 	if (type & PERF_SAMPLE_STREAM_ID)
 		ADD_FIELD(event_class, cw->data.u64, "perf_stream_id");
 
-	if (type & PERF_SAMPLE_CPU)
-		ADD_FIELD(event_class, cw->data.u32, "perf_cpu");
-
 	if (type & PERF_SAMPLE_PERIOD)
 		ADD_FIELD(event_class, cw->data.u64, "perf_period");
 
@@ -604,6 +714,39 @@ static int setup_events(struct ctf_writer *cw, struct perf_session *session)
 	return 0;
 }
 
+static int setup_streams(struct ctf_writer *cw, struct perf_session *session)
+{
+	struct ctf_stream **stream;
+	struct perf_header *ph = &session->header;
+	int ncpus;
+
+	/*
+	 * Try to get the number of cpus used in the data file,
+	 * if not present fallback to the MAX_CPUS.
+	 */
+	ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS;
+
+	stream = zalloc(sizeof(*stream) * ncpus);
+	if (!stream) {
+		pr_err("Failed to allocate streams.\n");
+		return -ENOMEM;
+	}
+
+	cw->stream     = stream;
+	cw->stream_cnt = ncpus;
+	return 0;
+}
+
+static void free_streams(struct ctf_writer *cw)
+{
+	int cpu;
+
+	for (cpu = 0; cpu < cw->stream_cnt; cpu++)
+		ctf_stream__delete(cw->stream[cpu]);
+
+	free(cw->stream);
+}
+
 static int ctf_writer__setup_env(struct ctf_writer *cw,
 				 struct perf_session *session)
 {
@@ -713,7 +856,7 @@ static void ctf_writer__cleanup(struct ctf_writer *cw)
 	ctf_writer__cleanup_data(cw);
 
 	bt_ctf_clock_put(cw->clock);
-	bt_ctf_stream_put(cw->stream);
+	free_streams(cw);
 	bt_ctf_stream_class_put(cw->stream_class);
 	bt_ctf_writer_put(cw->writer);
 
@@ -725,8 +868,9 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
 {
 	struct bt_ctf_writer		*writer;
 	struct bt_ctf_stream_class	*stream_class;
-	struct bt_ctf_stream		*stream;
 	struct bt_ctf_clock		*clock;
+	struct bt_ctf_field_type	*pkt_ctx_type;
+	int				ret;
 
 	/* CTF writer */
 	writer = bt_ctf_writer_create(path);
@@ -767,14 +911,15 @@ static int ctf_writer__init(struct ctf_writer *cw, const char *path)
 	if (ctf_writer__init_data(cw))
 		goto err_cleanup;
 
-	/* CTF stream instance */
-	stream = bt_ctf_writer_create_stream(writer, stream_class);
-	if (!stream) {
-		pr("Failed to create CTF stream.\n");
+	/* Add cpu_id for packet context */
+	pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class);
+	if (!pkt_ctx_type)
 		goto err_cleanup;
-	}
 
-	cw->stream = stream;
+	ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id");
+	bt_ctf_field_type_put(pkt_ctx_type);
+	if (ret)
+		goto err_cleanup;
 
 	/* CTF clock writer setup */
 	if (bt_ctf_writer_add_clock(writer, clock)) {
@@ -791,6 +936,16 @@ err:
 	return -1;
 }
 
+static int ctf_writer__flush_streams(struct ctf_writer *cw)
+{
+	int cpu, ret = 0;
+
+	for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++)
+		ret = ctf_stream__flush(cw->stream[cpu]);
+
+	return ret;
+}
+
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 {
 	struct perf_session *session;
@@ -834,9 +989,12 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 	if (setup_events(cw, session))
 		goto free_session;
 
+	if (setup_streams(cw, session))
+		goto free_session;
+
 	err = perf_session__process_events(session);
 	if (!err)
-		err = bt_ctf_stream_flush(cw->stream);
+		err = ctf_writer__flush_streams(cw);
 	else
 		pr_err("Error during conversion.\n");
 

From 89e5fa884df8cd72e81c3727cf1f0d1c7a9c7641 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 17:50:16 +0200
Subject: [PATCH 010/305] perf data: Enable stream flush within processing

For big data files the size of data allocated for stream instance could
get really high. It's needed to flush the data out of the stream once in
a while.

Unfortunately there's no size indication in the stream object, so we
govern the flush based on the number of stored events. Current flush
limit is set ot 100000 events.

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1429372220-6406-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 0a2d7a5b70f9..4f942b37067b 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -43,6 +43,7 @@ struct evsel_priv {
 struct ctf_stream {
 	struct bt_ctf_stream *stream;
 	int cpu;
+	u32 count;
 };
 
 struct ctf_writer {
@@ -392,7 +393,10 @@ static int ctf_stream__flush(struct ctf_stream *cs)
 		if (err)
 			pr_err("CTF stream %d flush failed\n", cs->cpu);
 
-		pr("Flush stream for cpu %d\n", cs->cpu);
+		pr("Flush stream for cpu %d (%u samples)\n",
+		   cs->cpu, cs->count);
+
+		cs->count = 0;
 	}
 
 	return err;
@@ -490,6 +494,19 @@ static int get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample,
 	return cpu;
 }
 
+#define STREAM_FLUSH_COUNT 100000
+
+/*
+ * Currently we have no other way to determine the
+ * time for the stream flush other than keep track
+ * of the number of events and check it against
+ * threshold.
+ */
+static bool is_flush_needed(struct ctf_stream *cs)
+{
+	return cs->count >= STREAM_FLUSH_COUNT;
+}
+
 static int process_sample_event(struct perf_tool *tool,
 				union perf_event *_event __maybe_unused,
 				struct perf_sample *sample,
@@ -535,8 +552,13 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 
 	cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel));
-	if (cs)
+	if (cs) {
+		if (is_flush_needed(cs))
+			ctf_stream__flush(cs);
+
+		cs->count++;
 		bt_ctf_stream_append_event(cs->stream, event);
+	}
 
 	bt_ctf_event_put(event);
 	return cs ? 0 : -1;

From 8fa46753fe2ca96b0fc175efb6c9ed5c001fb554 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sat, 18 Apr 2015 17:50:17 +0200
Subject: [PATCH 011/305] perf data: Add support for setting ordered_events
 queue size

Adding support to limit the size of ordered_events queue, so we could
control allocation size of perf data files without proper finished round
events.

Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1429372220-6406-5-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 4f942b37067b..b35c8d6c291d 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -74,6 +74,9 @@ struct convert {
 
 	u64			events_size;
 	u64			events_count;
+
+	/* Ordered events configured queue size. */
+	u64			queue_size;
 };
 
 static int value_set(struct bt_ctf_field_type *type,
@@ -968,6 +971,18 @@ static int ctf_writer__flush_streams(struct ctf_writer *cw)
 	return ret;
 }
 
+static int convert__config(const char *var, const char *value, void *cb)
+{
+	struct convert *c = cb;
+
+	if (!strcmp(var, "convert.queue-size")) {
+		c->queue_size = perf_config_u64(var, value);
+		return 0;
+	}
+
+	return perf_default_config(var, value, cb);
+}
+
 int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 {
 	struct perf_session *session;
@@ -994,6 +1009,8 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 	struct ctf_writer *cw = &c.writer;
 	int err = -1;
 
+	perf_config(convert__config, &c);
+
 	/* CTF writer */
 	if (ctf_writer__init(cw, path))
 		return -1;
@@ -1003,6 +1020,11 @@ int bt_convert__perf2ctf(const char *input, const char *path, bool force)
 	if (!session)
 		goto free_writer;
 
+	if (c.queue_size) {
+		ordered_events__set_alloc_size(&session->ordered_events,
+					       c.queue_size);
+	}
+
 	/* CTF writer env/clock setup  */
 	if (ctf_writer__setup_env(cw, session))
 		goto free_session;

From e0a7cce5366dbd22391e09a83827ba5b4491cd2f Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sat, 18 Apr 2015 17:50:19 +0200
Subject: [PATCH 012/305] perf data: Fix duplicate field names and avoid
 reserved keywords

Some parameters of syscall tracepoints named as 'nr', 'event', etc.
When dealing with them, perf convert to ctf meets some problem:

 1. If a parameter with name 'nr', it will duplicate syscall's
    common field 'nr'. One such syscall is io_submit().

 2. If a parameter with name 'event', it is denied to be inserted
    because 'event' is a CTF spec keyword[1]. One such syscall is
    epoll_ctl.

This patch appends '_dupl_X' suffix to avoid problem 1, prepend a '_'
prefix to avoid problem 2.

[1] http://diamon.org/docs/ctf/v1.8.2/#specC.1.2

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Link: http://lkml.kernel.org/r/1429372220-6406-7-git-send-email-jolsa@kernel.org
[ changed to use format_file::alias ]
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 86 +++++++++++++++++++++++++++++--
 1 file changed, 82 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index b35c8d6c291d..8eda4ed628e7 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -14,6 +14,7 @@
 #include <babeltrace/ctf-writer/event.h>
 #include <babeltrace/ctf-writer/event-types.h>
 #include <babeltrace/ctf-writer/event-fields.h>
+#include <babeltrace/ctf-ir/utils.h>
 #include <babeltrace/ctf/events.h>
 #include <traceevent/event-parse.h>
 #include "asm/bug.h"
@@ -184,6 +185,7 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	unsigned int len;
 	int ret;
 
+	name = fmtf->alias;
 	offset = fmtf->offset;
 	len = fmtf->size;
 	if (flags & FIELD_IS_STRING)
@@ -567,6 +569,82 @@ static int process_sample_event(struct perf_tool *tool,
 	return cs ? 0 : -1;
 }
 
+/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */
+static char *change_name(char *name, char *orig_name, int dup)
+{
+	char *new_name = NULL;
+	size_t len;
+
+	if (!name)
+		name = orig_name;
+
+	if (dup >= 10)
+		goto out;
+	/*
+	 * Add '_' prefix to potential keywork.  According to
+	 * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652),
+	 * futher CTF spec updating may require us to use '$'.
+	 */
+	if (dup < 0)
+		len = strlen(name) + sizeof("_");
+	else
+		len = strlen(orig_name) + sizeof("_dupl_X");
+
+	new_name = malloc(len);
+	if (!new_name)
+		goto out;
+
+	if (dup < 0)
+		snprintf(new_name, len, "_%s", name);
+	else
+		snprintf(new_name, len, "%s_dupl_%d", orig_name, dup);
+
+out:
+	if (name != orig_name)
+		free(name);
+	return new_name;
+}
+
+static int event_class_add_field(struct bt_ctf_event_class *event_class,
+		struct bt_ctf_field_type *type,
+		struct format_field *field)
+{
+	struct bt_ctf_field_type *t = NULL;
+	char *name;
+	int dup = 1;
+	int ret;
+
+	/* alias was already assigned */
+	if (field->alias != field->name)
+		return bt_ctf_event_class_add_field(event_class, type,
+				(char *)field->alias);
+
+	name = field->name;
+
+	/* If 'name' is a keywork, add prefix. */
+	if (bt_ctf_validate_identifier(name))
+		name = change_name(name, field->name, -1);
+
+	if (!name) {
+		pr_err("Failed to fix invalid identifier.");
+		return -1;
+	}
+	while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) {
+		bt_ctf_field_type_put(t);
+		name = change_name(name, field->name, dup++);
+		if (!name) {
+			pr_err("Failed to create dup name for '%s'\n", field->name);
+			return -1;
+		}
+	}
+
+	ret = bt_ctf_event_class_add_field(event_class, type, name);
+	if (!ret)
+		field->alias = name;
+
+	return ret;
+}
+
 static int add_tracepoint_fields_types(struct ctf_writer *cw,
 				       struct format_field *fields,
 				       struct bt_ctf_event_class *event_class)
@@ -595,14 +673,14 @@ static int add_tracepoint_fields_types(struct ctf_writer *cw,
 		if (flags & FIELD_IS_ARRAY)
 			type = bt_ctf_field_type_array_create(type, field->arraylen);
 
-		ret = bt_ctf_event_class_add_field(event_class, type,
-				field->name);
+		ret = event_class_add_field(event_class, type, field);
 
 		if (flags & FIELD_IS_ARRAY)
 			bt_ctf_field_type_put(type);
 
 		if (ret) {
-			pr_err("Failed to add field '%s\n", field->name);
+			pr_err("Failed to add field '%s': %d\n",
+					field->name, ret);
 			return -1;
 		}
 	}
@@ -646,7 +724,7 @@ static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel,
 	do {								\
 		pr2("  field '%s'\n", n);				\
 		if (bt_ctf_event_class_add_field(cl, t, n)) {		\
-			pr_err("Failed to add field '%s;\n", n);	\
+			pr_err("Failed to add field '%s';\n", n);	\
 			return -1;					\
 		}							\
 	} while (0)

From d4ae42139289cbe38d5b84fa1558161d330d6e54 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sat, 18 Apr 2015 17:50:20 +0200
Subject: [PATCH 013/305] perf data: Fix signedness of value

When converting int values, perf first extractes it to a ulonglong, then
feeds it to babeltrace as a signed value.

For negative 32 bit values (for example, return values of failed
syscalls), the extracted data should be something like 0xfffffffe (-2).
It becomes a large int64 value.

Babeltrace denies to insert it with bt_ctf_field_signed_integer_set_value()
because it is larger than 0x7fffffff, the largest positive value a
 32 bit int can be.

This patch introduces adjust_signedness(), which fills high bits of
ulonglong with 1 if the value is negative.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jeremie Galarneau <jgalar@efficios.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Cc: Tom Zanussi <tzanussi@gmail.com>
Link: http://lkml.kernel.org/r/1429372220-6406-8-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
[ s/signess/signedness/g ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/data-convert-bt.c | 64 +++++++++++++++++++++++++------
 1 file changed, 52 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c
index 8eda4ed628e7..5bfc1198ab46 100644
--- a/tools/perf/util/data-convert-bt.c
+++ b/tools/perf/util/data-convert-bt.c
@@ -166,6 +166,43 @@ get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field)
 		return cw->data.u32;
 }
 
+static unsigned long long adjust_signedness(unsigned long long value_int, int size)
+{
+	unsigned long long value_mask;
+
+	/*
+	 * value_mask = (1 << (size * 8 - 1)) - 1.
+	 * Directly set value_mask for code readers.
+	 */
+	switch (size) {
+	case 1:
+		value_mask = 0x7fULL;
+		break;
+	case 2:
+		value_mask = 0x7fffULL;
+		break;
+	case 4:
+		value_mask = 0x7fffffffULL;
+		break;
+	case 8:
+		/*
+		 * For 64 bit value, return it self. There is no need
+		 * to fill high bit.
+		 */
+		/* Fall through */
+	default:
+		/* BUG! */
+		return value_int;
+	}
+
+	/* If it is a positive value, don't adjust. */
+	if ((value_int & (~0ULL - value_mask)) == 0)
+		return value_int;
+
+	/* Fill upper part of value_int with 1 to make it a negative long long. */
+	return (value_int & value_mask) | ~value_mask;
+}
+
 static int add_tracepoint_field_value(struct ctf_writer *cw,
 				      struct bt_ctf_event_class *event_class,
 				      struct bt_ctf_event *event,
@@ -177,7 +214,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	struct bt_ctf_field *field;
 	const char *name = fmtf->name;
 	void *data = sample->raw_data;
-	unsigned long long value_int;
 	unsigned long flags = fmtf->flags;
 	unsigned int n_items;
 	unsigned int i;
@@ -222,11 +258,6 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 	type = get_tracepoint_field_type(cw, fmtf);
 
 	for (i = 0; i < n_items; i++) {
-		if (!(flags & FIELD_IS_STRING))
-			value_int = pevent_read_number(
-					fmtf->event->pevent,
-					data + offset + i * len, len);
-
 		if (flags & FIELD_IS_ARRAY)
 			field = bt_ctf_field_array_get_field(array_field, i);
 		else
@@ -240,12 +271,21 @@ static int add_tracepoint_field_value(struct ctf_writer *cw,
 		if (flags & FIELD_IS_STRING)
 			ret = bt_ctf_field_string_set_value(field,
 					data + offset + i * len);
-		else if (!(flags & FIELD_IS_SIGNED))
-			ret = bt_ctf_field_unsigned_integer_set_value(
-					field, value_int);
-		else
-			ret = bt_ctf_field_signed_integer_set_value(
-					field, value_int);
+		else {
+			unsigned long long value_int;
+
+			value_int = pevent_read_number(
+					fmtf->event->pevent,
+					data + offset + i * len, len);
+
+			if (!(flags & FIELD_IS_SIGNED))
+				ret = bt_ctf_field_unsigned_integer_set_value(
+						field, value_int);
+			else
+				ret = bt_ctf_field_signed_integer_set_value(
+						field, adjust_signedness(value_int, len));
+		}
+
 		if (ret) {
 			pr_err("failed to set file value %s\n", name);
 			goto err_put_field;

From 4025ea4024c271cc0d993b8641249dce33a63fcc Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:41 +0300
Subject: [PATCH 014/305] perf header: Add AUX area tracing feature

Add a feature to indicate that a perf.data file contains AUX area data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/header.c | 14 ++++++++++++++
 tools/perf/util/header.h |  1 +
 2 files changed, 15 insertions(+)

diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 918fd8ae2d80..589c28028379 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -869,6 +869,13 @@ static int write_branch_stack(int fd __maybe_unused,
 	return 0;
 }
 
+static int write_auxtrace(int fd __maybe_unused,
+			  struct perf_header *h __maybe_unused,
+			  struct perf_evlist *evlist __maybe_unused)
+{
+	return 0;
+}
+
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
 			   FILE *fp)
 {
@@ -1151,6 +1158,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused,
 	fprintf(fp, "# contains samples with branch stack\n");
 }
 
+static void print_auxtrace(struct perf_header *ph __maybe_unused,
+			   int fd __maybe_unused, FILE *fp)
+{
+	fprintf(fp, "# contains AUX area data (e.g. instruction trace)\n");
+}
+
 static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused,
 			       FILE *fp)
 {
@@ -1861,6 +1874,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
 	FEAT_OPP(HEADER_PMU_MAPPINGS,	pmu_mappings),
 	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
+	FEAT_OPA(HEADER_AUXTRACE,	auxtrace),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
index 3bb90ac172a1..d4d57962c591 100644
--- a/tools/perf/util/header.h
+++ b/tools/perf/util/header.h
@@ -30,6 +30,7 @@ enum {
 	HEADER_BRANCH_STACK,
 	HEADER_PMU_MAPPINGS,
 	HEADER_GROUP_DESC,
+	HEADER_AUXTRACE,
 	HEADER_LAST_FEATURE,
 	HEADER_FEAT_BITS	= 256,
 };

From 718c602d67aac04a48cd8e444e778cc3d5d9027f Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:42 +0300
Subject: [PATCH 015/305] perf evlist: Add support for mmapping an AUX area
 buffer

This patch supports the addition to the kernel of AUX area buffers that
can be mmapped separately from the perf-events buffer.

The AUX buffer can be configured to contain hardware-produced trace
information. The first implementation will support Intel BTS and Intel
PT.

One auxtrace buffer is mmapped per perf-events buffer.  If the requested
auxtrace buffer size is zero, which it will be until further support is
added, then no auxtrace mmapping is attempted.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-3-git-send-email-adrian.hunter@intel.com
[ Fixed conflict in evlist.h ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build      |   1 +
 tools/perf/util/auxtrace.c | 113 +++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h | 117 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/evlist.c   |  61 ++++++++++++++++++-
 tools/perf/util/evlist.h   |   5 ++
 5 files changed, 294 insertions(+), 3 deletions(-)
 create mode 100644 tools/perf/util/auxtrace.c
 create mode 100644 tools/perf/util/auxtrace.h

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 797490a40075..b6c3f39f081a 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -74,6 +74,7 @@ libperf-y += data.o
 libperf-$(CONFIG_X86) += tsc.o
 libperf-y += cloexec.o
 libperf-y += thread-stack.o
+libperf-y += auxtrace.o
 
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
new file mode 100644
index 000000000000..dedb64613000
--- /dev/null
+++ b/tools/perf/util/auxtrace.c
@@ -0,0 +1,113 @@
+/*
+ * auxtrace.c: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/mman.h>
+#include <stdbool.h>
+
+#include <linux/kernel.h>
+#include <linux/perf_event.h>
+#include <linux/types.h>
+#include <linux/bitops.h>
+#include <linux/log2.h>
+
+#include "../perf.h"
+#include "util.h"
+#include "evlist.h"
+#include "cpumap.h"
+#include "thread_map.h"
+#include "asm/bug.h"
+#include "auxtrace.h"
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd)
+{
+	struct perf_event_mmap_page *pc = userpg;
+
+#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	pr_err("Cannot use AUX area tracing mmaps\n");
+	return -1;
+#endif
+
+	WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n");
+
+	mm->userpg = userpg;
+	mm->mask = mp->mask;
+	mm->len = mp->len;
+	mm->prev = 0;
+	mm->idx = mp->idx;
+	mm->tid = mp->tid;
+	mm->cpu = mp->cpu;
+
+	if (!mp->len) {
+		mm->base = NULL;
+		return 0;
+	}
+
+	pc->aux_offset = mp->offset;
+	pc->aux_size = mp->len;
+
+	mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset);
+	if (mm->base == MAP_FAILED) {
+		pr_debug2("failed to mmap AUX area\n");
+		mm->base = NULL;
+		return -1;
+	}
+
+	return 0;
+}
+
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm)
+{
+	if (mm->base) {
+		munmap(mm->base, mm->len);
+		mm->base = NULL;
+	}
+}
+
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite)
+{
+	if (auxtrace_pages) {
+		mp->offset = auxtrace_offset;
+		mp->len = auxtrace_pages * (size_t)page_size;
+		mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0;
+		mp->prot = PROT_READ | (auxtrace_overwrite ? 0 : PROT_WRITE);
+		pr_debug2("AUX area mmap length %zu\n", mp->len);
+	} else {
+		mp->len = 0;
+	}
+}
+
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu)
+{
+	mp->idx = idx;
+
+	if (per_cpu) {
+		mp->cpu = evlist->cpus->map[idx];
+		if (evlist->threads)
+			mp->tid = evlist->threads->map[0];
+		else
+			mp->tid = -1;
+	} else {
+		mp->cpu = -1;
+		mp->tid = evlist->threads->map[idx];
+	}
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
new file mode 100644
index 000000000000..2071b36560a0
--- /dev/null
+++ b/tools/perf/util/auxtrace.h
@@ -0,0 +1,117 @@
+/*
+ * auxtrace.h: AUX area trace support
+ * Copyright (c) 2013-2015, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ */
+
+#ifndef __PERF_AUXTRACE_H
+#define __PERF_AUXTRACE_H
+
+#include <sys/types.h>
+#include <stdbool.h>
+
+#include <linux/perf_event.h>
+#include <linux/types.h>
+
+#include "../perf.h"
+
+struct perf_evlist;
+
+/**
+ * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
+ * @base: address of mapped area
+ * @userpg: pointer to buffer's perf_event_mmap_page
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @len: size of mapped area
+ * @prev: previous aux_head
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap {
+	void		*base;
+	void		*userpg;
+	size_t		mask;
+	size_t		len;
+	u64		prev;
+	int		idx;
+	pid_t		tid;
+	int		cpu;
+};
+
+/**
+ * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap.
+ * @mask: %0 if @len is not a power of two, otherwise (@len - %1)
+ * @offset: file offset of mapped area
+ * @len: size of mapped area
+ * @prot: mmap memory protection
+ * @idx: index of this mmap
+ * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu
+ *       mmap) otherwise %0
+ * @cpu: cpu number for a per-cpu mmap otherwise %-1
+ */
+struct auxtrace_mmap_params {
+	size_t		mask;
+	off_t		offset;
+	size_t		len;
+	int		prot;
+	int		idx;
+	pid_t		tid;
+	int		cpu;
+};
+
+static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	u64 head = ACCESS_ONCE(pc->aux_head);
+#else
+	u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0);
+#endif
+
+	/* Ensure all reads are done after we read the head */
+	rmb();
+	return head;
+}
+
+static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	u64 old_tail;
+#endif
+
+	/* Ensure all reads are done before we write the tail out */
+	mb();
+#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT)
+	pc->aux_tail = tail;
+#else
+	do {
+		old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0);
+	} while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail));
+#endif
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu);
+
+#endif
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 080be93eea96..5b26f45ef8ff 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -725,6 +725,34 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 		perf_evlist__mmap_put(evlist, idx);
 }
 
+int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused,
+			       struct auxtrace_mmap_params *mp __maybe_unused,
+			       void *userpg __maybe_unused,
+			       int fd __maybe_unused)
+{
+	return 0;
+}
+
+void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__init(
+			struct auxtrace_mmap_params *mp __maybe_unused,
+			off_t auxtrace_offset __maybe_unused,
+			unsigned int auxtrace_pages __maybe_unused,
+			bool auxtrace_overwrite __maybe_unused)
+{
+}
+
+void __weak auxtrace_mmap_params__set_idx(
+			struct auxtrace_mmap_params *mp __maybe_unused,
+			struct perf_evlist *evlist __maybe_unused,
+			int idx __maybe_unused,
+			bool per_cpu __maybe_unused)
+{
+}
+
 static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
 {
 	if (evlist->mmap[idx].base != NULL) {
@@ -732,6 +760,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
 		evlist->mmap[idx].base = NULL;
 		evlist->mmap[idx].refcnt = 0;
 	}
+	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
 }
 
 void perf_evlist__munmap(struct perf_evlist *evlist)
@@ -759,6 +788,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist)
 struct mmap_params {
 	int prot;
 	int mask;
+	struct auxtrace_mmap_params auxtrace_mp;
 };
 
 static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
@@ -789,6 +819,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
 		return -1;
 	}
 
+	if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap,
+				&mp->auxtrace_mp, evlist->mmap[idx].base, fd))
+		return -1;
+
 	return 0;
 }
 
@@ -853,6 +887,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist,
 	for (cpu = 0; cpu < nr_cpus; cpu++) {
 		int output = -1;
 
+		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu,
+					      true);
+
 		for (thread = 0; thread < nr_threads; thread++) {
 			if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu,
 							thread, &output))
@@ -878,6 +915,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist,
 	for (thread = 0; thread < nr_threads; thread++) {
 		int output = -1;
 
+		auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread,
+					      false);
+
 		if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread,
 						&output))
 			goto out_unmap;
@@ -981,19 +1021,25 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
 }
 
 /**
- * perf_evlist__mmap - Create mmaps to receive events.
+ * perf_evlist__mmap_ex - Create mmaps to receive events.
  * @evlist: list of events
  * @pages: map length in pages
  * @overwrite: overwrite older events?
+ * @auxtrace_pages - auxtrace map length in pages
+ * @auxtrace_overwrite - overwrite older auxtrace data?
  *
  * If @overwrite is %false the user needs to signal event consumption using
  * perf_mmap__write_tail().  Using perf_evlist__mmap_read() does this
  * automatically.
  *
+ * Similarly, if @auxtrace_overwrite is %false the user needs to signal data
+ * consumption using auxtrace_mmap__write_tail().
+ *
  * Return: %0 on success, negative error code otherwise.
  */
-int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
-		      bool overwrite)
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+			 bool overwrite, unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite)
 {
 	struct perf_evsel *evsel;
 	const struct cpu_map *cpus = evlist->cpus;
@@ -1013,6 +1059,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 	pr_debug("mmap size %zuB\n", evlist->mmap_len);
 	mp.mask = evlist->mmap_len - page_size - 1;
 
+	auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len,
+				   auxtrace_pages, auxtrace_overwrite);
+
 	evlist__for_each(evlist, evsel) {
 		if ((evsel->attr.read_format & PERF_FORMAT_ID) &&
 		    evsel->sample_id == NULL &&
@@ -1026,6 +1075,12 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 	return perf_evlist__mmap_per_cpu(evlist, &mp);
 }
 
+int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
+		      bool overwrite)
+{
+	return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false);
+}
+
 int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target)
 {
 	evlist->threads = thread_map__new_str(target->pid, target->tid,
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index b5cce95d644e..8dc986ddaa01 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -8,6 +8,7 @@
 #include "event.h"
 #include "evsel.h"
 #include "util.h"
+#include "auxtrace.h"
 #include <unistd.h>
 
 struct pollfd;
@@ -28,6 +29,7 @@ struct perf_mmap {
 	int		 mask;
 	int		 refcnt;
 	u64		 prev;
+	struct auxtrace_mmap auxtrace_mmap;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));
 };
 
@@ -126,6 +128,9 @@ int perf_evlist__parse_mmap_pages(const struct option *opt,
 				  const char *str,
 				  int unset);
 
+int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages,
+			 bool overwrite, unsigned int auxtrace_pages,
+			 bool auxtrace_overwrite);
 int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages,
 		      bool overwrite);
 void perf_evlist__munmap(struct perf_evlist *evlist);

From a16ac0233ea1da8af3c2046a67c2527b4a452166 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:43 +0300
Subject: [PATCH 016/305] perf tools: Add user events for AUX area tracing

Add two user events for AUX area tracing.

PERF_RECORD_AUXTRACE_INFO contains metadata, consisting primarily the
type of the AUX area tracing data plus some amount of
architecture-specific information.  There should be only one
PERF_RECORD_AUXTRACE_INFO event.

PERF_RECORD_AUXTRACE identifies AUX area tracing data copied from the
mmapped AUX area tracing region.  The actual data is not part of the
event but immediately follows it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-4-git-send-email-adrian.hunter@intel.com
[ s/MIN/min/g and use cast to fix up wrt -Werror=sign-compare till we adopt min_t() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>

Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.c   |  2 ++
 tools/perf/util/event.h   | 22 +++++++++++++
 tools/perf/util/session.c | 69 +++++++++++++++++++++++++++++++++++++++
 tools/perf/util/tool.h    |  9 ++++-
 4 files changed, 101 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index ff866c4d2e2f..adada16a3f3b 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -29,6 +29,8 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_HEADER_BUILD_ID]		= "BUILD_ID",
 	[PERF_RECORD_FINISHED_ROUND]		= "FINISHED_ROUND",
 	[PERF_RECORD_ID_INDEX]			= "ID_INDEX",
+	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
+	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
 };
 
 const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 09b9e8d3fcf7..6d81beca1f27 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -215,6 +215,8 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_HEADER_BUILD_ID		= 67,
 	PERF_RECORD_FINISHED_ROUND		= 68,
 	PERF_RECORD_ID_INDEX			= 69,
+	PERF_RECORD_AUXTRACE_INFO		= 70,
+	PERF_RECORD_AUXTRACE			= 71,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -280,6 +282,24 @@ struct id_index_event {
 	struct id_index_entry entries[0];
 };
 
+struct auxtrace_info_event {
+	struct perf_event_header header;
+	u32 type;
+	u32 reserved__; /* For alignment */
+	u64 priv[];
+};
+
+struct auxtrace_event {
+	struct perf_event_header header;
+	u64 size;
+	u64 offset;
+	u64 reference;
+	u32 idx;
+	u32 tid;
+	u32 cpu;
+	u32 reserved__; /* For alignment */
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -295,6 +315,8 @@ union perf_event {
 	struct tracing_data_event	tracing_data;
 	struct build_id_event		build_id;
 	struct id_index_event		id_index;
+	struct auxtrace_info_event	auxtrace_info;
+	struct auxtrace_event		auxtrace;
 };
 
 void perf_event__print_totals(void);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0c74012575ac..d46ac4038fc7 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -262,6 +262,40 @@ static int process_id_index_stub(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
+static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused,
+				union perf_event *event __maybe_unused,
+				struct perf_session *session __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
+static int skipn(int fd, off_t n)
+{
+	char buf[4096];
+	ssize_t ret;
+
+	while (n > 0) {
+		ret = read(fd, buf, min(n, (off_t)sizeof(buf)));
+		if (ret <= 0)
+			return ret;
+		n -= ret;
+	}
+
+	return 0;
+}
+
+static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_session *session
+				       __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	if (perf_data_file__is_pipe(session->file))
+		skipn(perf_data_file__fd(session->file), event->auxtrace.size);
+	return event->auxtrace.size;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
@@ -298,6 +332,10 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 	}
 	if (tool->id_index == NULL)
 		tool->id_index = process_id_index_stub;
+	if (tool->auxtrace_info == NULL)
+		tool->auxtrace_info = process_event_auxtrace_info_stub;
+	if (tool->auxtrace == NULL)
+		tool->auxtrace = process_event_auxtrace_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -478,6 +516,29 @@ static void perf_event__tracing_data_swap(union perf_event *event,
 	event->tracing_data.size = bswap_32(event->tracing_data.size);
 }
 
+static void perf_event__auxtrace_info_swap(union perf_event *event,
+					   bool sample_id_all __maybe_unused)
+{
+	size_t size;
+
+	event->auxtrace_info.type = bswap_32(event->auxtrace_info.type);
+
+	size = event->header.size;
+	size -= (void *)&event->auxtrace_info.priv - (void *)event;
+	mem_bswap_64(event->auxtrace_info.priv, size);
+}
+
+static void perf_event__auxtrace_swap(union perf_event *event,
+				      bool sample_id_all __maybe_unused)
+{
+	event->auxtrace.size      = bswap_64(event->auxtrace.size);
+	event->auxtrace.offset    = bswap_64(event->auxtrace.offset);
+	event->auxtrace.reference = bswap_64(event->auxtrace.reference);
+	event->auxtrace.idx       = bswap_32(event->auxtrace.idx);
+	event->auxtrace.tid       = bswap_32(event->auxtrace.tid);
+	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
+}
+
 typedef void (*perf_event__swap_op)(union perf_event *event,
 				    bool sample_id_all);
 
@@ -497,6 +558,8 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
 	[PERF_RECORD_HEADER_BUILD_ID]	  = NULL,
 	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
+	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
 	[PERF_RECORD_HEADER_MAX]	  = NULL,
 };
 
@@ -980,6 +1043,12 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		return tool->finished_round(tool, event, oe);
 	case PERF_RECORD_ID_INDEX:
 		return tool->id_index(tool, event, session);
+	case PERF_RECORD_AUXTRACE_INFO:
+		return tool->auxtrace_info(tool, event, session);
+	case PERF_RECORD_AUXTRACE:
+		/* setup for reading amidst mmap */
+		lseek(fd, file_offset + event->header.size, SEEK_SET);
+		return tool->auxtrace(tool, event, session);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 51d9e56c0f84..0146f8ef5cc3 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -3,6 +3,8 @@
 
 #include <stdbool.h>
 
+#include <linux/types.h>
+
 struct perf_session;
 union perf_event;
 struct perf_evlist;
@@ -29,6 +31,9 @@ typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event,
 typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event,
 			struct ordered_events *oe);
 
+typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event,
+			 struct perf_session *session);
+
 struct perf_tool {
 	event_sample	sample,
 			read;
@@ -44,7 +49,9 @@ struct perf_tool {
 	event_op2	tracing_data;
 	event_oe	finished_round;
 	event_op2	build_id,
-			id_index;
+			id_index,
+			auxtrace_info;
+	event_op3	auxtrace;
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;
 };

From 9e0cc4fe3752661220ee2cf7fcd335e233eea470 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:44 +0300
Subject: [PATCH 017/305] perf auxtrace: Add support for AUX area recording

Add support for reading from the AUX area tracing mmap and synthesizing
AUX area tracing events.

This patch introduces an abstraction for recording AUX area data.

Recording is initialized by auxtrace_record__init() which is a weak
function to be implemented by the architecture to provide recording
callbacks.

Recording is mainly handled by auxtrace_mmap__read() and
perf_event__synthesize_auxtrace() but there are callbacks for
miscellaneous needs including validating and processing user options,
populating private data in auxtrace_info_event, and freeing the
structure when finished.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/perf.h          |   2 +
 tools/perf/util/auxtrace.c | 176 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h |  56 +++++++++++-
 tools/perf/util/record.c   |  11 ++-
 4 files changed, 243 insertions(+), 2 deletions(-)

diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index e14bb637255c..5042093d5213 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -54,8 +54,10 @@ struct record_opts {
 	bool	     period;
 	bool	     sample_intr_regs;
 	bool	     running_time;
+	bool	     full_auxtrace;
 	unsigned int freq;
 	unsigned int mmap_pages;
+	unsigned int auxtrace_mmap_pages;
 	unsigned int user_freq;
 	u64          branch_stack;
 	u64	     default_interval;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index dedb64613000..2cafea2ec015 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -23,6 +23,10 @@
 #include <linux/bitops.h>
 #include <linux/log2.h>
 
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
 #include "../perf.h"
 #include "util.h"
 #include "evlist.h"
@@ -31,6 +35,9 @@
 #include "asm/bug.h"
 #include "auxtrace.h"
 
+#include "event.h"
+#include "debug.h"
+
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
 			void *userpg, int fd)
@@ -111,3 +118,172 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 		mp->tid = evlist->threads->map[idx];
 	}
 }
+
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
+{
+	if (itr)
+		return itr->info_priv_size(itr);
+	return 0;
+}
+
+static int auxtrace_not_supported(void)
+{
+	pr_err("AUX area tracing is not supported on this architecture\n");
+	return -EINVAL;
+}
+
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size)
+{
+	if (itr)
+		return itr->info_fill(itr, session, auxtrace_info, priv_size);
+	return auxtrace_not_supported();
+}
+
+void auxtrace_record__free(struct auxtrace_record *itr)
+{
+	if (itr)
+		itr->free(itr);
+}
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct perf_evlist *evlist,
+			     struct record_opts *opts)
+{
+	if (itr)
+		return itr->recording_options(itr, evlist, opts);
+	return 0;
+}
+
+u64 auxtrace_record__reference(struct auxtrace_record *itr)
+{
+	if (itr)
+		return itr->reference(itr);
+	return 0;
+}
+
+struct auxtrace_record *__weak
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
+{
+	*err = 0;
+	return NULL;
+}
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+					 struct perf_tool *tool,
+					 struct perf_session *session,
+					 perf_event__handler_t process)
+{
+	union perf_event *ev;
+	size_t priv_size;
+	int err;
+
+	pr_debug2("Synthesizing auxtrace information\n");
+	priv_size = auxtrace_record__info_priv_size(itr);
+	ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size);
+	if (!ev)
+		return -ENOMEM;
+
+	ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO;
+	ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) +
+					priv_size;
+	err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info,
+					 priv_size);
+	if (err)
+		goto out_free;
+
+	err = process(tool, ev, NULL, NULL);
+out_free:
+	free(ev);
+	return err;
+}
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn)
+{
+	u64 head = auxtrace_mmap__read_head(mm);
+	u64 old = mm->prev, offset, ref;
+	unsigned char *data = mm->base;
+	size_t size, head_off, old_off, len1, len2, padding;
+	union perf_event ev;
+	void *data1, *data2;
+
+	if (old == head)
+		return 0;
+
+	pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n",
+		  mm->idx, old, head, head - old);
+
+	if (mm->mask) {
+		head_off = head & mm->mask;
+		old_off = old & mm->mask;
+	} else {
+		head_off = head % mm->len;
+		old_off = old % mm->len;
+	}
+
+	if (head_off > old_off)
+		size = head_off - old_off;
+	else
+		size = mm->len - (old_off - head_off);
+
+	ref = auxtrace_record__reference(itr);
+
+	if (head > old || size <= head || mm->mask) {
+		offset = head - size;
+	} else {
+		/*
+		 * When the buffer size is not a power of 2, 'head' wraps at the
+		 * highest multiple of the buffer size, so we have to subtract
+		 * the remainder here.
+		 */
+		u64 rem = (0ULL - mm->len) % mm->len;
+
+		offset = head - size - rem;
+	}
+
+	if (size > head_off) {
+		len1 = size - head_off;
+		data1 = &data[mm->len - len1];
+		len2 = head_off;
+		data2 = &data[0];
+	} else {
+		len1 = size;
+		data1 = &data[head_off - len1];
+		len2 = 0;
+		data2 = NULL;
+	}
+
+	/* padding must be written by fn() e.g. record__process_auxtrace() */
+	padding = size & 7;
+	if (padding)
+		padding = 8 - padding;
+
+	memset(&ev, 0, sizeof(ev));
+	ev.auxtrace.header.type = PERF_RECORD_AUXTRACE;
+	ev.auxtrace.header.size = sizeof(ev.auxtrace);
+	ev.auxtrace.size = size + padding;
+	ev.auxtrace.offset = offset;
+	ev.auxtrace.reference = ref;
+	ev.auxtrace.idx = mm->idx;
+	ev.auxtrace.tid = mm->tid;
+	ev.auxtrace.cpu = mm->cpu;
+
+	if (fn(tool, &ev, data1, len1, data2, len2))
+		return -1;
+
+	mm->prev = head;
+
+	auxtrace_mmap__write_tail(mm, head);
+	if (itr->read_finish) {
+		int err;
+
+		err = itr->read_finish(itr, mm->idx);
+		if (err < 0)
+			return err;
+	}
+
+	return 1;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 2071b36560a0..7ab4850703f0 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -18,13 +18,18 @@
 
 #include <sys/types.h>
 #include <stdbool.h>
-
+#include <stddef.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
 #include "../perf.h"
 
+union perf_event;
+struct perf_session;
 struct perf_evlist;
+struct perf_tool;
+struct record_opts;
+struct auxtrace_info_event;
 
 /**
  * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
@@ -70,6 +75,29 @@ struct auxtrace_mmap_params {
 	int		cpu;
 };
 
+/**
+ * struct auxtrace_record - callbacks for recording AUX area data.
+ * @recording_options: validate and process recording options
+ * @info_priv_size: return the size of the private data in auxtrace_info_event
+ * @info_fill: fill-in the private data in auxtrace_info_event
+ * @free: free this auxtrace record structure
+ * @reference: provide a 64-bit reference number for auxtrace_event
+ * @read_finish: called after reading from an auxtrace mmap
+ */
+struct auxtrace_record {
+	int (*recording_options)(struct auxtrace_record *itr,
+				 struct perf_evlist *evlist,
+				 struct record_opts *opts);
+	size_t (*info_priv_size)(struct auxtrace_record *itr);
+	int (*info_fill)(struct auxtrace_record *itr,
+			 struct perf_session *session,
+			 struct auxtrace_info_event *auxtrace_info,
+			 size_t priv_size);
+	void (*free)(struct auxtrace_record *itr);
+	u64 (*reference)(struct auxtrace_record *itr);
+	int (*read_finish)(struct auxtrace_record *itr, int idx);
+};
+
 static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->userpg;
@@ -114,4 +142,30 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 				   struct perf_evlist *evlist, int idx,
 				   bool per_cpu);
 
+typedef int (*process_auxtrace_t)(struct perf_tool *tool,
+				  union perf_event *event, void *data1,
+				  size_t len1, void *data2, size_t len2);
+
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn);
+
+struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
+					      int *err);
+
+int auxtrace_record__options(struct auxtrace_record *itr,
+			     struct perf_evlist *evlist,
+			     struct record_opts *opts);
+size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr);
+int auxtrace_record__info_fill(struct auxtrace_record *itr,
+			       struct perf_session *session,
+			       struct auxtrace_info_event *auxtrace_info,
+			       size_t priv_size);
+void auxtrace_record__free(struct auxtrace_record *itr);
+u64 auxtrace_record__reference(struct auxtrace_record *itr);
+
+int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
+					 struct perf_tool *tool,
+					 struct perf_session *session,
+					 perf_event__handler_t process);
+
 #endif
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 8acd0df88b5c..0ccfa498f7b8 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -119,7 +119,16 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts)
 			evsel->attr.comm_exec = 1;
 	}
 
-	if (evlist->nr_entries > 1) {
+	if (opts->full_auxtrace) {
+		/*
+		 * Need to be able to synthesize and parse selected events with
+		 * arbitrary sample types, which requires always being able to
+		 * match the id.
+		 */
+		use_sample_identifier = perf_can_sample_identifier();
+		evlist__for_each(evlist, evsel)
+			perf_evsel__set_sample_id(evsel, use_sample_identifier);
+	} else if (evlist->nr_entries > 1) {
 		struct perf_evsel *first = perf_evlist__first(evlist);
 
 		evlist__for_each(evlist, evsel) {

From ef149c2548b82e75f7f9d138457ef4dc4ed97448 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:45 +0300
Subject: [PATCH 018/305] perf record: Add basic AUX area tracing support

Amend the perf record tool to read the AUX area tracing mmap and
synthesize AUX area tracing events.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 81 +++++++++++++++++++++++++++++++++++--
 1 file changed, 77 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c3efdfb630b5..0fb11d4eaeee 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -27,6 +27,7 @@
 #include "util/cpumap.h"
 #include "util/thread_map.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -38,6 +39,7 @@ struct record {
 	struct record_opts	opts;
 	u64			bytes_written;
 	struct perf_data_file	file;
+	struct auxtrace_record	*itr;
 	struct perf_evlist	*evlist;
 	struct perf_session	*session;
 	const char		*progname;
@@ -110,6 +112,44 @@ out:
 	return rc;
 }
 
+static int record__process_auxtrace(struct perf_tool *tool,
+				    union perf_event *event, void *data1,
+				    size_t len1, void *data2, size_t len2)
+{
+	struct record *rec = container_of(tool, struct record, tool);
+	size_t padding;
+	u8 pad[8] = {0};
+
+	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
+	padding = (len1 + len2) & 7;
+	if (padding)
+		padding = 8 - padding;
+
+	record__write(rec, event, event->header.size);
+	record__write(rec, data1, len1);
+	if (len2)
+		record__write(rec, data2, len2);
+	record__write(rec, &pad, padding);
+
+	return 0;
+}
+
+static int record__auxtrace_mmap_read(struct record *rec,
+				      struct auxtrace_mmap *mm)
+{
+	int ret;
+
+	ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool,
+				  record__process_auxtrace);
+	if (ret < 0)
+		return ret;
+
+	if (ret)
+		rec->samples++;
+
+	return 0;
+}
+
 static volatile int done = 0;
 static volatile int signr = -1;
 static volatile int child_finished = 0;
@@ -169,13 +209,15 @@ try_again:
 		goto out;
 	}
 
-	if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) {
+	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
+				 opts->auxtrace_mmap_pages, false) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
 			       "/proc/sys/kernel/perf_event_mlock_kb,\n"
 			       "or try again with a smaller value of -m/--mmap_pages.\n"
-			       "(current value: %u)\n", opts->mmap_pages);
+			       "(current value: %u,%u)\n",
+			       opts->mmap_pages, opts->auxtrace_mmap_pages);
 			rc = -errno;
 		} else {
 			pr_err("failed to mmap with %d (%s)\n", errno,
@@ -270,12 +312,20 @@ static int record__mmap_read_all(struct record *rec)
 	int rc = 0;
 
 	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+		struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap;
+
 		if (rec->evlist->mmap[i].base) {
 			if (record__mmap_read(rec, i) != 0) {
 				rc = -1;
 				goto out;
 			}
 		}
+
+		if (mm->base &&
+		    record__auxtrace_mmap_read(rec, mm) != 0) {
+			rc = -1;
+			goto out;
+		}
 	}
 
 	/*
@@ -305,6 +355,9 @@ static void record__init_features(struct record *rec)
 
 	if (!rec->opts.branch_stack)
 		perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK);
+
+	if (!rec->opts.full_auxtrace)
+		perf_header__clear_feat(&session->header, HEADER_AUXTRACE);
 }
 
 static volatile int workload_exec_errno;
@@ -421,6 +474,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		}
 	}
 
+	if (rec->opts.full_auxtrace) {
+		err = perf_event__synthesize_auxtrace_info(rec->itr, tool,
+					session, process_synthesized_event);
+		if (err)
+			goto out_delete_session;
+	}
+
 	err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event,
 						 machine);
 	if (err < 0)
@@ -553,7 +613,7 @@ out_child:
 	if (!err && !quiet) {
 		char samples[128];
 
-		if (rec->samples)
+		if (rec->samples && !rec->opts.full_auxtrace)
 			scnprintf(samples, sizeof(samples),
 				  " (%" PRIu64 " samples)", rec->samples);
 		else
@@ -936,7 +996,7 @@ struct option *record_options = __record_options;
 
 int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	int err = -ENOMEM;
+	int err;
 	struct record *rec = &record;
 	char errbuf[BUFSIZ];
 
@@ -957,6 +1017,14 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(record_usage, record_options);
 	}
 
+	if (!rec->itr) {
+		rec->itr = auxtrace_record__init(rec->evlist, &err);
+		if (err)
+			return err;
+	}
+
+	err = -ENOMEM;
+
 	symbol__init(NULL);
 
 	if (symbol_conf.kptr_restrict)
@@ -1002,6 +1070,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
 		usage_with_options(record_usage, record_options);
 
+	err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts);
+	if (err)
+		goto out_symbol_exit;
+
 	if (record_opts__config(&rec->opts)) {
 		err = -EINVAL;
 		goto out_symbol_exit;
@@ -1011,5 +1083,6 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 out_symbol_exit:
 	perf_evlist__delete(rec->evlist);
 	symbol__exit();
+	auxtrace_record__free(rec->itr);
 	return err;
 }

From e9db1310d95c9d02763643108d51e47d9eb79ac9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:46 +0300
Subject: [PATCH 019/305] perf record: Extend -m option for AUX area tracing
 mmap pages

Extend the -m option so that the number of mmap pages for AUX area
tracing can be specified by adding a comma followed by the number of
pages.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |  2 +
 tools/perf/builtin-record.c              | 49 ++++++++++++++++++++++--
 tools/perf/util/evlist.c                 | 10 +++--
 tools/perf/util/evlist.h                 |  1 +
 4 files changed, 56 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 4847a793de65..76a141e326da 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -108,6 +108,8 @@ OPTIONS
 	Number of mmap data pages (must be a power of two) or size
 	specification with appended unit character - B/K/M/G. The
 	size is rounded up to have nearest pages power of two value.
+	Also, by adding a comma, the number of mmap pages for AUX
+	area tracing can be specified.
 
 --group::
 	Put all events in a single event group.  This precedes the --event
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0fb11d4eaeee..4c9aaa1f688a 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -855,6 +855,49 @@ static int parse_clockid(const struct option *opt, const char *str, int unset)
 	return -1;
 }
 
+static int record__parse_mmap_pages(const struct option *opt,
+				    const char *str,
+				    int unset __maybe_unused)
+{
+	struct record_opts *opts = opt->value;
+	char *s, *p;
+	unsigned int mmap_pages;
+	int ret;
+
+	if (!str)
+		return -EINVAL;
+
+	s = strdup(str);
+	if (!s)
+		return -ENOMEM;
+
+	p = strchr(s, ',');
+	if (p)
+		*p = '\0';
+
+	if (*s) {
+		ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s);
+		if (ret)
+			goto out_free;
+		opts->mmap_pages = mmap_pages;
+	}
+
+	if (!p) {
+		ret = 0;
+		goto out_free;
+	}
+
+	ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1);
+	if (ret)
+		goto out_free;
+
+	opts->auxtrace_mmap_pages = mmap_pages;
+
+out_free:
+	free(s);
+	return ret;
+}
+
 static const char * const __record_usage[] = {
 	"perf record [<options>] [<command>]",
 	"perf record [<options>] -- <command> [<options>]",
@@ -935,9 +978,9 @@ struct option __record_options[] = {
 			&record.opts.no_inherit_set,
 			"child tasks do not inherit counters"),
 	OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"),
-	OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages",
-		     "number of mmap data pages",
-		     perf_evlist__parse_mmap_pages),
+	OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]",
+		     "number of mmap data pages and AUX area tracing mmap pages",
+		     record__parse_mmap_pages),
 	OPT_BOOLEAN(0, "group", &record.opts.group,
 		    "put the counters into a counter group"),
 	OPT_CALLBACK_NOOPT('g', NULL, &record.opts,
diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 5b26f45ef8ff..0ec8c3bc8ab8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -1000,10 +1000,8 @@ static long parse_pages_arg(const char *str, unsigned long min,
 	return pages;
 }
 
-int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
-				  int unset __maybe_unused)
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str)
 {
-	unsigned int *mmap_pages = opt->value;
 	unsigned long max = UINT_MAX;
 	long pages;
 
@@ -1020,6 +1018,12 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
 	return 0;
 }
 
+int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str,
+				  int unset __maybe_unused)
+{
+	return __perf_evlist__parse_mmap_pages(opt->value, str);
+}
+
 /**
  * perf_evlist__mmap_ex - Create mmaps to receive events.
  * @evlist: list of events
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 8dc986ddaa01..c07b1a94a724 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -124,6 +124,7 @@ int perf_evlist__start_workload(struct perf_evlist *evlist);
 
 struct option;
 
+int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str);
 int perf_evlist__parse_mmap_pages(const struct option *opt,
 				  const char *str,
 				  int unset);

From e9bf54d25f4f64c410c2aca644749a3325b96f5a Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:47 +0300
Subject: [PATCH 020/305] perf tools: Add a user event for AUX area tracing
 errors

Errors encountered when decoding an AUX area trace need to be reported
to the user. However the "user" might be a script or another tool, so
provide a new user event to capture those errors.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/event.c   |  1 +
 tools/perf/util/event.h   | 16 ++++++++++++++++
 tools/perf/util/session.c | 25 +++++++++++++++++++++++++
 tools/perf/util/tool.h    |  3 ++-
 4 files changed, 44 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index adada16a3f3b..32323e5d7b25 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -31,6 +31,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_ID_INDEX]			= "ID_INDEX",
 	[PERF_RECORD_AUXTRACE_INFO]		= "AUXTRACE_INFO",
 	[PERF_RECORD_AUXTRACE]			= "AUXTRACE",
+	[PERF_RECORD_AUXTRACE_ERROR]		= "AUXTRACE_ERROR",
 };
 
 const char *perf_event__name(unsigned int id)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 6d81beca1f27..657dcfaf0389 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -217,6 +217,7 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_ID_INDEX			= 69,
 	PERF_RECORD_AUXTRACE_INFO		= 70,
 	PERF_RECORD_AUXTRACE			= 71,
+	PERF_RECORD_AUXTRACE_ERROR		= 72,
 	PERF_RECORD_HEADER_MAX
 };
 
@@ -300,6 +301,20 @@ struct auxtrace_event {
 	u32 reserved__; /* For alignment */
 };
 
+#define MAX_AUXTRACE_ERROR_MSG 64
+
+struct auxtrace_error_event {
+	struct perf_event_header header;
+	u32 type;
+	u32 code;
+	u32 cpu;
+	u32 pid;
+	u32 tid;
+	u32 reserved__; /* For alignment */
+	u64 ip;
+	char msg[MAX_AUXTRACE_ERROR_MSG];
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -317,6 +332,7 @@ union perf_event {
 	struct id_index_event		id_index;
 	struct auxtrace_info_event	auxtrace_info;
 	struct auxtrace_event		auxtrace;
+	struct auxtrace_error_event	auxtrace_error;
 };
 
 void perf_event__print_totals(void);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index d46ac4038fc7..4008dab73424 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -296,6 +296,15 @@ static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused,
 	return event->auxtrace.size;
 }
 
+static
+int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused,
+				      union perf_event *event __maybe_unused,
+				      struct perf_session *session __maybe_unused)
+{
+	dump_printf(": unhandled!\n");
+	return 0;
+}
+
 void perf_tool__fill_defaults(struct perf_tool *tool)
 {
 	if (tool->sample == NULL)
@@ -336,6 +345,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->auxtrace_info = process_event_auxtrace_info_stub;
 	if (tool->auxtrace == NULL)
 		tool->auxtrace = process_event_auxtrace_stub;
+	if (tool->auxtrace_error == NULL)
+		tool->auxtrace_error = process_event_auxtrace_error_stub;
 }
 
 static void swap_sample_id_all(union perf_event *event, void *data)
@@ -539,6 +550,17 @@ static void perf_event__auxtrace_swap(union perf_event *event,
 	event->auxtrace.cpu       = bswap_32(event->auxtrace.cpu);
 }
 
+static void perf_event__auxtrace_error_swap(union perf_event *event,
+					    bool sample_id_all __maybe_unused)
+{
+	event->auxtrace_error.type = bswap_32(event->auxtrace_error.type);
+	event->auxtrace_error.code = bswap_32(event->auxtrace_error.code);
+	event->auxtrace_error.cpu  = bswap_32(event->auxtrace_error.cpu);
+	event->auxtrace_error.pid  = bswap_32(event->auxtrace_error.pid);
+	event->auxtrace_error.tid  = bswap_32(event->auxtrace_error.tid);
+	event->auxtrace_error.ip   = bswap_64(event->auxtrace_error.ip);
+}
+
 typedef void (*perf_event__swap_op)(union perf_event *event,
 				    bool sample_id_all);
 
@@ -560,6 +582,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_ID_INDEX]		  = perf_event__all64_swap,
 	[PERF_RECORD_AUXTRACE_INFO]	  = perf_event__auxtrace_info_swap,
 	[PERF_RECORD_AUXTRACE]		  = perf_event__auxtrace_swap,
+	[PERF_RECORD_AUXTRACE_ERROR]	  = perf_event__auxtrace_error_swap,
 	[PERF_RECORD_HEADER_MAX]	  = NULL,
 };
 
@@ -1049,6 +1072,8 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		/* setup for reading amidst mmap */
 		lseek(fd, file_offset + event->header.size, SEEK_SET);
 		return tool->auxtrace(tool, event, session);
+	case PERF_RECORD_AUXTRACE_ERROR:
+		return tool->auxtrace_error(tool, event, session);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 0146f8ef5cc3..20afe19a8eb0 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -50,7 +50,8 @@ struct perf_tool {
 	event_oe	finished_round;
 	event_op2	build_id,
 			id_index,
-			auxtrace_info;
+			auxtrace_info,
+			auxtrace_error;
 	event_op3	auxtrace;
 	bool		ordered_events;
 	bool		ordering_requires_timestamps;

From c446870d80f37281a927b5f6984bd47397a7cb03 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:48 +0300
Subject: [PATCH 021/305] perf session: Add hooks to allow transparent decoding
 of AUX area tracing data

Hook into session processing so that AUX area decoding can synthesize
events transparently to the tools.

The advantages of transparent decoding are that tools can be used
directly with perf.data files containing AUX area tracing data, which is
easier for the user and more efficient than having a separate decoding
tool.

This will work as follows:

1. Tools will feed auxtrace events to the decoder using
   perf_tool->auxtrace() (support for that still to come).

2. The decoder can process side-band events as needed due
   to the auxtrace->process_event() hook.

3. The decoder can deliver synthesized events into the
   event stream using perf_session__deliver_synth_event().

Note the expectation is that decoding will work on data that is
time-ordered with respect to the per-cpu or per-thread contexts that
were recorded.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.h | 55 ++++++++++++++++++++++++++++++++++++++
 tools/perf/util/session.c  | 54 ++++++++++++++++++++++++++++++-------
 tools/perf/util/session.h  |  3 +++
 3 files changed, 102 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 7ab4850703f0..199fc27b3954 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 #include "../perf.h"
+#include "session.h"
 
 union perf_event;
 struct perf_session;
@@ -31,6 +32,24 @@ struct perf_tool;
 struct record_opts;
 struct auxtrace_info_event;
 
+/**
+ * struct auxtrace - session callbacks to allow AUX area data decoding.
+ * @process_event: lets the decoder see all session events
+ * @flush_events: process any remaining data
+ * @free_events: free resources associated with event processing
+ * @free: free resources associated with the session
+ */
+struct auxtrace {
+	int (*process_event)(struct perf_session *session,
+			     union perf_event *event,
+			     struct perf_sample *sample,
+			     struct perf_tool *tool);
+	int (*flush_events)(struct perf_session *session,
+			    struct perf_tool *tool);
+	void (*free_events)(struct perf_session *session);
+	void (*free)(struct perf_session *session);
+};
+
 /**
  * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
  * @base: address of mapped area
@@ -168,4 +187,40 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
 
+static inline int auxtrace__process_event(struct perf_session *session,
+					  union perf_event *event,
+					  struct perf_sample *sample,
+					  struct perf_tool *tool)
+{
+	if (!session->auxtrace)
+		return 0;
+
+	return session->auxtrace->process_event(session, event, sample, tool);
+}
+
+static inline int auxtrace__flush_events(struct perf_session *session,
+					 struct perf_tool *tool)
+{
+	if (!session->auxtrace)
+		return 0;
+
+	return session->auxtrace->flush_events(session, tool);
+}
+
+static inline void auxtrace__free_events(struct perf_session *session)
+{
+	if (!session->auxtrace)
+		return;
+
+	return session->auxtrace->free_events(session);
+}
+
+static inline void auxtrace__free(struct perf_session *session)
+{
+	if (!session->auxtrace)
+		return;
+
+	return session->auxtrace->free(session);
+}
+
 #endif
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 4008dab73424..7271c6bcb2dc 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -15,12 +15,13 @@
 #include "cpumap.h"
 #include "perf_regs.h"
 #include "asm/bug.h"
+#include "auxtrace.h"
 
-static int machines__deliver_event(struct machines *machines,
-				   struct perf_evlist *evlist,
-				   union perf_event *event,
-				   struct perf_sample *sample,
-				   struct perf_tool *tool, u64 file_offset);
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_sample *sample,
+				       struct perf_tool *tool,
+				       u64 file_offset);
 
 static int perf_session__open(struct perf_session *session)
 {
@@ -105,8 +106,8 @@ static int ordered_events__deliver_event(struct ordered_events *oe,
 		return ret;
 	}
 
-	return machines__deliver_event(&session->machines, session->evlist, event->event,
-				       &sample, session->tool, event->file_offset);
+	return perf_session__deliver_event(session, event->event, &sample,
+					   session->tool, event->file_offset);
 }
 
 struct perf_session *perf_session__new(struct perf_data_file *file,
@@ -185,6 +186,7 @@ static void perf_session_env__delete(struct perf_session_env *env)
 
 void perf_session__delete(struct perf_session *session)
 {
+	auxtrace__free(session);
 	perf_session__destroy_kernel_maps(session);
 	perf_session__delete_threads(session);
 	perf_session_env__delete(&session->header.env);
@@ -1030,6 +1032,24 @@ static int machines__deliver_event(struct machines *machines,
 	}
 }
 
+static int perf_session__deliver_event(struct perf_session *session,
+				       union perf_event *event,
+				       struct perf_sample *sample,
+				       struct perf_tool *tool,
+				       u64 file_offset)
+{
+	int ret;
+
+	ret = auxtrace__process_event(session, event, sample, tool);
+	if (ret < 0)
+		return ret;
+	if (ret > 0)
+		return 0;
+
+	return machines__deliver_event(&session->machines, session->evlist,
+				       event, sample, tool, file_offset);
+}
+
 static s64 perf_session__process_user_event(struct perf_session *session,
 					    union perf_event *event,
 					    u64 file_offset)
@@ -1190,8 +1210,8 @@ static s64 perf_session__process_event(struct perf_session *session,
 			return ret;
 	}
 
-	return machines__deliver_event(&session->machines, evlist, event,
-				       &sample, tool, file_offset);
+	return perf_session__deliver_event(session, event, &sample, tool,
+					   file_offset);
 }
 
 void perf_event_header__bswap(struct perf_event_header *hdr)
@@ -1350,10 +1370,14 @@ more:
 done:
 	/* do the final flush for ordered samples */
 	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
 out_err:
 	free(buf);
 	perf_session__warn_about_errors(session);
 	ordered_events__free(&session->ordered_events);
+	auxtrace__free_events(session);
 	return err;
 }
 
@@ -1496,10 +1520,14 @@ more:
 out:
 	/* do the final flush for ordered samples */
 	err = ordered_events__flush(oe, OE_FLUSH__FINAL);
+	if (err)
+		goto out_err;
+	err = auxtrace__flush_events(session, tool);
 out_err:
 	ui_progress__finish();
 	perf_session__warn_about_errors(session);
 	ordered_events__free(&session->ordered_events);
+	auxtrace__free_events(session);
 	session->one_mmap = false;
 	return err;
 }
@@ -1582,7 +1610,13 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp
 
 size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp)
 {
-	size_t ret = fprintf(fp, "Aggregated stats:\n");
+	size_t ret;
+	const char *msg = "";
+
+	if (perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+		msg = " (excludes AUX area (e.g. instruction trace) decoded / synthesized events)";
+
+	ret = fprintf(fp, "Aggregated stats:%s\n", msg);
 
 	ret += events_stats__fprintf(&session->evlist->stats, fp);
 	return ret;
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index d5fa7b7916ef..8a69d3bfafdf 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -15,10 +15,13 @@
 struct ip_callchain;
 struct thread;
 
+struct auxtrace;
+
 struct perf_session {
 	struct perf_header	header;
 	struct machines		machines;
 	struct perf_evlist	*evlist;
+	struct auxtrace		*auxtrace;
 	struct trace_event	tevent;
 	bool			repipe;
 	bool			one_mmap;

From f6986c95af84ff2a76847910b4322f542b793bbf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:49 +0300
Subject: [PATCH 022/305] perf session: Add instruction tracing options

It is assumed that AUX area decoding will synthesize events for
consumption by other tools.

At this time, the main use of AUX area tracing will be to capture
instruction trace (aka processor trace) data.

The nature of instruction tracing suggests the initial inclusion of
options for "instructions" and "branches" events, but more could be
added as needed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-10-git-send-email-adrian.hunter@intel.com
[ Added ref to tools/perf/Documentation/perf-script.txt describing what is parsed ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 136 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h |  43 ++++++++++++
 tools/perf/util/session.h  |   2 +
 3 files changed, 181 insertions(+)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 2cafea2ec015..db40d6c6c010 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -37,6 +37,7 @@
 
 #include "event.h"
 #include "debug.h"
+#include "parse-options.h"
 
 int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
 			struct auxtrace_mmap_params *mp,
@@ -200,6 +201,141 @@ out_free:
 	return err;
 }
 
+#define PERF_ITRACE_DEFAULT_PERIOD_TYPE		PERF_ITRACE_PERIOD_NANOSECS
+#define PERF_ITRACE_DEFAULT_PERIOD		100000
+#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ	16
+#define PERF_ITRACE_MAX_CALLCHAIN_SZ		1024
+
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
+{
+	synth_opts->instructions = true;
+	synth_opts->branches = true;
+	synth_opts->errors = true;
+	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+}
+
+/*
+ * Please check tools/perf/Documentation/perf-script.txt for information
+ * about the options parsed here, which is introduced after this cset,
+ * when support in 'perf script' for these options is introduced.
+ */
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+			    int unset)
+{
+	struct itrace_synth_opts *synth_opts = opt->value;
+	const char *p;
+	char *endptr;
+
+	synth_opts->set = true;
+
+	if (unset) {
+		synth_opts->dont_decode = true;
+		return 0;
+	}
+
+	if (!str) {
+		itrace_synth_opts__set_default(synth_opts);
+		return 0;
+	}
+
+	for (p = str; *p;) {
+		switch (*p++) {
+		case 'i':
+			synth_opts->instructions = true;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				synth_opts->period = strtoull(p, &endptr, 10);
+				p = endptr;
+				while (*p == ' ' || *p == ',')
+					p += 1;
+				switch (*p++) {
+				case 'i':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_INSTRUCTIONS;
+					break;
+				case 't':
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_TICKS;
+					break;
+				case 'm':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'u':
+					synth_opts->period *= 1000;
+					/* Fall through */
+				case 'n':
+					if (*p++ != 's')
+						goto out_err;
+					synth_opts->period_type =
+						PERF_ITRACE_PERIOD_NANOSECS;
+					break;
+				case '\0':
+					goto out;
+				default:
+					goto out_err;
+				}
+			}
+			break;
+		case 'b':
+			synth_opts->branches = true;
+			break;
+		case 'e':
+			synth_opts->errors = true;
+			break;
+		case 'd':
+			synth_opts->log = true;
+			break;
+		case 'c':
+			synth_opts->branches = true;
+			synth_opts->calls = true;
+			break;
+		case 'r':
+			synth_opts->branches = true;
+			synth_opts->returns = true;
+			break;
+		case 'g':
+			synth_opts->instructions = true;
+			synth_opts->callchain = true;
+			synth_opts->callchain_sz =
+					PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
+			while (*p == ' ' || *p == ',')
+				p += 1;
+			if (isdigit(*p)) {
+				unsigned int val;
+
+				val = strtoul(p, &endptr, 10);
+				p = endptr;
+				if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ)
+					goto out_err;
+				synth_opts->callchain_sz = val;
+			}
+			break;
+		case ' ':
+		case ',':
+			break;
+		default:
+			goto out_err;
+		}
+	}
+out:
+	if (synth_opts->instructions) {
+		if (!synth_opts->period_type)
+			synth_opts->period_type =
+					PERF_ITRACE_DEFAULT_PERIOD_TYPE;
+		if (!synth_opts->period)
+			synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
+	}
+
+	return 0;
+
+out_err:
+	pr_err("Bad Instruction Tracing options '%s'\n", str);
+	return -EINVAL;
+}
+
 int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn)
 {
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 199fc27b3954..6355315a91fb 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -29,9 +29,49 @@ union perf_event;
 struct perf_session;
 struct perf_evlist;
 struct perf_tool;
+struct option;
 struct record_opts;
 struct auxtrace_info_event;
 
+enum itrace_period_type {
+	PERF_ITRACE_PERIOD_INSTRUCTIONS,
+	PERF_ITRACE_PERIOD_TICKS,
+	PERF_ITRACE_PERIOD_NANOSECS,
+};
+
+/**
+ * struct itrace_synth_opts - AUX area tracing synthesis options.
+ * @set: indicates whether or not options have been set
+ * @inject: indicates the event (not just the sample) must be fully synthesized
+ *          because 'perf inject' will write it out
+ * @instructions: whether to synthesize 'instructions' events
+ * @branches: whether to synthesize 'branches' events
+ * @errors: whether to synthesize decoder error events
+ * @dont_decode: whether to skip decoding entirely
+ * @log: write a decoding log
+ * @calls: limit branch samples to calls (can be combined with @returns)
+ * @returns: limit branch samples to returns (can be combined with @calls)
+ * @callchain: add callchain to 'instructions' events
+ * @callchain_sz: maximum callchain size
+ * @period: 'instructions' events period
+ * @period_type: 'instructions' events period type
+ */
+struct itrace_synth_opts {
+	bool			set;
+	bool			inject;
+	bool			instructions;
+	bool			branches;
+	bool			errors;
+	bool			dont_decode;
+	bool			log;
+	bool			calls;
+	bool			returns;
+	bool			callchain;
+	unsigned int		callchain_sz;
+	unsigned long long	period;
+	enum itrace_period_type	period_type;
+};
+
 /**
  * struct auxtrace - session callbacks to allow AUX area data decoding.
  * @process_event: lets the decoder see all session events
@@ -186,6 +226,9 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
+int itrace_parse_synth_opts(const struct option *opt, const char *str,
+			    int unset);
+void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
 
 static inline int auxtrace__process_event(struct perf_session *session,
 					  union perf_event *event,
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 8a69d3bfafdf..9ed51353a3d7 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -16,12 +16,14 @@ struct ip_callchain;
 struct thread;
 
 struct auxtrace;
+struct itrace_synth_opts;
 
 struct perf_session {
 	struct perf_header	header;
 	struct machines		machines;
 	struct perf_evlist	*evlist;
 	struct auxtrace		*auxtrace;
+	struct itrace_synth_opts *itrace_synth_opts;
 	struct trace_event	tevent;
 	bool			repipe;
 	bool			one_mmap;

From 85ed47299e979b861e314c2e177a6de5d9163a85 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:50 +0300
Subject: [PATCH 023/305] perf auxtrace: Add helpers for AUX area tracing
 errors

Add functions to synthesize, count and print AUX area tracing error
events.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-11-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 81 ++++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h | 14 +++++++
 tools/perf/util/event.h    |  6 +++
 tools/perf/util/session.c  |  3 ++
 4 files changed, 104 insertions(+)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index db40d6c6c010..905188bd1dfa 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -24,6 +24,7 @@
 #include <linux/log2.h>
 
 #include <stdlib.h>
+#include <stdio.h>
 #include <string.h>
 #include <errno.h>
 
@@ -36,6 +37,7 @@
 #include "auxtrace.h"
 
 #include "event.h"
+#include "session.h"
 #include "debug.h"
 #include "parse-options.h"
 
@@ -172,6 +174,28 @@ auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
 	return NULL;
 }
 
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg)
+{
+	size_t size;
+
+	memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event));
+
+	auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR;
+	auxtrace_error->type = type;
+	auxtrace_error->code = code;
+	auxtrace_error->cpu = cpu;
+	auxtrace_error->pid = pid;
+	auxtrace_error->tid = tid;
+	auxtrace_error->ip = ip;
+	strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG);
+
+	size = (void *)auxtrace_error->msg - (void *)auxtrace_error +
+	       strlen(auxtrace_error->msg) + 1;
+	auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64));
+}
+
 int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
@@ -336,6 +360,63 @@ out_err:
 	return -EINVAL;
 }
 
+static const char * const auxtrace_error_type_name[] = {
+	[PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace",
+};
+
+static const char *auxtrace_error_name(int type)
+{
+	const char *error_type_name = NULL;
+
+	if (type < PERF_AUXTRACE_ERROR_MAX)
+		error_type_name = auxtrace_error_type_name[type];
+	if (!error_type_name)
+		error_type_name = "unknown AUX";
+	return error_type_name;
+}
+
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp)
+{
+	struct auxtrace_error_event *e = &event->auxtrace_error;
+	int ret;
+
+	ret = fprintf(fp, " %s error type %u",
+		      auxtrace_error_name(e->type), e->type);
+	ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n",
+		       e->cpu, e->pid, e->tid, e->ip, e->code, e->msg);
+	return ret;
+}
+
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event)
+{
+	struct auxtrace_error_event *e = &event->auxtrace_error;
+
+	if (e->type < PERF_AUXTRACE_ERROR_MAX)
+		session->evlist->stats.nr_auxtrace_errors[e->type] += 1;
+}
+
+void events_stats__auxtrace_error_warn(const struct events_stats *stats)
+{
+	int i;
+
+	for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) {
+		if (!stats->nr_auxtrace_errors[i])
+			continue;
+		ui__warning("%u %s errors\n",
+			    stats->nr_auxtrace_errors[i],
+			    auxtrace_error_name(i));
+	}
+}
+
+int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
+				       union perf_event *event,
+				       struct perf_session *session __maybe_unused)
+{
+	perf_event__fprintf_auxtrace_error(event, stdout);
+	return 0;
+}
+
 int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn)
 {
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 6355315a91fb..bcfb9288bfea 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -23,6 +23,7 @@
 #include <linux/types.h>
 
 #include "../perf.h"
+#include "event.h"
 #include "session.h"
 
 union perf_event;
@@ -32,6 +33,7 @@ struct perf_tool;
 struct option;
 struct record_opts;
 struct auxtrace_info_event;
+struct events_stats;
 
 enum itrace_period_type {
 	PERF_ITRACE_PERIOD_INSTRUCTIONS,
@@ -222,14 +224,26 @@ int auxtrace_record__info_fill(struct auxtrace_record *itr,
 void auxtrace_record__free(struct auxtrace_record *itr);
 u64 auxtrace_record__reference(struct auxtrace_record *itr);
 
+void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
+			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
+			  const char *msg);
+
 int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
+int perf_event__process_auxtrace_error(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session);
 int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			    int unset);
 void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts);
 
+size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp);
+void perf_session__auxtrace_error_inc(struct perf_session *session,
+				      union perf_event *event);
+void events_stats__auxtrace_error_warn(const struct events_stats *stats);
+
 static inline int auxtrace__process_event(struct perf_session *session,
 					  union perf_event *event,
 					  struct perf_sample *sample,
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 657dcfaf0389..8ef37251a7a9 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -221,6 +221,11 @@ enum perf_user_event_type { /* above any possible kernel type */
 	PERF_RECORD_HEADER_MAX
 };
 
+enum auxtrace_error_type {
+	PERF_AUXTRACE_ERROR_ITRACE  = 1,
+	PERF_AUXTRACE_ERROR_MAX
+};
+
 /*
  * The kernel collects the number of events it couldn't send in a stretch and
  * when possible sends this number in a PERF_RECORD_LOST event. The number of
@@ -245,6 +250,7 @@ struct events_stats {
 	u32 nr_invalid_chains;
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
+	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
 };
 
 struct attr_event {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 7271c6bcb2dc..1db3ce118a54 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1093,6 +1093,7 @@ static s64 perf_session__process_user_event(struct perf_session *session,
 		lseek(fd, file_offset + event->header.size, SEEK_SET);
 		return tool->auxtrace(tool, event, session);
 	case PERF_RECORD_AUXTRACE_ERROR:
+		perf_session__auxtrace_error_inc(session, event);
 		return tool->auxtrace_error(tool, event, session);
 	default:
 		return -EINVAL;
@@ -1282,6 +1283,8 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 
 	if (oe->nr_unordered_events != 0)
 		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
+
+	events_stats__auxtrace_error_warn(stats);
 }
 
 volatile int session_done;

From e502789302a6ece9fa4b9505df234c319bfa0650 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 21 Apr 2015 12:21:51 +0300
Subject: [PATCH 024/305] perf auxtrace: Add helpers for queuing AUX area
 tracing data

Provide functions to queue AUX area tracing data buffers for processing.
A AUX area decoder need not use the queues, however Intel BTS and Intel
PT will use them.

There is one queue for each of the mmap buffers that were used for
recording.  Because those mmaps were associated with per-cpu or
per-thread contexts, the data is time-ordered with respect to those
contexts.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429608111-18160-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 305 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h |  87 +++++++++++
 2 files changed, 392 insertions(+)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 905188bd1dfa..252417ac28e2 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -22,11 +22,15 @@
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/string.h>
 
+#include <sys/param.h>
 #include <stdlib.h>
 #include <stdio.h>
 #include <string.h>
+#include <limits.h>
 #include <errno.h>
+#include <linux/list.h>
 
 #include "../perf.h"
 #include "util.h"
@@ -122,6 +126,241 @@ void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
 	}
 }
 
+#define AUXTRACE_INIT_NR_QUEUES	32
+
+static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues)
+{
+	struct auxtrace_queue *queue_array;
+	unsigned int max_nr_queues, i;
+
+	max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue);
+	if (nr_queues > max_nr_queues)
+		return NULL;
+
+	queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue));
+	if (!queue_array)
+		return NULL;
+
+	for (i = 0; i < nr_queues; i++) {
+		INIT_LIST_HEAD(&queue_array[i].head);
+		queue_array[i].priv = NULL;
+	}
+
+	return queue_array;
+}
+
+int auxtrace_queues__init(struct auxtrace_queues *queues)
+{
+	queues->nr_queues = AUXTRACE_INIT_NR_QUEUES;
+	queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues);
+	if (!queues->queue_array)
+		return -ENOMEM;
+	return 0;
+}
+
+static int auxtrace_queues__grow(struct auxtrace_queues *queues,
+				 unsigned int new_nr_queues)
+{
+	unsigned int nr_queues = queues->nr_queues;
+	struct auxtrace_queue *queue_array;
+	unsigned int i;
+
+	if (!nr_queues)
+		nr_queues = AUXTRACE_INIT_NR_QUEUES;
+
+	while (nr_queues && nr_queues < new_nr_queues)
+		nr_queues <<= 1;
+
+	if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues)
+		return -EINVAL;
+
+	queue_array = auxtrace_alloc_queue_array(nr_queues);
+	if (!queue_array)
+		return -ENOMEM;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		list_splice_tail(&queues->queue_array[i].head,
+				 &queue_array[i].head);
+		queue_array[i].priv = queues->queue_array[i].priv;
+	}
+
+	queues->nr_queues = nr_queues;
+	queues->queue_array = queue_array;
+
+	return 0;
+}
+
+static void *auxtrace_copy_data(u64 size, struct perf_session *session)
+{
+	int fd = perf_data_file__fd(session->file);
+	void *p;
+	ssize_t ret;
+
+	if (size > SSIZE_MAX)
+		return NULL;
+
+	p = malloc(size);
+	if (!p)
+		return NULL;
+
+	ret = readn(fd, p, size);
+	if (ret != (ssize_t)size) {
+		free(p);
+		return NULL;
+	}
+
+	return p;
+}
+
+static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues,
+				       unsigned int idx,
+				       struct auxtrace_buffer *buffer)
+{
+	struct auxtrace_queue *queue;
+	int err;
+
+	if (idx >= queues->nr_queues) {
+		err = auxtrace_queues__grow(queues, idx + 1);
+		if (err)
+			return err;
+	}
+
+	queue = &queues->queue_array[idx];
+
+	if (!queue->set) {
+		queue->set = true;
+		queue->tid = buffer->tid;
+		queue->cpu = buffer->cpu;
+	} else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
+		pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
+		       queue->cpu, queue->tid, buffer->cpu, buffer->tid);
+		return -EINVAL;
+	}
+
+	buffer->buffer_nr = queues->next_buffer_nr++;
+
+	list_add_tail(&buffer->list, &queue->head);
+
+	queues->new_data = true;
+	queues->populated = true;
+
+	return 0;
+}
+
+/* Limit buffers to 32MiB on 32-bit */
+#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024)
+
+static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues,
+					 unsigned int idx,
+					 struct auxtrace_buffer *buffer)
+{
+	u64 sz = buffer->size;
+	bool consecutive = false;
+	struct auxtrace_buffer *b;
+	int err;
+
+	while (sz > BUFFER_LIMIT_FOR_32_BIT) {
+		b = memdup(buffer, sizeof(struct auxtrace_buffer));
+		if (!b)
+			return -ENOMEM;
+		b->size = BUFFER_LIMIT_FOR_32_BIT;
+		b->consecutive = consecutive;
+		err = auxtrace_queues__add_buffer(queues, idx, b);
+		if (err) {
+			auxtrace_buffer__free(b);
+			return err;
+		}
+		buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT;
+		sz -= BUFFER_LIMIT_FOR_32_BIT;
+		consecutive = true;
+	}
+
+	buffer->size = sz;
+	buffer->consecutive = consecutive;
+
+	return 0;
+}
+
+static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues,
+					     struct perf_session *session,
+					     unsigned int idx,
+					     struct auxtrace_buffer *buffer)
+{
+	if (session->one_mmap) {
+		buffer->data = buffer->data_offset - session->one_mmap_offset +
+			       session->one_mmap_addr;
+	} else if (perf_data_file__is_pipe(session->file)) {
+		buffer->data = auxtrace_copy_data(buffer->size, session);
+		if (!buffer->data)
+			return -ENOMEM;
+		buffer->data_needs_freeing = true;
+	} else if (BITS_PER_LONG == 32 &&
+		   buffer->size > BUFFER_LIMIT_FOR_32_BIT) {
+		int err;
+
+		err = auxtrace_queues__split_buffer(queues, idx, buffer);
+		if (err)
+			return err;
+	}
+
+	return auxtrace_queues__add_buffer(queues, idx, buffer);
+}
+
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr)
+{
+	struct auxtrace_buffer *buffer;
+	unsigned int idx;
+	int err;
+
+	buffer = zalloc(sizeof(struct auxtrace_buffer));
+	if (!buffer)
+		return -ENOMEM;
+
+	buffer->pid = -1;
+	buffer->tid = event->auxtrace.tid;
+	buffer->cpu = event->auxtrace.cpu;
+	buffer->data_offset = data_offset;
+	buffer->offset = event->auxtrace.offset;
+	buffer->reference = event->auxtrace.reference;
+	buffer->size = event->auxtrace.size;
+	idx = event->auxtrace.idx;
+
+	err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer);
+	if (err)
+		goto out_err;
+
+	if (buffer_ptr)
+		*buffer_ptr = buffer;
+
+	return 0;
+
+out_err:
+	auxtrace_buffer__free(buffer);
+	return err;
+}
+
+void auxtrace_queues__free(struct auxtrace_queues *queues)
+{
+	unsigned int i;
+
+	for (i = 0; i < queues->nr_queues; i++) {
+		while (!list_empty(&queues->queue_array[i].head)) {
+			struct auxtrace_buffer *buffer;
+
+			buffer = list_entry(queues->queue_array[i].head.next,
+					    struct auxtrace_buffer, list);
+			list_del(&buffer->list);
+			auxtrace_buffer__free(buffer);
+		}
+	}
+
+	zfree(&queues->queue_array);
+	queues->nr_queues = 0;
+}
+
 size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
 {
 	if (itr)
@@ -174,6 +413,72 @@ auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
 	return NULL;
 }
 
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer)
+{
+	if (buffer) {
+		if (list_is_last(&buffer->list, &queue->head))
+			return NULL;
+		return list_entry(buffer->list.next, struct auxtrace_buffer,
+				  list);
+	} else {
+		if (list_empty(&queue->head))
+			return NULL;
+		return list_entry(queue->head.next, struct auxtrace_buffer,
+				  list);
+	}
+}
+
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd)
+{
+	size_t adj = buffer->data_offset & (page_size - 1);
+	size_t size = buffer->size + adj;
+	off_t file_offset = buffer->data_offset - adj;
+	void *addr;
+
+	if (buffer->data)
+		return buffer->data;
+
+	addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset);
+	if (addr == MAP_FAILED)
+		return NULL;
+
+	buffer->mmap_addr = addr;
+	buffer->mmap_size = size;
+
+	buffer->data = addr + adj;
+
+	return buffer->data;
+}
+
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer)
+{
+	if (!buffer->data || !buffer->mmap_addr)
+		return;
+	munmap(buffer->mmap_addr, buffer->mmap_size);
+	buffer->mmap_addr = NULL;
+	buffer->mmap_size = 0;
+	buffer->data = NULL;
+	buffer->use_data = NULL;
+}
+
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__put_data(buffer);
+	if (buffer->data_needs_freeing) {
+		buffer->data_needs_freeing = false;
+		zfree(&buffer->data);
+		buffer->use_data = NULL;
+		buffer->size = 0;
+	}
+}
+
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer)
+{
+	auxtrace_buffer__drop_data(buffer);
+	free(buffer);
+}
+
 void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
 			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
 			  const char *msg)
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index bcfb9288bfea..c6b5981384de 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -19,6 +19,7 @@
 #include <sys/types.h>
 #include <stdbool.h>
 #include <stddef.h>
+#include <linux/list.h>
 #include <linux/perf_event.h>
 #include <linux/types.h>
 
@@ -92,6 +93,80 @@ struct auxtrace {
 	void (*free)(struct perf_session *session);
 };
 
+/**
+ * struct auxtrace_buffer - a buffer containing AUX area tracing data.
+ * @list: buffers are queued in a list held by struct auxtrace_queue
+ * @size: size of the buffer in bytes
+ * @pid: in per-thread mode, the pid this buffer is associated with
+ * @tid: in per-thread mode, the tid this buffer is associated with
+ * @cpu: in per-cpu mode, the cpu this buffer is associated with
+ * @data: actual buffer data (can be null if the data has not been loaded)
+ * @data_offset: file offset at which the buffer can be read
+ * @mmap_addr: mmap address at which the buffer can be read
+ * @mmap_size: size of the mmap at @mmap_addr
+ * @data_needs_freeing: @data was malloc'd so free it when it is no longer
+ *                      needed
+ * @consecutive: the original data was split up and this buffer is consecutive
+ *               to the previous buffer
+ * @offset: offset as determined by aux_head / aux_tail members of struct
+ *          perf_event_mmap_page
+ * @reference: an implementation-specific reference determined when the data is
+ *             recorded
+ * @buffer_nr: used to number each buffer
+ * @use_size: implementation actually only uses this number of bytes
+ * @use_data: implementation actually only uses data starting at this address
+ */
+struct auxtrace_buffer {
+	struct list_head	list;
+	size_t			size;
+	pid_t			pid;
+	pid_t			tid;
+	int			cpu;
+	void			*data;
+	off_t			data_offset;
+	void			*mmap_addr;
+	size_t			mmap_size;
+	bool			data_needs_freeing;
+	bool			consecutive;
+	u64			offset;
+	u64			reference;
+	u64			buffer_nr;
+	size_t			use_size;
+	void			*use_data;
+};
+
+/**
+ * struct auxtrace_queue - a queue of AUX area tracing data buffers.
+ * @head: head of buffer list
+ * @tid: in per-thread mode, the tid this queue is associated with
+ * @cpu: in per-cpu mode, the cpu this queue is associated with
+ * @set: %true once this queue has been dedicated to a specific thread or cpu
+ * @priv: implementation-specific data
+ */
+struct auxtrace_queue {
+	struct list_head	head;
+	pid_t			tid;
+	int			cpu;
+	bool			set;
+	void			*priv;
+};
+
+/**
+ * struct auxtrace_queues - an array of AUX area tracing queues.
+ * @queue_array: array of queues
+ * @nr_queues: number of queues
+ * @new_data: set whenever new data is queued
+ * @populated: queues have been fully populated using the auxtrace_index
+ * @next_buffer_nr: used to number each buffer
+ */
+struct auxtrace_queues {
+	struct auxtrace_queue	*queue_array;
+	unsigned int		nr_queues;
+	bool			new_data;
+	bool			populated;
+	u64			next_buffer_nr;
+};
+
 /**
  * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
  * @base: address of mapped area
@@ -210,6 +285,18 @@ typedef int (*process_auxtrace_t)(struct perf_tool *tool,
 int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn);
 
+int auxtrace_queues__init(struct auxtrace_queues *queues);
+int auxtrace_queues__add_event(struct auxtrace_queues *queues,
+			       struct perf_session *session,
+			       union perf_event *event, off_t data_offset,
+			       struct auxtrace_buffer **buffer_ptr);
+void auxtrace_queues__free(struct auxtrace_queues *queues);
+struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
+					      struct auxtrace_buffer *buffer);
+void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
+void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
+void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
 struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
 					      int *err);
 

From f939715586ea4f046eb52523ae25eb4d20b2a497 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:52 +0300
Subject: [PATCH 025/305] perf auxtrace: Add a heap for sorting AUX area
 tracing queues

In order to process AUX area tracing data in time order, the queue with
data with the lowest timestamp must be processed first.  Provide a heap
to keep track of which queue that is.

As with the queues, a decoder does not have to use the heap, but Intel
BTS and Intel PT will use it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-13-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 85 ++++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h | 29 +++++++++++++
 2 files changed, 114 insertions(+)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 252417ac28e2..e13b1a14c859 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -361,6 +361,91 @@ void auxtrace_queues__free(struct auxtrace_queues *queues)
 	queues->nr_queues = 0;
 }
 
+static void auxtrace_heapify(struct auxtrace_heap_item *heap_array,
+			     unsigned int pos, unsigned int queue_nr,
+			     u64 ordinal)
+{
+	unsigned int parent;
+
+	while (pos) {
+		parent = (pos - 1) >> 1;
+		if (heap_array[parent].ordinal <= ordinal)
+			break;
+		heap_array[pos] = heap_array[parent];
+		pos = parent;
+	}
+	heap_array[pos].queue_nr = queue_nr;
+	heap_array[pos].ordinal = ordinal;
+}
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal)
+{
+	struct auxtrace_heap_item *heap_array;
+
+	if (queue_nr >= heap->heap_sz) {
+		unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES;
+
+		while (heap_sz <= queue_nr)
+			heap_sz <<= 1;
+		heap_array = realloc(heap->heap_array,
+				     heap_sz * sizeof(struct auxtrace_heap_item));
+		if (!heap_array)
+			return -ENOMEM;
+		heap->heap_array = heap_array;
+		heap->heap_sz = heap_sz;
+	}
+
+	auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal);
+
+	return 0;
+}
+
+void auxtrace_heap__free(struct auxtrace_heap *heap)
+{
+	zfree(&heap->heap_array);
+	heap->heap_cnt = 0;
+	heap->heap_sz = 0;
+}
+
+void auxtrace_heap__pop(struct auxtrace_heap *heap)
+{
+	unsigned int pos, last, heap_cnt = heap->heap_cnt;
+	struct auxtrace_heap_item *heap_array;
+
+	if (!heap_cnt)
+		return;
+
+	heap->heap_cnt -= 1;
+
+	heap_array = heap->heap_array;
+
+	pos = 0;
+	while (1) {
+		unsigned int left, right;
+
+		left = (pos << 1) + 1;
+		if (left >= heap_cnt)
+			break;
+		right = left + 1;
+		if (right >= heap_cnt) {
+			heap_array[pos] = heap_array[left];
+			return;
+		}
+		if (heap_array[left].ordinal < heap_array[right].ordinal) {
+			heap_array[pos] = heap_array[left];
+			pos = left;
+		} else {
+			heap_array[pos] = heap_array[right];
+			pos = right;
+		}
+	}
+
+	last = heap_cnt - 1;
+	auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr,
+			 heap_array[last].ordinal);
+}
+
 size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr)
 {
 	if (itr)
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index c6b5981384de..c3514f3b7111 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -167,6 +167,29 @@ struct auxtrace_queues {
 	u64			next_buffer_nr;
 };
 
+/**
+ * struct auxtrace_heap_item - element of struct auxtrace_heap.
+ * @queue_nr: queue number
+ * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected
+ *           to be a timestamp
+ */
+struct auxtrace_heap_item {
+	unsigned int		queue_nr;
+	u64			ordinal;
+};
+
+/**
+ * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues.
+ * @heap_array: the heap
+ * @heap_cnt: the number of elements in the heap
+ * @heap_sz: maximum number of elements (grows as needed)
+ */
+struct auxtrace_heap {
+	struct auxtrace_heap_item	*heap_array;
+	unsigned int		heap_cnt;
+	unsigned int		heap_sz;
+};
+
 /**
  * struct auxtrace_mmap - records an mmap of the auxtrace buffer.
  * @base: address of mapped area
@@ -297,6 +320,12 @@ void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
 void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer);
 void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer);
 void auxtrace_buffer__free(struct auxtrace_buffer *buffer);
+
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
+		       u64 ordinal);
+void auxtrace_heap__pop(struct auxtrace_heap *heap);
+void auxtrace_heap__free(struct auxtrace_heap *heap);
+
 struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
 					      int *err);
 

From 73f75fb1bdc572f97f1eb375f2bbb68cfcba077c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:53 +0300
Subject: [PATCH 026/305] perf auxtrace: Add processing for AUX area tracing
 events

Provide hooks so that an AUX area decoder can process AUX area tracing
events.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-14-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 52 +++++++++++++++++++++++++++++++++++++-
 tools/perf/util/auxtrace.h | 13 ++++++++++
 2 files changed, 64 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index e13b1a14c859..c4515e1a9d7f 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -615,6 +615,53 @@ out_free:
 	return err;
 }
 
+static bool auxtrace__dont_decode(struct perf_session *session)
+{
+	return !session->itrace_synth_opts ||
+	       session->itrace_synth_opts->dont_decode;
+}
+
+int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
+				      union perf_event *event,
+				      struct perf_session *session __maybe_unused)
+{
+	enum auxtrace_type type = event->auxtrace_info.type;
+
+	if (dump_trace)
+		fprintf(stdout, " type: %u\n", type);
+
+	switch (type) {
+	case PERF_AUXTRACE_UNKNOWN:
+	default:
+		return -EINVAL;
+	}
+}
+
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session)
+{
+	s64 err;
+
+	if (dump_trace)
+		fprintf(stdout, " size: %#"PRIx64"  offset: %#"PRIx64"  ref: %#"PRIx64"  idx: %u  tid: %d  cpu: %d\n",
+			event->auxtrace.size, event->auxtrace.offset,
+			event->auxtrace.reference, event->auxtrace.idx,
+			event->auxtrace.tid, event->auxtrace.cpu);
+
+	if (auxtrace__dont_decode(session))
+		return event->auxtrace.size;
+
+	if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE)
+		return -EINVAL;
+
+	err = session->auxtrace->process_auxtrace_event(session, event, tool);
+	if (err < 0)
+		return err;
+
+	return event->auxtrace.size;
+}
+
 #define PERF_ITRACE_DEFAULT_PERIOD_TYPE		PERF_ITRACE_PERIOD_NANOSECS
 #define PERF_ITRACE_DEFAULT_PERIOD		100000
 #define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ	16
@@ -801,8 +848,11 @@ void events_stats__auxtrace_error_warn(const struct events_stats *stats)
 
 int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
 				       union perf_event *event,
-				       struct perf_session *session __maybe_unused)
+				       struct perf_session *session)
 {
+	if (auxtrace__dont_decode(session))
+		return 0;
+
 	perf_event__fprintf_auxtrace_error(event, stdout);
 	return 0;
 }
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index c3514f3b7111..ba78d825bf73 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -36,6 +36,10 @@ struct record_opts;
 struct auxtrace_info_event;
 struct events_stats;
 
+enum auxtrace_type {
+	PERF_AUXTRACE_UNKNOWN,
+};
+
 enum itrace_period_type {
 	PERF_ITRACE_PERIOD_INSTRUCTIONS,
 	PERF_ITRACE_PERIOD_TICKS,
@@ -87,6 +91,9 @@ struct auxtrace {
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct perf_tool *tool);
+	int (*process_auxtrace_event)(struct perf_session *session,
+				      union perf_event *event,
+				      struct perf_tool *tool);
 	int (*flush_events)(struct perf_session *session,
 			    struct perf_tool *tool);
 	void (*free_events)(struct perf_session *session);
@@ -348,6 +355,12 @@ int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr,
 					 struct perf_tool *tool,
 					 struct perf_session *session,
 					 perf_event__handler_t process);
+int perf_event__process_auxtrace_info(struct perf_tool *tool,
+				      union perf_event *event,
+				      struct perf_session *session);
+s64 perf_event__process_auxtrace(struct perf_tool *tool,
+				 union perf_event *event,
+				 struct perf_session *session);
 int perf_event__process_auxtrace_error(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_session *session);

From c3278f0231e833c6030c3aeab510415fdbbfd353 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:54 +0300
Subject: [PATCH 027/305] perf auxtrace: Add a hashtable for caching

Decoding AUX area data may involve walking object code.  Rather than
repetitively decoding the same instructions, a cache can be used to
cache the results.

This patch implements a fairly generic hashtable with a 32-bit key that
could be used for other purposes as well.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-15-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/auxtrace.c | 123 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h |  14 +++++
 2 files changed, 137 insertions(+)

diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index c4515e1a9d7f..3cd89eca1e88 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -40,6 +40,8 @@
 #include "asm/bug.h"
 #include "auxtrace.h"
 
+#include <linux/hash.h>
+
 #include "event.h"
 #include "session.h"
 #include "debug.h"
@@ -944,3 +946,124 @@ int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 
 	return 1;
 }
+
+/**
+ * struct auxtrace_cache - hash table to implement a cache
+ * @hashtable: the hashtable
+ * @sz: hashtable size (number of hlists)
+ * @entry_size: size of an entry
+ * @limit: limit the number of entries to this maximum, when reached the cache
+ *         is dropped and caching begins again with an empty cache
+ * @cnt: current number of entries
+ * @bits: hashtable size (@sz = 2^@bits)
+ */
+struct auxtrace_cache {
+	struct hlist_head *hashtable;
+	size_t sz;
+	size_t entry_size;
+	size_t limit;
+	size_t cnt;
+	unsigned int bits;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent)
+{
+	struct auxtrace_cache *c;
+	struct hlist_head *ht;
+	size_t sz, i;
+
+	c = zalloc(sizeof(struct auxtrace_cache));
+	if (!c)
+		return NULL;
+
+	sz = 1UL << bits;
+
+	ht = calloc(sz, sizeof(struct hlist_head));
+	if (!ht)
+		goto out_free;
+
+	for (i = 0; i < sz; i++)
+		INIT_HLIST_HEAD(&ht[i]);
+
+	c->hashtable = ht;
+	c->sz = sz;
+	c->entry_size = entry_size;
+	c->limit = (c->sz * limit_percent) / 100;
+	c->bits = bits;
+
+	return c;
+
+out_free:
+	free(c);
+	return NULL;
+}
+
+static void auxtrace_cache__drop(struct auxtrace_cache *c)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_node *tmp;
+	size_t i;
+
+	if (!c)
+		return;
+
+	for (i = 0; i < c->sz; i++) {
+		hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) {
+			hlist_del(&entry->hash);
+			auxtrace_cache__free_entry(c, entry);
+		}
+	}
+
+	c->cnt = 0;
+}
+
+void auxtrace_cache__free(struct auxtrace_cache *c)
+{
+	if (!c)
+		return;
+
+	auxtrace_cache__drop(c);
+	free(c->hashtable);
+	free(c);
+}
+
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c)
+{
+	return malloc(c->entry_size);
+}
+
+void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused,
+				void *entry)
+{
+	free(entry);
+}
+
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry)
+{
+	if (c->limit && ++c->cnt > c->limit)
+		auxtrace_cache__drop(c);
+
+	entry->key = key;
+	hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]);
+
+	return 0;
+}
+
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key)
+{
+	struct auxtrace_cache_entry *entry;
+	struct hlist_head *hlist;
+
+	if (!c)
+		return NULL;
+
+	hlist = &c->hashtable[hash_32(key, c->bits)];
+	hlist_for_each_entry(entry, hlist, hash) {
+		if (entry->key == key)
+			return entry;
+	}
+
+	return NULL;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index ba78d825bf73..53b60a64a693 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -333,6 +333,20 @@ int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr,
 void auxtrace_heap__pop(struct auxtrace_heap *heap);
 void auxtrace_heap__free(struct auxtrace_heap *heap);
 
+struct auxtrace_cache_entry {
+	struct hlist_node hash;
+	u32 key;
+};
+
+struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size,
+					   unsigned int limit_percent);
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache);
+void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c);
+void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry);
+int auxtrace_cache__add(struct auxtrace_cache *c, u32 key,
+			struct auxtrace_cache_entry *entry);
+void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
+
 struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
 					      int *err);
 

From cfe9174fcfe28f0f73dce422e3151991ee0d49bf Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:55 +0300
Subject: [PATCH 028/305] perf tools: Add member to struct dso for an
 instruction cache

Add a member to struct dso that can be used by Instruction Trace
implementations to hold a cache for decoded instructions.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-16-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 2 ++
 tools/perf/util/dso.h | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index fc0ddd5792a9..13d9ae0bd15c 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -4,6 +4,7 @@
 #include "symbol.h"
 #include "dso.h"
 #include "machine.h"
+#include "auxtrace.h"
 #include "util.h"
 #include "debug.h"
 
@@ -961,6 +962,7 @@ void dso__delete(struct dso *dso)
 	}
 
 	dso__data_close(dso);
+	auxtrace_cache__free(dso->auxtrace_cache);
 	dso_cache__free(&dso->data.cache);
 	dso__free_a2l(dso);
 	zfree(&dso->symsrc_filename);
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index e0901b4ed8de..3d79c749934c 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -126,6 +126,8 @@ struct dsos {
 	struct rb_root	 root;	/* rbtree root sorted by long name */
 };
 
+struct auxtrace_cache;
+
 struct dso {
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
@@ -156,6 +158,7 @@ struct dso {
 	u16		 long_name_len;
 	u16		 short_name_len;
 	void		*dwfl;			/* DWARF debug info */
+	struct auxtrace_cache *auxtrace_cache;
 
 	/* dso data file */
 	struct {

From 7a680eb990b6646ecadf22dca7ded16a33f731b9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:53:56 +0300
Subject: [PATCH 029/305] perf script: Add Instruction Tracing support

Add support for decoding an AUX area assuming it contains instruction
tracing data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-17-git-send-email-adrian.hunter@intel.com
[ Do not use -Z as an alternative to --itrace ]
[ Fixed initialization of itrace_synth_opts struct fields on older gcc versions ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt | 27 ++++++++++++++++++++++++
 tools/perf/builtin-script.c              | 11 ++++++++++
 2 files changed, 38 insertions(+)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 79445750fcb3..05df64804def 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -221,6 +221,33 @@ OPTIONS
 --header-only
 	Show only perf.data header.
 
+--itrace::
+	Options for decoding instruction tracing data. The options are:
+
+		i	synthesize instructions events
+		b	synthesize branches events
+		c	synthesize branches events (calls only)
+		r	synthesize branches events (returns only)
+		e	synthesize error events
+		d	create a debug log
+		g	synthesize a call chain for instructions events
+
+	The default is all events i.e. the same as --itrace=ibe
+
+	In addition, the period (default 100000) for instructions events
+	can be specified in units of:
+
+		i	instructions
+		t	ticks
+		ms	milliseconds
+		us	microseconds
+		ns	nanoseconds (default)
+
+	Also the call chain size (default 16, max. 1024) for instructions
+	events can be specified.
+
+	To disable decoding entirely, use --no-itrace.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-script-perl[1],
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 58f10b8e6ff2..7682665456fe 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -16,6 +16,7 @@
 #include "util/evsel.h"
 #include "util/sort.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
 #include <linux/bitmap.h>
 
 static char const		*script_name;
@@ -1497,6 +1498,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	char *rec_script_path = NULL;
 	char *rep_script_path = NULL;
 	struct perf_session *session;
+	struct itrace_synth_opts itrace_synth_opts = { .set = false, };
 	char *script_path = NULL;
 	const char **__argv;
 	int i, j, err = 0;
@@ -1511,6 +1513,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 			.attr		 = process_attr,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
+			.id_index	 = perf_event__process_id_index,
+			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace	 = perf_event__process_auxtrace,
+			.auxtrace_error	 = perf_event__process_auxtrace_error,
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
 		},
@@ -1570,6 +1576,9 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events,
 		    "Show the mmap events"),
 	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+			    "Instruction Tracing options",
+			    itrace_parse_synth_opts),
 	OPT_END()
 	};
 	const char * const script_subcommands[] = { "record", "report", NULL };
@@ -1765,6 +1774,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	script.session = session;
 
+	session->itrace_synth_opts = &itrace_synth_opts;
+
 	if (cpu_list) {
 		err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap);
 		if (err < 0)

From cd17a9b54463970a47fec33d94f220950c9990c4 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 21 Apr 2015 12:21:54 +0300
Subject: [PATCH 030/305] perf inject: Re-pipe AUX area tracing events

New AUX area tracing events must be re-piped by default.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429608114-18194-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c | 63 ++++++++++++++++++++++++++++++++++---
 1 file changed, 58 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 40a33d7334cc..8f4cad7e9fe7 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -38,14 +38,11 @@ struct event_entry {
 	union perf_event event[0];
 };
 
-static int perf_event__repipe_synth(struct perf_tool *tool,
-				    union perf_event *event)
+static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
 {
-	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	ssize_t size;
 
-	size = perf_data_file__write(&inject->output, event,
-				     event->header.size);
+	size = perf_data_file__write(&inject->output, buf, sz);
 	if (size < 0)
 		return -errno;
 
@@ -53,6 +50,34 @@ static int perf_event__repipe_synth(struct perf_tool *tool,
 	return 0;
 }
 
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+	char buf[4096];
+	ssize_t ssz;
+	int ret;
+
+	while (size > 0) {
+		ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+		if (ssz < 0)
+			return -errno;
+		ret = output_bytes(inject, buf, ssz);
+		if (ret)
+			return ret;
+		size -= ssz;
+	}
+
+	return 0;
+}
+
+static int perf_event__repipe_synth(struct perf_tool *tool,
+				    union perf_event *event)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject,
+						  tool);
+
+	return output_bytes(inject, event, event->header.size);
+}
+
 static int perf_event__repipe_oe_synth(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct ordered_events *oe __maybe_unused)
@@ -86,6 +111,31 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
+static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session
+				       __maybe_unused)
+{
+	struct perf_inject *inject = container_of(tool, struct perf_inject,
+						  tool);
+	int ret;
+
+	if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
+		ret = output_bytes(inject, event, event->header.size);
+		if (ret < 0)
+			return ret;
+		ret = copy_bytes(inject, perf_data_file__fd(session->file),
+				 event->auxtrace.size);
+	} else {
+		ret = output_bytes(inject, event,
+				   event->header.size + event->auxtrace.size);
+	}
+	if (ret < 0)
+		return ret;
+
+	return event->auxtrace.size;
+}
+
 static int perf_event__repipe(struct perf_tool *tool,
 			      union perf_event *event,
 			      struct perf_sample *sample __maybe_unused,
@@ -413,6 +463,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.unthrottle	= perf_event__repipe,
 			.attr		= perf_event__repipe_attr,
 			.tracing_data	= perf_event__repipe_op2_synth,
+			.auxtrace_info	= perf_event__repipe_op2_synth,
+			.auxtrace	= perf_event__repipe_auxtrace,
+			.auxtrace_error	= perf_event__repipe_op2_synth,
 			.finished_round	= perf_event__repipe_oe_synth,
 			.build_id	= perf_event__repipe_op2_synth,
 			.id_index	= perf_event__repipe_op2_synth,

From 0f0aa5e0693ce4000a7657cc47ce4f32b86b91ba Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:54:00 +0300
Subject: [PATCH 031/305] perf inject: Add Instruction Tracing support

Add support for decoding an AUX area assuming it contains instruction
tracing data.  The AUX area tracing events are stripped and replaced by
synthesized events.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-21-git-send-email-adrian.hunter@intel.com
[ Do not use -Z as an alternative to --itrace ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-inject.txt | 26 +++++++++
 tools/perf/builtin-inject.c              | 71 +++++++++++++++++++++++-
 2 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index dc7442cf3d7f..bb1f655e1361 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -44,6 +44,32 @@ OPTIONS
 --kallsyms=<file>::
 	kallsyms pathname
 
+--itrace::
+	Decode Instruction Tracing data, replacing it with synthesized events.
+	Options are:
+
+		i	synthesize instructions events
+		b	synthesize branches events
+		c	synthesize branches events (calls only)
+		r	synthesize branches events (returns only)
+		e	synthesize error events
+		d	create a debug log
+		g	synthesize a call chain for instructions events
+
+	The default is all events i.e. the same as --itrace=ibe
+
+	In addition, the period (default 100000) for instructions events
+	can be specified in units of:
+
+		i	instructions
+		t	ticks
+		ms	milliseconds
+		us	microseconds
+		ns	nanoseconds (default)
+
+	Also the call chain size (default 16, max. 1024) for instructions
+	events can be specified.
+
 SEE ALSO
 --------
 linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1]
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 8f4cad7e9fe7..c5f6515f0723 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -16,6 +16,7 @@
 #include "util/debug.h"
 #include "util/build-id.h"
 #include "util/data.h"
+#include "util/auxtrace.h"
 
 #include "util/parse-options.h"
 
@@ -30,6 +31,7 @@ struct perf_inject {
 	struct perf_data_file	output;
 	u64			bytes_written;
 	struct list_head	samples;
+	struct itrace_synth_opts itrace_synth_opts;
 };
 
 struct event_entry {
@@ -205,6 +207,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool,
 	return err;
 }
 
+static int perf_event__repipe_comm(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err;
+
+	err = perf_event__process_comm(tool, event, sample, machine);
+	perf_event__repipe(tool, event, sample, machine);
+
+	return err;
+}
+
+static int perf_event__repipe_exit(struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	int err;
+
+	err = perf_event__process_exit(tool, event, sample, machine);
+	perf_event__repipe(tool, event, sample, machine);
+
+	return err;
+}
+
 static int perf_event__repipe_tracing_data(struct perf_tool *tool,
 					   union perf_event *event,
 					   struct perf_session *session)
@@ -217,6 +245,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool,
 	return err;
 }
 
+static int perf_event__repipe_id_index(struct perf_tool *tool,
+				       union perf_event *event,
+				       struct perf_session *session)
+{
+	int err;
+
+	perf_event__repipe_synth(tool, event);
+	err = perf_event__process_id_index(tool, event, session);
+
+	return err;
+}
+
 static int dso__read_build_id(struct dso *dso)
 {
 	if (dso->has_build_id)
@@ -401,16 +441,20 @@ static int __cmd_inject(struct perf_inject *inject)
 	struct perf_session *session = inject->session;
 	struct perf_data_file *file_out = &inject->output;
 	int fd = perf_data_file__fd(file_out);
+	u64 output_data_offset;
 
 	signal(SIGINT, sig_handler);
 
-	if (inject->build_ids || inject->sched_stat) {
+	if (inject->build_ids || inject->sched_stat ||
+	    inject->itrace_synth_opts.set) {
 		inject->tool.mmap	  = perf_event__repipe_mmap;
 		inject->tool.mmap2	  = perf_event__repipe_mmap2;
 		inject->tool.fork	  = perf_event__repipe_fork;
 		inject->tool.tracing_data = perf_event__repipe_tracing_data;
 	}
 
+	output_data_offset = session->header.data_offset;
+
 	if (inject->build_ids) {
 		inject->tool.sample = perf_event__inject_buildid;
 	} else if (inject->sched_stat) {
@@ -429,10 +473,22 @@ static int __cmd_inject(struct perf_inject *inject)
 			else if (!strncmp(name, "sched:sched_stat_", 17))
 				evsel->handler = perf_inject__sched_stat;
 		}
+	} else if (inject->itrace_synth_opts.set) {
+		session->itrace_synth_opts = &inject->itrace_synth_opts;
+		inject->itrace_synth_opts.inject = true;
+		inject->tool.comm	    = perf_event__repipe_comm;
+		inject->tool.exit	    = perf_event__repipe_exit;
+		inject->tool.id_index	    = perf_event__repipe_id_index;
+		inject->tool.auxtrace_info  = perf_event__process_auxtrace_info;
+		inject->tool.auxtrace	    = perf_event__process_auxtrace;
+		inject->tool.ordered_events = true;
+		inject->tool.ordering_requires_timestamps = true;
+		/* Allow space in the header for new attributes */
+		output_data_offset = 4096;
 	}
 
 	if (!file_out->is_pipe)
-		lseek(fd, session->header.data_offset, SEEK_SET);
+		lseek(fd, output_data_offset, SEEK_SET);
 
 	ret = perf_session__process_events(session);
 
@@ -440,6 +496,14 @@ static int __cmd_inject(struct perf_inject *inject)
 		if (inject->build_ids)
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
+		/*
+		 * The AUX areas have been removed and replaced with
+		 * synthesized hardware events, so clear the feature flag.
+		 */
+		if (inject->itrace_synth_opts.set)
+			perf_header__clear_feat(&session->header,
+						HEADER_AUXTRACE);
+		session->header.data_offset = output_data_offset;
 		session->header.data_size = inject->bytes_written;
 		perf_session__write_header(session, session->evlist, fd, true);
 	}
@@ -497,6 +561,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file",
 			   "kallsyms pathname"),
 		OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
+		OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts,
+				    NULL, "opts", "Instruction Tracing options",
+				    itrace_parse_synth_opts),
 		OPT_END()
 	};
 	const char * const inject_usage[] = {

From 400ea6d327ff43311c73dd4a000eb064ae20140c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:54:05 +0300
Subject: [PATCH 032/305] perf script: Add field option 'flags' to print sample
 flags

Instruction tracing will typically have access to information about the
instruction being executed for a particular ip sample.  Some of that
information will be available in the 'flags' member of struct
perf_sample.

With the addition of transactions events synthesis to Instruction
Tracing options, there is a need to be able easily to see the flags
because they show whether the ip is at the start, commit or abort of a
tranasaction.

Consequently add an option to display the flags.

The flags are "bcrosyiABEx" which stand for branch, call, return,
conditional, system, asynchronous, interrupt, transaction abort, trace
begin, trace end, and in transaction, respectively.

Example using Intel PT:

perf script -fip,time,event,sym,addr,flags

...
 1288.721584105: branches:u:   bo              401146 main =>           401152 main
 1288.721584105: transactions:   x                   0           401164 main
 1288.721584105: branches:u:   bx              40117c main =>           40119b main
 1288.721584105: branches:u:   box             4011a4 main =>           40117e main
 1288.721584105: branches:u:   bcx             401187 main =>           401094 g
...
 1288.721591645: branches:u:   bx              4010c4 g =>           4010cb g
 1288.721591645: branches:u:   brx             4010cc g =>           401189 main
 1288.721591645: transactions:                       0           4011a6 main
 1288.721593199: branches:u:   b               4011a9 main =>           4011af main
 1288.721593199: branches:u:   bo              4011bc main =>           40113e main
 1288.721593199: branches:u:   b               401150 main =>           40115a main
 1288.721593199: transactions:   x                   0           401164 main
 1288.721593199: branches:u:   bx              40117c main =>           40119b main
 1288.721593199: branches:u:   box             4011a4 main =>           40117e main
 1288.721593199: branches:u:   bcx             401187 main =>           40105e f
...
 1288.722284747: branches:u:   brx             401093 f =>           401189 main
 1288.722284747: branches:u:   box             4011a4 main =>           40117e main
 1288.722284747: branches:u:   bcx             401187 main =>           40105e f
 1288.722285883: transactions:   bA                  0           401071 f
 1288.722285883: branches:u:   bA              401071 f =>           40116a main
 1288.722285883: branches:u:   bE              40116a main =>                0 [unknown]
 1288.722297174: branches:u:   bB                   0 [unknown] =>           40116a main
...

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-26-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-script.txt |  9 ++++++-
 tools/perf/builtin-script.c              | 34 ++++++++++++++++++++----
 tools/perf/util/event.h                  |  2 ++
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 05df64804def..b29cd2f17d13 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -115,7 +115,8 @@ OPTIONS
 -f::
 --fields::
         Comma separated list of fields to print. Options are:
-        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period.
+        comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
+	srcline, period, flags.
         Field list can be prepended with the type, trace, sw or hw,
         to indicate to which event type the field list applies.
         e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
@@ -165,6 +166,12 @@ OPTIONS
 
 	At this point usage is displayed, and perf-script exits.
 
+	The flags field is synthesized and may have a value when Instruction
+	Trace decoding. The flags are "bcrosyiABEx" which stand for branch,
+	call, return, conditional, system, asynchronous, interrupt,
+	transaction abort, trace begin, trace end, and in transaction,
+	respectively.
+
 	Finally, a user may not set fields to none for all event types.
 	i.e., -f "" is not allowed.
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 7682665456fe..cd2f38bf7573 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -27,6 +27,7 @@ static u64			nr_unordered;
 static bool			no_callchain;
 static bool			latency_format;
 static bool			system_wide;
+static bool			print_flags;
 static const char		*cpu_list;
 static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
 
@@ -446,6 +447,25 @@ static void print_sample_bts(union perf_event *event,
 	printf("\n");
 }
 
+static void print_sample_flags(u32 flags)
+{
+	const char *chars = PERF_IP_FLAG_CHARS;
+	const int n = strlen(PERF_IP_FLAG_CHARS);
+	char str[33];
+	int i, pos = 0;
+
+	for (i = 0; i < n; i++, flags >>= 1) {
+		if (flags & 1)
+			str[pos++] = chars[i];
+	}
+	for (; i < 32; i++, flags >>= 1) {
+		if (flags & 1)
+			str[pos++] = '?';
+	}
+	str[pos] = 0;
+	printf("  %-4s ", str);
+}
+
 static void process_event(union perf_event *event, struct perf_sample *sample,
 			  struct perf_evsel *evsel, struct addr_location *al)
 {
@@ -465,6 +485,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample,
 		printf("%s: ", evname ? evname : "[unknown]");
 	}
 
+	if (print_flags)
+		print_sample_flags(sample->flags);
+
 	if (is_bts_event(attr)) {
 		print_sample_bts(event, sample, evsel, thread, al);
 		return;
@@ -1000,12 +1023,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 		}
 	}
 
-	tok = strtok(tok, ",");
-	while (tok) {
+	for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) {
 		for (i = 0; i < imax; ++i) {
 			if (strcmp(tok, all_output_options[i].str) == 0)
 				break;
 		}
+		if (i == imax && strcmp(tok, "flags") == 0) {
+			print_flags = true;
+			continue;
+		}
 		if (i == imax) {
 			fprintf(stderr, "Invalid field requested.\n");
 			rc = -EINVAL;
@@ -1033,8 +1059,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused,
 			}
 			output[type].fields |= all_output_options[i].field;
 		}
-
-		tok = strtok(NULL, ",");
 	}
 
 	if (type >= 0) {
@@ -1555,7 +1579,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "comma separated output fields prepend with 'type:'. "
 		     "Valid types: hw,sw,trace,raw. "
 		     "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
-		     "addr,symoff,period", parse_output_fields),
+		     "addr,symoff,period,flags", parse_output_fields),
 	OPT_BOOLEAN('a', "all-cpus", &system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8ef37251a7a9..80e9f5969a39 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -157,6 +157,8 @@ enum {
 	PERF_IP_FLAG_IN_TX		= 1ULL << 10,
 };
 
+#define PERF_IP_FLAG_CHARS "bcrosyiABEx"
+
 #define PERF_BRANCH_MASK		(\
 	PERF_IP_FLAG_BRANCH		|\
 	PERF_IP_FLAG_CALL		|\

From 70d73de4cd571df09670e2a7bf36a912e9ff5138 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 9 Apr 2015 18:54:06 +0300
Subject: [PATCH 033/305] perf tools: Add aux_watermark member of struct
 perf_event_attr

Add new AUX area member (aux_watermark) of struct perf_event_attr to
debug prints and byte swapping.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1428594864-29309-27-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c   | 1 +
 tools/perf/util/session.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 33e3fd8c2e68..c886b9f7a48d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1121,6 +1121,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr,
 	PRINT_ATTRf(sample_stack_user, p_unsigned);
 	PRINT_ATTRf(clockid, p_signed);
 	PRINT_ATTRf(sample_regs_intr, p_hex);
+	PRINT_ATTRf(aux_watermark, p_unsigned);
 
 	return ret;
 }
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 1db3ce118a54..90fa5674ccb4 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -500,6 +500,7 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
 	attr->branch_sample_type = bswap_64(attr->branch_sample_type);
 	attr->sample_regs_user	 = bswap_64(attr->sample_regs_user);
 	attr->sample_stack_user  = bswap_32(attr->sample_stack_user);
+	attr->aux_watermark	 = bswap_32(attr->aux_watermark);
 
 	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
 }

From b39b839309ce8c5dd15cd95d26af153fa392c3e6 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:16 +0200
Subject: [PATCH 034/305] perf tools: Add parse_events_error interface

Adding support to return error information from parse_events function.
Following struct will be populated by parse_events function on return:

  struct parse_events_error {
    int   idx;
    char *str;
    char *help;
  };

where 'idx' is the position in the string where the parsing failed,
'str' contains dynamically allocated error string describing the error
and 'help' is optional help string.

The change contains reporting function, which currently does not display
anything. The code changes to supply error data for specific event types
are coming in next patches. However this is what the expected output is:

  $ sudo perf record -e 'sched:krava' ls
  event syntax error: 'sched:krava'
                       \___ unknown tracepoint
  ...

  $ perf record -e 'cpu/even=0x1/' ls
  event syntax error: 'cpu/even=0x1/'
                           \___ unknown term

  valid terms: pc,any,inv,edge,cmask,event,in_tx,ldlat,umask,in_tx_cp,offcore_rsp,config,config1,config2,name,period,branch_type
  ...

  $ perf record -e cycles,cache-mises ls
  event syntax error: '..es,cache-mises'
                                 \___ parser error
  ...

The output functions cut the beginning of the event string so the error
starts up to 10th character and cut the end of the string of it crosses
the terminal width.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-2-git-send-email-jolsa@kernel.org
[ Renamed 'error' variables to 'err', not to clash with util.h error() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c               |   2 +-
 tools/perf/tests/code-reading.c         |   2 +-
 tools/perf/tests/evsel-roundtrip-name.c |   4 +-
 tools/perf/tests/hists_cumulate.c       |   2 +-
 tools/perf/tests/hists_filter.c         |   4 +-
 tools/perf/tests/hists_link.c           |   4 +-
 tools/perf/tests/hists_output.c         |   2 +-
 tools/perf/tests/keep-tracking.c        |   4 +-
 tools/perf/tests/parse-events.c         |   2 +-
 tools/perf/tests/perf-time-to-tsc.c     |   2 +-
 tools/perf/tests/switch-tracking.c      |   8 +-
 tools/perf/util/parse-events.c          | 100 ++++++++++++++++++++++--
 tools/perf/util/parse-events.h          |  19 ++++-
 tools/perf/util/record.c                |   4 +-
 14 files changed, 127 insertions(+), 32 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f7b8218785f6..3dbd8c59efc5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1541,7 +1541,7 @@ static int setup_events(const char * const *attrs, unsigned len)
 	unsigned i;
 
 	for (i = 0; i < len; i++) {
-		if (parse_events(evsel_list, attrs[i]))
+		if (parse_events(evsel_list, attrs[i], NULL))
 			return -1;
 	}
 	return 0;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index f671ec37a7c4..ca0e480e741b 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -482,7 +482,7 @@ static int do_test_code_reading(bool try_kcore)
 		else
 			str = "cycles";
 		pr_debug("Parsing event '%s'\n", str);
-		ret = parse_events(evlist, str);
+		ret = parse_events(evlist, str, NULL);
 		if (ret < 0) {
 			pr_debug("parse_events failed\n");
 			goto out_err;
diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c
index b8d8341b383e..3fa715987a5e 100644
--- a/tools/perf/tests/evsel-roundtrip-name.c
+++ b/tools/perf/tests/evsel-roundtrip-name.c
@@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void)
 			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
 				__perf_evsel__hw_cache_type_op_res_name(type, op, i,
 									name, sizeof(name));
-				err = parse_events(evlist, name);
+				err = parse_events(evlist, name, NULL);
 				if (err)
 					ret = err;
 			}
@@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names)
                 return -ENOMEM;
 
 	for (i = 0; i < nr_names; ++i) {
-		err = parse_events(evlist, names[i]);
+		err = parse_events(evlist, names[i], NULL);
 		if (err) {
 			pr_debug("failed to parse event '%s', err %d\n",
 				 names[i], err);
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 18619966454c..b08a95a5ca1a 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -695,7 +695,7 @@ int test__hists_cumulate(void)
 
 	TEST_ASSERT_VAL("No memory", evlist);
 
-	err = parse_events(evlist, "cpu-clock");
+	err = parse_events(evlist, "cpu-clock", NULL);
 	if (err)
 		goto out;
 
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 59e53db7914c..108488cd71fa 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -108,10 +108,10 @@ int test__hists_filter(void)
 
 	TEST_ASSERT_VAL("No memory", evlist);
 
-	err = parse_events(evlist, "cpu-clock");
+	err = parse_events(evlist, "cpu-clock", NULL);
 	if (err)
 		goto out;
-	err = parse_events(evlist, "task-clock");
+	err = parse_events(evlist, "task-clock", NULL);
 	if (err)
 		goto out;
 
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 278ba8344c23..34c61e4d3352 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -282,10 +282,10 @@ int test__hists_link(void)
 	if (evlist == NULL)
                 return -ENOMEM;
 
-	err = parse_events(evlist, "cpu-clock");
+	err = parse_events(evlist, "cpu-clock", NULL);
 	if (err)
 		goto out;
-	err = parse_events(evlist, "task-clock");
+	err = parse_events(evlist, "task-clock", NULL);
 	if (err)
 		goto out;
 
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index b52c9faea224..d8a23db80094 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -590,7 +590,7 @@ int test__hists_output(void)
 
 	TEST_ASSERT_VAL("No memory", evlist);
 
-	err = parse_events(evlist, "cpu-clock");
+	err = parse_events(evlist, "cpu-clock", NULL);
 	if (err)
 		goto out;
 
diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c
index 7a5ab7b0b8f6..5b171d1e338b 100644
--- a/tools/perf/tests/keep-tracking.c
+++ b/tools/perf/tests/keep-tracking.c
@@ -78,8 +78,8 @@ int test__keep_tracking(void)
 
 	perf_evlist__set_maps(evlist, cpus, threads);
 
-	CHECK__(parse_events(evlist, "dummy:u"));
-	CHECK__(parse_events(evlist, "cycles:u"));
+	CHECK__(parse_events(evlist, "dummy:u", NULL));
+	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
 	perf_evlist__config(evlist, &opts);
 
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 3de744961739..82d2a1636f7f 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -1571,7 +1571,7 @@ static int test_event(struct evlist_test *e)
 	if (evlist == NULL)
 		return -ENOMEM;
 
-	ret = parse_events(evlist, e->name);
+	ret = parse_events(evlist, e->name, NULL);
 	if (ret) {
 		pr_debug("failed to parse event '%s', err %d\n",
 			 e->name, ret);
diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c
index f238442b238a..5f49484f1abc 100644
--- a/tools/perf/tests/perf-time-to-tsc.c
+++ b/tools/perf/tests/perf-time-to-tsc.c
@@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void)
 
 	perf_evlist__set_maps(evlist, cpus, threads);
 
-	CHECK__(parse_events(evlist, "cycles:u"));
+	CHECK__(parse_events(evlist, "cycles:u", NULL));
 
 	perf_evlist__config(evlist, &opts);
 
diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c
index cc68648c7c55..0d31403ea593 100644
--- a/tools/perf/tests/switch-tracking.c
+++ b/tools/perf/tests/switch-tracking.c
@@ -347,7 +347,7 @@ int test__switch_tracking(void)
 	perf_evlist__set_maps(evlist, cpus, threads);
 
 	/* First event */
-	err = parse_events(evlist, "cpu-clock:u");
+	err = parse_events(evlist, "cpu-clock:u", NULL);
 	if (err) {
 		pr_debug("Failed to parse event dummy:u\n");
 		goto out_err;
@@ -356,7 +356,7 @@ int test__switch_tracking(void)
 	cpu_clocks_evsel = perf_evlist__last(evlist);
 
 	/* Second event */
-	err = parse_events(evlist, "cycles:u");
+	err = parse_events(evlist, "cycles:u", NULL);
 	if (err) {
 		pr_debug("Failed to parse event cycles:u\n");
 		goto out_err;
@@ -371,7 +371,7 @@ int test__switch_tracking(void)
 		goto out;
 	}
 
-	err = parse_events(evlist, sched_switch);
+	err = parse_events(evlist, sched_switch, NULL);
 	if (err) {
 		pr_debug("Failed to parse event %s\n", sched_switch);
 		goto out_err;
@@ -401,7 +401,7 @@ int test__switch_tracking(void)
 	perf_evsel__set_sample_bit(cycles_evsel, TIME);
 
 	/* Fourth event */
-	err = parse_events(evlist, "dummy:u");
+	err = parse_events(evlist, "dummy:u", NULL);
 	if (err) {
 		pr_debug("Failed to parse event dummy:u\n");
 		goto out_err;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index be0655388b38..6978cc377957 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -17,6 +17,7 @@
 #include "parse-events-flex.h"
 #include "pmu.h"
 #include "thread_map.h"
+#include "asm/bug.h"
 
 #define MAX_NAME_LEN 100
 
@@ -1019,11 +1020,13 @@ int parse_events_terms(struct list_head *terms, const char *str)
 	return ret;
 }
 
-int parse_events(struct perf_evlist *evlist, const char *str)
+int parse_events(struct perf_evlist *evlist, const char *str,
+		 struct parse_events_error *err)
 {
 	struct parse_events_evlist data = {
-		.list = LIST_HEAD_INIT(data.list),
-		.idx  = evlist->nr_entries,
+		.list  = LIST_HEAD_INIT(data.list),
+		.idx   = evlist->nr_entries,
+		.error = err,
 	};
 	int ret;
 
@@ -1044,16 +1047,87 @@ int parse_events(struct perf_evlist *evlist, const char *str)
 	return ret;
 }
 
+#define MAX_WIDTH 1000
+static int get_term_width(void)
+{
+	struct winsize ws;
+
+	get_term_dimensions(&ws);
+	return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col;
+}
+
+static void parse_events_print_error(struct parse_events_error *err,
+				     const char *event)
+{
+	const char *str = "invalid or unsupported event: ";
+	char _buf[MAX_WIDTH];
+	char *buf = (char *) event;
+	int idx = 0;
+
+	if (err->str) {
+		/* -2 for extra '' in the final fprintf */
+		int width       = get_term_width() - 2;
+		int len_event   = strlen(event);
+		int len_str, max_len, cut = 0;
+
+		/*
+		 * Maximum error index indent, we will cut
+		 * the event string if it's bigger.
+		 */
+		int max_err_idx = 10;
+
+		/*
+		 * Let's be specific with the message when
+		 * we have the precise error.
+		 */
+		str     = "event syntax error: ";
+		len_str = strlen(str);
+		max_len = width - len_str;
+
+		buf = _buf;
+
+		/* We're cutting from the beggining. */
+		if (err->idx > max_err_idx)
+			cut = err->idx - max_err_idx;
+
+		strncpy(buf, event + cut, max_len);
+
+		/* Mark cut parts with '..' on both sides. */
+		if (cut)
+			buf[0] = buf[1] = '.';
+
+		if ((len_event - cut) > max_len) {
+			buf[max_len - 1] = buf[max_len - 2] = '.';
+			buf[max_len] = 0;
+		}
+
+		idx = len_str + err->idx - cut;
+	}
+
+	fprintf(stderr, "%s'%s'\n", str, buf);
+	if (idx) {
+		fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str);
+		if (err->help)
+			fprintf(stderr, "\n%s\n", err->help);
+		free(err->str);
+		free(err->help);
+	}
+
+	fprintf(stderr, "Run 'perf list' for a list of valid events\n");
+}
+
+#undef MAX_WIDTH
+
 int parse_events_option(const struct option *opt, const char *str,
 			int unset __maybe_unused)
 {
 	struct perf_evlist *evlist = *(struct perf_evlist **)opt->value;
-	int ret = parse_events(evlist, str);
+	struct parse_events_error err = { .idx = 0, };
+	int ret = parse_events(evlist, str, &err);
+
+	if (ret)
+		parse_events_print_error(&err, str);
 
-	if (ret) {
-		fprintf(stderr, "invalid or unsupported event: '%s'\n", str);
-		fprintf(stderr, "Run 'perf list' for a list of valid events\n");
-	}
 	return ret;
 }
 
@@ -1535,3 +1609,13 @@ void parse_events__free_terms(struct list_head *terms)
 	list_for_each_entry_safe(term, h, terms, list)
 		free(term);
 }
+
+void parse_events_evlist_error(struct parse_events_evlist *data,
+			       int idx, const char *str)
+{
+	struct parse_events_error *err = data->error;
+
+	err->idx = idx;
+	err->str = strdup(str);
+	WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
+}
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 52a2dda4f954..5ac2ffa0a145 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -12,6 +12,7 @@
 struct list_head;
 struct perf_evsel;
 struct perf_evlist;
+struct parse_events_error;
 
 struct option;
 
@@ -29,7 +30,8 @@ const char *event_type(int type);
 
 extern int parse_events_option(const struct option *opt, const char *str,
 			       int unset);
-extern int parse_events(struct perf_evlist *evlist, const char *str);
+extern int parse_events(struct perf_evlist *evlist, const char *str,
+			struct parse_events_error *error);
 extern int parse_events_terms(struct list_head *terms, const char *str);
 extern int parse_filter(const struct option *opt, const char *str, int unset);
 
@@ -74,10 +76,17 @@ struct parse_events_term {
 	bool used;
 };
 
+struct parse_events_error {
+	int   idx;	/* index in the parsed string */
+	char *str;      /* string to display at the index */
+	char *help;	/* optional help string */
+};
+
 struct parse_events_evlist {
-	struct list_head list;
-	int idx;
-	int nr_groups;
+	struct list_head	   list;
+	int			   idx;
+	int			   nr_groups;
+	struct parse_events_error *error;
 };
 
 struct parse_events_terms {
@@ -114,6 +123,8 @@ void parse_events__set_leader(char *name, struct list_head *list);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
 void parse_events_error(void *data, void *scanner, char const *msg);
+void parse_events_evlist_error(struct parse_events_evlist *data,
+			       int idx, const char *str);
 
 void print_events(const char *event_glob, bool name_only);
 
diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c
index 0ccfa498f7b8..d457c523a33d 100644
--- a/tools/perf/util/record.c
+++ b/tools/perf/util/record.c
@@ -20,7 +20,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str)
 	if (!evlist)
 		return -ENOMEM;
 
-	if (parse_events(evlist, str))
+	if (parse_events(evlist, str, NULL))
 		goto out_delete;
 
 	evsel = perf_evlist__first(evlist);
@@ -216,7 +216,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str)
 	if (!temp_evlist)
 		return false;
 
-	err = parse_events(temp_evlist, str);
+	err = parse_events(temp_evlist, str, NULL);
 	if (err)
 		goto out_delete;
 

From 6297d42372b6ff02135ce170b0d90ccf0b1531e4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:17 +0200
Subject: [PATCH 035/305] perf tools: Add flex support for parse_events_error

Allowing flex parser to report back event parsing error, like:

  $ perf record -e cycles,cache-mises ls
  event syntax error: '..es,cache-mises'
                                 \___ parser error
  ...

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.h |  1 -
 tools/perf/util/parse-events.l | 37 ++++++++++++++++++++++++++++++----
 tools/perf/util/parse-events.y |  7 ++++---
 3 files changed, 37 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 5ac2ffa0a145..eb12bcd12642 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -122,7 +122,6 @@ perf_pmu__parse_check(const char *name);
 void parse_events__set_leader(char *name, struct list_head *list);
 void parse_events_update_lists(struct list_head *list_event,
 			       struct list_head *list_all);
-void parse_events_error(void *data, void *scanner, char const *msg);
 void parse_events_evlist_error(struct parse_events_evlist *data,
 			       int idx, const char *str);
 
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 8895cf3132ab..330dd2d35f5a 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -3,6 +3,8 @@
 %option bison-bridge
 %option prefix="parse_events_"
 %option stack
+%option bison-locations
+%option yylineno
 
 %{
 #include <errno.h>
@@ -51,6 +53,18 @@ static int str(yyscan_t scanner, int token)
 	return token;
 }
 
+#define REWIND(__alloc)				\
+do {								\
+	YYSTYPE *__yylval = parse_events_get_lval(yyscanner);	\
+	char *text = parse_events_get_text(yyscanner);		\
+								\
+	if (__alloc)						\
+		__yylval->str = strdup(text);			\
+								\
+	yycolumn -= strlen(text);				\
+	yyless(0);						\
+} while (0)
+
 static int pmu_str_check(yyscan_t scanner)
 {
 	YYSTYPE *yylval = parse_events_get_lval(scanner);
@@ -85,6 +99,13 @@ static int term(yyscan_t scanner, int type)
 	return PE_TERM;
 }
 
+#define YY_USER_ACTION					\
+do {							\
+	yylloc->last_column  = yylloc->first_column;	\
+	yylloc->first_column = yycolumn;		\
+	yycolumn += yyleng;				\
+} while (0);
+
 %}
 
 %x mem
@@ -119,6 +140,12 @@ modifier_bp	[rwx]{1,3}
 
 		if (start_token) {
 			parse_events_set_extra(NULL, yyscanner);
+			/*
+			 * The flex parser does not init locations variable
+			 * via the scan_string interface, so we need do the
+			 * init in here.
+			 */
+			yycolumn = 0;
 			return start_token;
 		}
          }
@@ -127,19 +154,21 @@ modifier_bp	[rwx]{1,3}
 <event>{
 
 {group}		{
-			BEGIN(INITIAL); yyless(0);
+			BEGIN(INITIAL);
+			REWIND(0);
 		}
 
 {event_pmu}	|
 {event}		{
-			str(yyscanner, PE_EVENT_NAME);
-			BEGIN(INITIAL); yyless(0);
+			BEGIN(INITIAL);
+			REWIND(1);
 			return PE_EVENT_NAME;
 		}
 
 .		|
 <<EOF>>		{
-			BEGIN(INITIAL); yyless(0);
+			BEGIN(INITIAL);
+			REWIND(0);
 		}
 
 }
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 72def077dbbf..14521ce534d9 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -2,6 +2,7 @@
 %parse-param {void *_data}
 %parse-param {void *scanner}
 %lex-param {void* scanner}
+%locations
 
 %{
 
@@ -14,8 +15,6 @@
 #include "parse-events.h"
 #include "parse-events-bison.h"
 
-extern int parse_events_lex (YYSTYPE* lvalp, void* scanner);
-
 #define ABORT_ON(val) \
 do { \
 	if (val) \
@@ -520,7 +519,9 @@ sep_slash_dc: '/' | ':' |
 
 %%
 
-void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused,
+void parse_events_error(YYLTYPE *loc, void *data,
+			void *scanner __maybe_unused,
 			char const *msg __maybe_unused)
 {
+	parse_events_evlist_error(data, loc->last_column, "parser error");
 }

From c056ba6a174f4d5d79fe27f259fc133041a451da Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:18 +0200
Subject: [PATCH 036/305] perf tools: Always bail out when config_attr function
 fails

Not sure why we allowed the fail state, but it's wrong.  Wrong type for
'name' term can cause segfault, and there's probably more fun hidden.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 6978cc377957..1e42f2ceec3d 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -549,6 +549,12 @@ do {								\
 } while (0)
 
 	switch (term->type_term) {
+	case PARSE_EVENTS__TERM_TYPE_USER:
+		/*
+		 * Always succeed for sysfs terms, as we dont know
+		 * at this point what type they need to have.
+		 */
+		return 0;
 	case PARSE_EVENTS__TERM_TYPE_CONFIG:
 		CHECK_TYPE_VAL(NUM);
 		attr->config = term->val.num;
@@ -583,12 +589,12 @@ do {								\
 }
 
 static int config_attr(struct perf_event_attr *attr,
-		       struct list_head *head, int fail)
+		       struct list_head *head)
 {
 	struct parse_events_term *term;
 
 	list_for_each_entry(term, head, list)
-		if (config_term(attr, term) && fail)
+		if (config_term(attr, term))
 			return -EINVAL;
 
 	return 0;
@@ -605,7 +611,7 @@ int parse_events_add_numeric(struct list_head *list, int *idx,
 	attr.config = config;
 
 	if (head_config &&
-	    config_attr(&attr, head_config, 1))
+	    config_attr(&attr, head_config))
 		return -EINVAL;
 
 	return add_event(list, idx, &attr, NULL);
@@ -659,7 +665,8 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
 	 */
-	config_attr(&attr, head_config, 0);
+	if (config_attr(&attr, head_config))
+		return -EINVAL;
 
 	if (perf_pmu__config(pmu, &attr, head_config))
 		return -EINVAL;

From 36adec85a86f2daa521cda48ea7be8a95c20ed10 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:19 +0200
Subject: [PATCH 037/305] perf tools: Change parse_events_add_pmu interface

Changing parse_events_add_pmu interface to allow propagating of the
parse_events_error info.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-5-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 11 ++++++-----
 tools/perf/util/parse-events.h |  5 +++--
 tools/perf/util/parse-events.y |  6 +++---
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 1e42f2ceec3d..749af0db94e7 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -633,8 +633,9 @@ static char *pmu_event_name(struct list_head *head_terms)
 	return NULL;
 }
 
-int parse_events_add_pmu(struct list_head *list, int *idx,
-			 char *name, struct list_head *head_config)
+int parse_events_add_pmu(struct parse_events_evlist *data,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config)
 {
 	struct perf_event_attr attr;
 	struct perf_pmu_info info;
@@ -654,7 +655,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
 
 	if (!head_config) {
 		attr.type = pmu->type;
-		evsel = __add_event(list, idx, &attr, NULL, pmu->cpus);
+		evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus);
 		return evsel ? 0 : -ENOMEM;
 	}
 
@@ -671,8 +672,8 @@ int parse_events_add_pmu(struct list_head *list, int *idx,
 	if (perf_pmu__config(pmu, &attr, head_config))
 		return -EINVAL;
 
-	evsel = __add_event(list, idx, &attr, pmu_event_name(head_config),
-			    pmu->cpus);
+	evsel = __add_event(list, &data->idx, &attr,
+			    pmu_event_name(head_config), pmu->cpus);
 	if (evsel) {
 		evsel->unit = info.unit;
 		evsel->scale = info.scale;
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index eb12bcd12642..76ea3de288da 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -115,8 +115,9 @@ int parse_events_add_cache(struct list_head *list, int *idx,
 			   char *type, char *op_result1, char *op_result2);
 int parse_events_add_breakpoint(struct list_head *list, int *idx,
 				void *ptr, char *type, u64 len);
-int parse_events_add_pmu(struct list_head *list, int *idx,
-			 char *pmu , struct list_head *head_config);
+int parse_events_add_pmu(struct parse_events_evlist *data,
+			 struct list_head *list, char *name,
+			 struct list_head *head_config);
 enum perf_pmu_event_symbol_type
 perf_pmu__parse_check(const char *name);
 void parse_events__set_leader(char *name, struct list_head *list);
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 14521ce534d9..84596617b355 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -207,7 +207,7 @@ PE_NAME '/' event_config '/'
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3));
+	ABORT_ON(parse_events_add_pmu(data, list, $1, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
@@ -218,7 +218,7 @@ PE_NAME '/' '/'
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, NULL));
+	ABORT_ON(parse_events_add_pmu(data, list, $1, NULL));
 	$$ = list;
 }
 |
@@ -235,7 +235,7 @@ PE_KERNEL_PMU_EVENT sep_dc
 	list_add_tail(&term->list, head);
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head));
+	ABORT_ON(parse_events_add_pmu(data, list, "cpu", head));
 	parse_events__free_terms(head);
 	$$ = list;
 }

From cecf3a2e185c1d843428166d644ba3b564231293 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:20 +0200
Subject: [PATCH 038/305] perf tools: Add location to pmu event terms

Saving the terms location within term struct, so it could be used later
for report.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 32 ++++++++++++++++++++++++--------
 tools/perf/util/parse-events.h |  8 ++++----
 tools/perf/util/parse-events.y | 16 ++++++++--------
 3 files changed, 36 insertions(+), 20 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 749af0db94e7..2994cb41e0e4 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -25,6 +25,12 @@
 extern int parse_events_debug;
 #endif
 int parse_events_parse(void *data, void *scanner);
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   YYLTYPE *loc_term, YYLTYPE *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   YYLTYPE *loc_term, YYLTYPE *loc_val);
 
 static struct perf_pmu_event_symbol *perf_pmu_events_list;
 /*
@@ -1542,7 +1548,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term)
 
 static int new_term(struct parse_events_term **_term, int type_val,
 		    int type_term, char *config,
-		    char *str, u64 num)
+		    char *str, u64 num, int err_term, int err_val)
 {
 	struct parse_events_term *term;
 
@@ -1554,6 +1560,8 @@ static int new_term(struct parse_events_term **_term, int type_val,
 	term->type_val  = type_val;
 	term->type_term = type_term;
 	term->config = config;
+	term->err_term = err_term;
+	term->err_val  = err_val;
 
 	switch (type_val) {
 	case PARSE_EVENTS__TERM_TYPE_NUM:
@@ -1572,17 +1580,23 @@ static int new_term(struct parse_events_term **_term, int type_val,
 }
 
 int parse_events_term__num(struct parse_events_term **term,
-			   int type_term, char *config, u64 num)
+			   int type_term, char *config, u64 num,
+			   YYLTYPE *loc_term, YYLTYPE *loc_val)
 {
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
-			config, NULL, num);
+			config, NULL, num,
+			loc_term ? loc_term->first_column : 0,
+			loc_val ? loc_val->first_column : 0);
 }
 
 int parse_events_term__str(struct parse_events_term **term,
-			   int type_term, char *config, char *str)
+			   int type_term, char *config, char *str,
+			   YYLTYPE *loc_term, YYLTYPE *loc_val)
 {
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
-			config, str, 0);
+			config, str, 0,
+			loc_term ? loc_term->first_column : 0,
+			loc_val ? loc_val->first_column : 0);
 }
 
 int parse_events_term__sym_hw(struct parse_events_term **term,
@@ -1596,18 +1610,20 @@ int parse_events_term__sym_hw(struct parse_events_term **term,
 	if (config)
 		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
 				PARSE_EVENTS__TERM_TYPE_USER, config,
-				(char *) sym->symbol, 0);
+				(char *) sym->symbol, 0, 0, 0);
 	else
 		return new_term(term, PARSE_EVENTS__TERM_TYPE_STR,
 				PARSE_EVENTS__TERM_TYPE_USER,
-				(char *) "event", (char *) sym->symbol, 0);
+				(char *) "event", (char *) sym->symbol,
+				0, 0, 0);
 }
 
 int parse_events_term__clone(struct parse_events_term **new,
 			     struct parse_events_term *term)
 {
 	return new_term(new, term->type_val, term->type_term, term->config,
-			term->val.str, term->val.num);
+			term->val.str, term->val.num,
+			term->err_term, term->err_val);
 }
 
 void parse_events__free_terms(struct list_head *terms)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 76ea3de288da..6286ffdf2295 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -74,6 +74,10 @@ struct parse_events_term {
 	int type_term;
 	struct list_head list;
 	bool used;
+
+	/* error string indexes for within parsed string */
+	int err_term;
+	int err_val;
 };
 
 struct parse_events_error {
@@ -94,10 +98,6 @@ struct parse_events_terms {
 };
 
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
-int parse_events_term__num(struct parse_events_term **_term,
-			   int type_term, char *config, u64 num);
-int parse_events_term__str(struct parse_events_term **_term,
-			   int type_term, char *config, char *str);
 int parse_events_term__sym_hw(struct parse_events_term **term,
 			      char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 84596617b355..486247739a39 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -231,7 +231,7 @@ PE_KERNEL_PMU_EVENT sep_dc
 
 	ALLOC_LIST(head);
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, 1));
+					$1, 1, &@1, NULL));
 	list_add_tail(&term->list, head);
 
 	ALLOC_LIST(list);
@@ -251,7 +251,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc
 
 	ALLOC_LIST(head);
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					&pmu_name, 1));
+					&pmu_name, 1, &@1, NULL));
 	list_add_tail(&term->list, head);
 
 	ALLOC_LIST(list);
@@ -449,7 +449,7 @@ PE_NAME '=' PE_NAME
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, $3));
+					$1, $3, &@1, &@3));
 	$$ = term;
 }
 |
@@ -458,7 +458,7 @@ PE_NAME '=' PE_VALUE
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, $3));
+					$1, $3, &@1, &@3));
 	$$ = term;
 }
 |
@@ -476,7 +476,7 @@ PE_NAME
 	struct parse_events_term *term;
 
 	ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER,
-					$1, 1));
+					$1, 1, &@1, NULL));
 	$$ = term;
 }
 |
@@ -493,7 +493,7 @@ PE_TERM '=' PE_NAME
 {
 	struct parse_events_term *term;
 
-	ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3));
+	ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3));
 	$$ = term;
 }
 |
@@ -501,7 +501,7 @@ PE_TERM '=' PE_VALUE
 {
 	struct parse_events_term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3));
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3));
 	$$ = term;
 }
 |
@@ -509,7 +509,7 @@ PE_TERM
 {
 	struct parse_events_term *term;
 
-	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1));
+	ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL));
 	$$ = term;
 }
 

From e64b020ba1adfd081a26c5a35a2990f91da043a0 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:21 +0200
Subject: [PATCH 039/305] perf tools: Add term support for parse_events_error

Allowing event's term processing to report back error, like:

  $ perf record -e 'cpu/even=0x1/' ls
  event syntax error: 'cpu/even=0x1/'
                           \___ unknown term

  valid terms: pc,any,inv,edge,cmask,event,in_tx,ldlat,umask,in_tx_cp,offcore_rsp,config,config1,config2,name,period,branch_type

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-7-git-send-email-jolsa@kernel.org
[ Renamed 'error' variables to 'err', not to clash with util.h error() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/pmu.c         |  3 +-
 tools/perf/util/parse-events.c |  2 +-
 tools/perf/util/parse-events.l |  4 +++
 tools/perf/util/pmu.c          | 57 ++++++++++++++++++++++++++++++----
 tools/perf/util/pmu.h          |  6 ++--
 5 files changed, 62 insertions(+), 10 deletions(-)

diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c
index eeb68bb1972d..faa04e9d5d5f 100644
--- a/tools/perf/tests/pmu.c
+++ b/tools/perf/tests/pmu.c
@@ -152,7 +152,8 @@ int test__pmu(void)
 		if (ret)
 			break;
 
-		ret = perf_pmu__config_terms(&formats, &attr, terms, false);
+		ret = perf_pmu__config_terms(&formats, &attr, terms,
+					     false, NULL);
 		if (ret)
 			break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 2994cb41e0e4..1d810d1fc726 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -675,7 +675,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
 	if (config_attr(&attr, head_config))
 		return -EINVAL;
 
-	if (perf_pmu__config(pmu, &attr, head_config))
+	if (perf_pmu__config(pmu, &attr, head_config, data->error))
 		return -EINVAL;
 
 	evsel = __add_event(list, &data->idx, &attr,
diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l
index 330dd2d35f5a..09e738fe9ea2 100644
--- a/tools/perf/util/parse-events.l
+++ b/tools/perf/util/parse-events.l
@@ -174,6 +174,10 @@ modifier_bp	[rwx]{1,3}
 }
 
 <config>{
+	/*
+	 * Please update formats_error_string any time
+	 * new static term is added.
+	 */
 config			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); }
 config1			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); }
 config2			{ return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); }
diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 48411674da0f..244c66f89891 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -579,6 +579,38 @@ static int pmu_resolve_param_term(struct parse_events_term *term,
 	return -1;
 }
 
+static char *formats_error_string(struct list_head *formats)
+{
+	struct perf_pmu_format *format;
+	char *err, *str;
+	static const char *static_terms = "config,config1,config2,name,period,branch_type\n";
+	unsigned i = 0;
+
+	if (!asprintf(&str, "valid terms:"))
+		return NULL;
+
+	/* sysfs exported terms */
+	list_for_each_entry(format, formats, list) {
+		char c = i++ ? ',' : ' ';
+
+		err = str;
+		if (!asprintf(&str, "%s%c%s", err, c, format->name))
+			goto fail;
+		free(err);
+	}
+
+	/* static terms */
+	err = str;
+	if (!asprintf(&str, "%s,%s", err, static_terms))
+		goto fail;
+
+	free(err);
+	return str;
+fail:
+	free(err);
+	return NULL;
+}
+
 /*
  * Setup one of config[12] attr members based on the
  * user input data - term parameter.
@@ -587,7 +619,7 @@ static int pmu_config_term(struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct parse_events_term *term,
 			   struct list_head *head_terms,
-			   bool zero)
+			   bool zero, struct parse_events_error *err)
 {
 	struct perf_pmu_format *format;
 	__u64 *vp;
@@ -611,6 +643,11 @@ static int pmu_config_term(struct list_head *formats,
 	if (!format) {
 		if (verbose)
 			printf("Invalid event/parameter '%s'\n", term->config);
+		if (err) {
+			err->idx  = term->err_term;
+			err->str  = strdup("unknown term");
+			err->help = formats_error_string(formats);
+		}
 		return -EINVAL;
 	}
 
@@ -636,9 +673,14 @@ static int pmu_config_term(struct list_head *formats,
 		val = term->val.num;
 	else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) {
 		if (strcmp(term->val.str, "?")) {
-			if (verbose)
+			if (verbose) {
 				pr_info("Invalid sysfs entry %s=%s\n",
 						term->config, term->val.str);
+			}
+			if (err) {
+				err->idx = term->err_val;
+				err->str = strdup("expected numeric value");
+			}
 			return -EINVAL;
 		}
 
@@ -654,12 +696,13 @@ static int pmu_config_term(struct list_head *formats,
 int perf_pmu__config_terms(struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct list_head *head_terms,
-			   bool zero)
+			   bool zero, struct parse_events_error *err)
 {
 	struct parse_events_term *term;
 
 	list_for_each_entry(term, head_terms, list) {
-		if (pmu_config_term(formats, attr, term, head_terms, zero))
+		if (pmu_config_term(formats, attr, term, head_terms,
+				    zero, err))
 			return -EINVAL;
 	}
 
@@ -672,12 +715,14 @@ int perf_pmu__config_terms(struct list_head *formats,
  * 2) pmu format definitions - specified by pmu parameter
  */
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
-		     struct list_head *head_terms)
+		     struct list_head *head_terms,
+		     struct parse_events_error *err)
 {
 	bool zero = !!pmu->default_config;
 
 	attr->type = pmu->type;
-	return perf_pmu__config_terms(&pmu->format, attr, head_terms, zero);
+	return perf_pmu__config_terms(&pmu->format, attr, head_terms,
+				      zero, err);
 }
 
 static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu,
diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h
index 6b1249fbdb5f..7b9c8cf8ae3e 100644
--- a/tools/perf/util/pmu.h
+++ b/tools/perf/util/pmu.h
@@ -4,6 +4,7 @@
 #include <linux/bitmap.h>
 #include <linux/perf_event.h>
 #include <stdbool.h>
+#include "parse-events.h"
 
 enum {
 	PERF_PMU_FORMAT_VALUE_CONFIG,
@@ -47,11 +48,12 @@ struct perf_pmu_alias {
 
 struct perf_pmu *perf_pmu__find(const char *name);
 int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr,
-		     struct list_head *head_terms);
+		     struct list_head *head_terms,
+		     struct parse_events_error *error);
 int perf_pmu__config_terms(struct list_head *formats,
 			   struct perf_event_attr *attr,
 			   struct list_head *head_terms,
-			   bool zero);
+			   bool zero, struct parse_events_error *error);
 int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms,
 			  struct perf_pmu_info *info);
 struct list_head *perf_pmu__alias(struct perf_pmu *pmu,

From 3b0e371cc05dfb624f990ea5e1da2bff615adaef Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:22 +0200
Subject: [PATCH 040/305] perf tools: Add static terms support for
 parse_events_error

Allowing static terms like 'name,period,config,config1..' processing to
report back error.

  $ perf record -e 'cpu/event=1,name=1/' ls
  event syntax error: '..=1,name=1/'
                                 \___ expected string value

  $ perf record -e 'cpu/event=1,period=krava/' ls
  event syntax error: '..,period=krava/'
                                 \___ expected numeric value

  $ perf record -e 'cpu/config=krava1/' ls
  event syntax error: '../config=krava1/'
                                 \___ expected numeric value

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-8-git-send-email-jolsa@kernel.org
[ Renamed 'error' variables to 'err', not to clash with util.h error() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 39 +++++++++++++++++++++++++---------
 1 file changed, 29 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 1d810d1fc726..278aebe5d65b 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -545,13 +545,31 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx,
 	return add_event(list, idx, &attr, NULL);
 }
 
-static int config_term(struct perf_event_attr *attr,
-		       struct parse_events_term *term)
+static int check_type_val(struct parse_events_term *term,
+			  struct parse_events_error *err,
+			  int type)
 {
-#define CHECK_TYPE_VAL(type)					\
-do {								\
-	if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val)	\
-		return -EINVAL;					\
+	if (type == term->type_val)
+		return 0;
+
+	if (err) {
+		err->idx = term->err_val;
+		if (type == PARSE_EVENTS__TERM_TYPE_NUM)
+			err->str = strdup("expected numeric value");
+		else
+			err->str = strdup("expected string value");
+	}
+	return -EINVAL;
+}
+
+static int config_term(struct perf_event_attr *attr,
+		       struct parse_events_term *term,
+		       struct parse_events_error *err)
+{
+#define CHECK_TYPE_VAL(type)						   \
+do {									   \
+	if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \
+		return -EINVAL;						   \
 } while (0)
 
 	switch (term->type_term) {
@@ -595,12 +613,13 @@ do {								\
 }
 
 static int config_attr(struct perf_event_attr *attr,
-		       struct list_head *head)
+		       struct list_head *head,
+		       struct parse_events_error *err)
 {
 	struct parse_events_term *term;
 
 	list_for_each_entry(term, head, list)
-		if (config_term(attr, term))
+		if (config_term(attr, term, err))
 			return -EINVAL;
 
 	return 0;
@@ -617,7 +636,7 @@ int parse_events_add_numeric(struct list_head *list, int *idx,
 	attr.config = config;
 
 	if (head_config &&
-	    config_attr(&attr, head_config))
+	    config_attr(&attr, head_config, NULL))
 		return -EINVAL;
 
 	return add_event(list, idx, &attr, NULL);
@@ -672,7 +691,7 @@ int parse_events_add_pmu(struct parse_events_evlist *data,
 	 * Configure hardcoded terms first, no need to check
 	 * return value when called with fail == 0 ;)
 	 */
-	if (config_attr(&attr, head_config))
+	if (config_attr(&attr, head_config, data->error))
 		return -EINVAL;
 
 	if (perf_pmu__config(pmu, &attr, head_config, data->error))

From 492d977444734e03c0633a238f1431b3c66b3e97 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:23 +0200
Subject: [PATCH 041/305] perf tools: Add tracepoint support for
 parse_events_error

Allowing tracepoint events processing to report back error.

  $ perf record -e 'sched:krava' ls
  event syntax error: 'sched:krava'
                       \___ unknown tracepoint
  ...

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-9-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.y | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 486247739a39..38a0f21fc433 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -388,7 +388,13 @@ PE_NAME ':' PE_NAME
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3));
+	if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) {
+		struct parse_events_error *error = data->error;
+
+		error->idx = @1.first_column;
+		error->str = strdup("unknown tracepoint");
+		return -1;
+	}
 	$$ = list;
 }
 

From 87d650be1dcc9bd9bb200e73b985ddb740d067bc Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 22 Apr 2015 21:10:24 +0200
Subject: [PATCH 042/305] perf tools: Add symbolic events support for
 parse_events_error

Allowing symbolic events processing to report back error.

  $ perf record -e 'cycles/period=krava/' ls
  event syntax error: '../period=krava/'
                                 \___ expected numeric value

  $ perf record -e 'cycles/name=1/' ls
  event syntax error: '..es/name=1/'
                                 \___ expected string value

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429729824-13932-10-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c |  7 ++++---
 tools/perf/util/parse-events.h |  3 ++-
 tools/perf/util/parse-events.y | 11 ++++-------
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 278aebe5d65b..80a50fdb6d8a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -625,7 +625,8 @@ static int config_attr(struct perf_event_attr *attr,
 	return 0;
 }
 
-int parse_events_add_numeric(struct list_head *list, int *idx,
+int parse_events_add_numeric(struct parse_events_evlist *data,
+			     struct list_head *list,
 			     u32 type, u64 config,
 			     struct list_head *head_config)
 {
@@ -636,10 +637,10 @@ int parse_events_add_numeric(struct list_head *list, int *idx,
 	attr.config = config;
 
 	if (head_config &&
-	    config_attr(&attr, head_config, NULL))
+	    config_attr(&attr, head_config, data->error))
 		return -EINVAL;
 
-	return add_event(list, idx, &attr, NULL);
+	return add_event(list, &data->idx, &attr, NULL);
 }
 
 static int parse_events__is_name_term(struct parse_events_term *term)
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index 6286ffdf2295..e236f1b6ac6f 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -108,7 +108,8 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod);
 int parse_events_name(struct list_head *list, char *name);
 int parse_events_add_tracepoint(struct list_head *list, int *idx,
 				char *sys, char *event);
-int parse_events_add_numeric(struct list_head *list, int *idx,
+int parse_events_add_numeric(struct parse_events_evlist *data,
+			     struct list_head *list,
 			     u32 type, u64 config,
 			     struct list_head *head_config);
 int parse_events_add_cache(struct list_head *list, int *idx,
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 38a0f21fc433..3d11e00243e3 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -274,8 +274,7 @@ value_sym '/' event_config '/'
 	int config = $1 & 255;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(list, &data->idx,
-					  type, config, $3));
+	ABORT_ON(parse_events_add_numeric(data, list, type, config, $3));
 	parse_events__free_terms($3);
 	$$ = list;
 }
@@ -288,8 +287,7 @@ value_sym sep_slash_dc
 	int config = $1 & 255;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(list, &data->idx,
-					  type, config, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL));
 	$$ = list;
 }
 
@@ -405,7 +403,7 @@ PE_VALUE ':' PE_VALUE
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL));
 	$$ = list;
 }
 
@@ -416,8 +414,7 @@ PE_RAW
 	struct list_head *list;
 
 	ALLOC_LIST(list);
-	ABORT_ON(parse_events_add_numeric(list, &data->idx,
-					  PERF_TYPE_RAW, $1, NULL));
+	ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL));
 	$$ = list;
 }
 

From b3ac032b7a76fea678de225d26ae04d10e47f0ac Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 23 Apr 2015 22:46:12 +0900
Subject: [PATCH 043/305] perf probe: Make --funcs option exclusive

The --funcs option should be given exclusively.  This adds
PARSE_OPT_EXCUSIVE flag on --funcs (-F) option.

Without this, 'perf probe --funcs -l' just shows the list of probes.

With this, it shows error message correctly.

This also fixes the help message and the documentation.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150423134612.26128.58189.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-probe.txt | 2 ++
 tools/perf/builtin-probe.c              | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index 239609c09f83..a4a3cc76b36b 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -19,6 +19,8 @@ or
 'perf probe' [options] --line='LINE'
 or
 'perf probe' [options] --vars='PROBEPOINT'
+or
+'perf probe' [options] --funcs
 
 DESCRIPTION
 -----------
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index f7b1af67e9f6..92dcce065e68 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -321,6 +321,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf probe [<options>] --line 'LINEDESC'",
 		"perf probe [<options>] --vars 'PROBEPOINT'",
 #endif
+		"perf probe [<options>] --funcs",
 		NULL
 };
 	struct option options[] = {
@@ -402,6 +403,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE);
 	set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE);
 #endif
+	set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE);
 
 	argc = parse_options(argc, argv, options, probe_usage,
 			     PARSE_OPT_STOP_AT_NON_OPTION);

From 6dbe31f7baf6d50fa396440dae0808bb712e9a37 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Thu, 23 Apr 2015 22:46:14 +0900
Subject: [PATCH 044/305] perf probe: Remove all probes matches given pattern
 at once

Fix perf-probe --del option to delete all matched probes in both
of kprobes and uprobes at once.

When we have 2 or more events on different binaries as below,

  ----
  # ./perf probe -l
  probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)
  probe_libc:malloc    (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.17
  ----

Trying to remove all event with '*' just removes kprobe events at first.
  ----
  # ./perf probe -d \*
  Removed event: probe:vfs_read
  ----

And in 2nd try, it removes all uprobe events.
  ----
  # ./perf probe -d \*
  Removed event: probe_libc:malloc
  ----

This fixes to remove all event at once as below.
  ----
  # ./perf probe -d \*
  Removed event: probe:vfs_read
  Removed event: probe_libc:malloc
  ----

Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150423134614.26128.18106.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 26 +++++++++++++++++---------
 1 file changed, 17 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d8bb616ff57c..291bf23a5013 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2686,7 +2686,7 @@ static int del_trace_probe_event(int fd, const char *buf,
 						  struct strlist *namelist)
 {
 	struct str_node *ent, *n;
-	int ret = -1;
+	int ret = -ENOENT;
 
 	if (strpbrk(buf, "*?")) { /* Glob-exp */
 		strlist__for_each_safe(ent, n, namelist)
@@ -2710,7 +2710,7 @@ static int del_trace_probe_event(int fd, const char *buf,
 
 int del_perf_probe_events(struct strlist *dellist)
 {
-	int ret = -1, ufd = -1, kfd = -1;
+	int ret = -1, ret2, ufd = -1, kfd = -1;
 	char buf[128];
 	const char *group, *event;
 	char *p, *str;
@@ -2731,8 +2731,10 @@ int del_perf_probe_events(struct strlist *dellist)
 		goto error;
 	}
 
-	if (namelist == NULL && unamelist == NULL)
+	if (namelist == NULL && unamelist == NULL) {
+		ret = -ENOENT;
 		goto error;
+	}
 
 	strlist__for_each(ent, dellist) {
 		str = strdup(ent->s);
@@ -2759,17 +2761,23 @@ int del_perf_probe_events(struct strlist *dellist)
 		}
 
 		pr_debug("Group: %s, Event: %s\n", group, event);
+		free(str);
 
+		ret = ret2 = -ENOENT;
 		if (namelist)
 			ret = del_trace_probe_event(kfd, buf, namelist);
 
-		if (unamelist && ret != 0)
-			ret = del_trace_probe_event(ufd, buf, unamelist);
+		if ((ret >= 0 || ret == -ENOENT) && unamelist)
+			ret2 = del_trace_probe_event(ufd, buf, unamelist);
 
-		if (ret != 0)
-			pr_info("Info: Event \"%s\" does not exist.\n", buf);
-
-		free(str);
+		/* Since we can remove probes which already removed, don't check it */
+		if (ret == -ENOENT && ret2 == -ENOENT)
+			pr_debug("Event \"%s\" does not exist.\n", buf);
+		else if (ret < 0 || ret2 < 0) {
+			if (ret >= 0)
+				ret = ret2;
+			break;
+		}
 	}
 
 error:

From fa0e4ffe06f0823abdf6c401c340c08ba8977fad Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 23 Apr 2015 11:59:20 -0300
Subject: [PATCH 045/305] perf trace: Fix --filter-pids OPTION description

Cut't'paste error, fix it.

Reported-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-26abqh0wg9dci3fqcppyrpxy@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index e124741be187..23fb0818fec9 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2668,8 +2668,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "trace events on existing process id"),
 	OPT_STRING('t', "tid", &trace.opts.target.tid, "tid",
 		    "trace events on existing thread id"),
-	OPT_CALLBACK(0, "filter-pids", &trace, "float",
-		     "show only events with duration > N.M ms", trace__set_filter_pids),
+	OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids",
+		     "pids to filter (by the kernel)", trace__set_filter_pids),
 	OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide,
 		    "system-wide collection from all CPUs"),
 	OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu",

From d303e85a5c19eb8d913951d1aa81556fc20b20af Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 23 Apr 2015 12:02:07 -0300
Subject: [PATCH 046/305] perf trace: Clarify that -e is about syscalls, not
 perf events in general

This comes from the desire of having -e/--expr to have the same meaning
as for 'strace', while other perf tools use it for --event, which
'trace' honours, i.e. all perf tools have --event in common, but trace
uses -e for strace's --expr.

Clarify it in the --help output.

Reported-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-5j94bcsdmcbeu2xthnzsj60d@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-trace.txt | 2 +-
 tools/perf/builtin-trace.c              | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index ba03fd5d1a54..1db9c8b79880 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -35,7 +35,7 @@ OPTIONS
 
 -e::
 --expr::
-	List of events to show, currently only syscall names.
+	List of syscalls to show, currently only syscall names.
 	Prefixing with ! shows all syscalls but the ones specified.  You may
 	need to escape it.
 
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 23fb0818fec9..d1139b6bd534 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2660,8 +2660,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "comm", &trace.show_comm,
 		    "show the thread COMM next to its id"),
 	OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"),
-	OPT_STRING('e', "expr", &ev_qualifier_str, "expr",
-		    "list of events to trace"),
+	OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"),
 	OPT_STRING('o', "output", &output_name, "file", "output file name"),
 	OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"),
 	OPT_STRING('p', "pid", &trace.opts.target.pid, "pid",

From a0aa21e2281f89a197ad8e7bd8008f3b7b735dfc Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Tue, 7 Apr 2015 23:25:15 +0200
Subject: [PATCH 047/305] perf stat: Fix metrics calculation with event
 qualifiers

Currently in perf IPC and other metrics cannot be directly shown
separately for both user and kernel in a single run. The problem was
that the metrics matching code did not check event qualifiers.

With this patch the following case works correctly.

% perf stat -e cycles:k,cycles:u,instructions:k,instructions:u true

 Performance counter stats for 'true':

           531,718      cycles:k
           203,895      cycles:u
           338,151      instructions:k            #    0.64  insns per cycle
           105,961      instructions:u            #    0.52  insns per cycle

       0.002989739 seconds time elapsed

Previously it would misreport the ratios because they were matching the
wrong value.

The patch is fairly big, but quite mechanic as it just adds context
indexes everywhere.

Reported-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Cohen <wcohen@redhat.com>
Link: http://lkml.kernel.org/r/1428441919-23099-3-git-send-email-jolsa@kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 129 +++++++++++++++++++++++---------------
 1 file changed, 77 insertions(+), 52 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 3dbd8c59efc5..52f433084779 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -247,21 +247,35 @@ out_free:
 	return -1;
 }
 
+#define NUM_CTX 3
+
+enum { CTX_USER, CTX_KERNEL, CTX_ALL };
+
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
-static struct stats runtime_branches_stats[MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[MAX_NR_CPUS];
-static struct stats runtime_elision_stats[MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+	if (evsel->attr.exclude_kernel)
+		return CTX_USER;
+	if (evsel->attr.exclude_user)
+		return CTX_KERNEL;
+	/* Handle hypervisor too? */
+	return CTX_ALL;
+}
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -356,37 +370,39 @@ static struct perf_evsel *nth_evsel(int n)
 static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				int cpu)
 {
+	int ctx = evsel_context(counter);
+
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[cpu], count[0]);
+		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
-		update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
-		update_stats(&runtime_transaction_stats[cpu], count[0]);
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
-		update_stats(&runtime_elision_stats[cpu], count[0]);
+		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[cpu], count[0]);
+		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[cpu], count[0]);
+		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
+		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_l1_icache_stats[cpu], count[0]);
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[cpu], count[0]);
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
+		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
+		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
 }
 
 static void zero_per_pkg(struct perf_evsel *counter)
@@ -908,8 +924,9 @@ static void print_stalled_cycles_frontend(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[cpu]);
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -927,8 +944,9 @@ static void print_stalled_cycles_backend(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[cpu]);
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -946,8 +964,9 @@ static void print_branch_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_branches_stats[cpu]);
+	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -965,8 +984,9 @@ static void print_l1_dcache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_dcache_stats[cpu]);
+	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -984,8 +1004,9 @@ static void print_l1_icache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_icache_stats[cpu]);
+	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1003,8 +1024,9 @@ static void print_dtlb_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
+	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1022,8 +1044,9 @@ static void print_itlb_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_itlb_cache_stats[cpu]);
+	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1041,8 +1064,9 @@ static void print_ll_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_ll_cache_stats[cpu]);
+	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1060,6 +1084,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	double sc =  evsel->scale;
 	const char *fmt;
 	int cpu = cpu_map__id_to_cpu(id);
+	int ctx = evsel_context(evsel);
 
 	if (csv_output) {
 		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
@@ -1091,15 +1116,15 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		return;
 
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
 			ratio = avg / total;
 			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
 		} else {
 			fprintf(output, "                                   ");
 		}
-		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
-		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
+		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 		if (total && avg) {
 			ratio = total / avg;
@@ -1110,46 +1135,46 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		}
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[cpu].n != 0) {
+			runtime_branches_stats[ctx][cpu].n != 0) {
 		print_branch_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[cpu].n != 0) {
+			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
 		print_l1_dcache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[cpu].n != 0) {
+			runtime_l1_icache_stats[ctx][cpu].n != 0) {
 		print_l1_icache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[cpu].n != 0) {
+			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
 		print_dtlb_cache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[cpu].n != 0) {
+			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
 		print_itlb_cache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[cpu].n != 0) {
+			runtime_ll_cache_stats[ctx][cpu].n != 0) {
 		print_ll_cache_misses(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cacherefs_stats[cpu]);
+			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
@@ -1171,15 +1196,15 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		}
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
 			fprintf(output,
 				" #   %5.2f%% transactional cycles   ",
 				100.0 * (avg / total));
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
-		total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
@@ -1189,8 +1214,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
 		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (total)
 			ratio = total / avg;
@@ -1199,8 +1224,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
 		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (total)
 			ratio = total / avg;

From 9f71b4f39d7c01cae9d80e938774ff6b0a9bfd80 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 7 Apr 2015 23:25:16 +0200
Subject: [PATCH 048/305] perf stat: Change metrics context calculation

Changing metrics context calculation to allow more than 2 types of
context.

Following patches will add support for the rest of the exclude_* bits so
we need separate array element for all context combinations.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Cohen <wcohen@redhat.com>
Link: http://lkml.kernel.org/r/1428441919-23099-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 52f433084779..cca100dc5fd0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -247,9 +247,13 @@ out_free:
 	return -1;
 }
 
-#define NUM_CTX 3
+enum {
+	CTX_BIT_USER	= 1 << 0,
+	CTX_BIT_KERNEL	= 1 << 1,
+	CTX_BIT_MAX	= 1 << 2,
+};
 
-enum { CTX_USER, CTX_KERNEL, CTX_ALL };
+#define NUM_CTX CTX_BIT_MAX
 
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
 static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
@@ -269,12 +273,13 @@ static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
 
 static int evsel_context(struct perf_evsel *evsel)
 {
+	int ctx = 0;
+
 	if (evsel->attr.exclude_kernel)
-		return CTX_USER;
+		ctx |= CTX_BIT_KERNEL;
 	if (evsel->attr.exclude_user)
-		return CTX_KERNEL;
-	/* Handle hypervisor too? */
-	return CTX_ALL;
+		ctx |= CTX_BIT_USER;
+	return ctx;
 }
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)

From afef2fbd1435050377f6487c43f2aa6edc37b30f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 7 Apr 2015 23:25:17 +0200
Subject: [PATCH 049/305] perf stat: Add metrics support for exclude_hv

Separating metrics values for exclude_hv bit.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Cohen <wcohen@redhat.com>
Link: http://lkml.kernel.org/r/1428441919-23099-5-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cca100dc5fd0..5a88a14d702b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -250,7 +250,8 @@ out_free:
 enum {
 	CTX_BIT_USER	= 1 << 0,
 	CTX_BIT_KERNEL	= 1 << 1,
-	CTX_BIT_MAX	= 1 << 2,
+	CTX_BIT_HV	= 1 << 2,
+	CTX_BIT_MAX	= 1 << 3,
 };
 
 #define NUM_CTX CTX_BIT_MAX
@@ -279,6 +280,8 @@ static int evsel_context(struct perf_evsel *evsel)
 		ctx |= CTX_BIT_KERNEL;
 	if (evsel->attr.exclude_user)
 		ctx |= CTX_BIT_USER;
+	if (evsel->attr.exclude_hv)
+		ctx |= CTX_BIT_HV;
 	return ctx;
 }
 

From a2270d38a53aa88c67a72978fc8717e26c7f27d9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 7 Apr 2015 23:25:18 +0200
Subject: [PATCH 050/305] perf stat: Add metrics support for
 exclude_(host|guest)

Separating metrics values for guest and host, so we get proper values.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Cohen <wcohen@redhat.com>
Link: http://lkml.kernel.org/r/1428441919-23099-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5a88a14d702b..ea5250890bd5 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -251,7 +251,8 @@ enum {
 	CTX_BIT_USER	= 1 << 0,
 	CTX_BIT_KERNEL	= 1 << 1,
 	CTX_BIT_HV	= 1 << 2,
-	CTX_BIT_MAX	= 1 << 3,
+	CTX_BIT_HOST	= 1 << 3,
+	CTX_BIT_MAX	= 1 << 4,
 };
 
 #define NUM_CTX CTX_BIT_MAX
@@ -282,6 +283,9 @@ static int evsel_context(struct perf_evsel *evsel)
 		ctx |= CTX_BIT_USER;
 	if (evsel->attr.exclude_hv)
 		ctx |= CTX_BIT_HV;
+	if (evsel->attr.exclude_host)
+		ctx |= CTX_BIT_HOST;
+
 	return ctx;
 }
 

From c4fa0d9c1e6aa360cfa2c36f7836a89da24a1b7a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Tue, 7 Apr 2015 23:25:19 +0200
Subject: [PATCH 051/305] perf stat: Add metrics support for exclude_idle

Separating metrics values for exclude_idle bit.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: William Cohen <wcohen@redhat.com>
Link: http://lkml.kernel.org/r/1428441919-23099-7-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ea5250890bd5..fd577f725d23 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -252,7 +252,8 @@ enum {
 	CTX_BIT_KERNEL	= 1 << 1,
 	CTX_BIT_HV	= 1 << 2,
 	CTX_BIT_HOST	= 1 << 3,
-	CTX_BIT_MAX	= 1 << 4,
+	CTX_BIT_IDLE	= 1 << 4,
+	CTX_BIT_MAX	= 1 << 5,
 };
 
 #define NUM_CTX CTX_BIT_MAX
@@ -285,6 +286,8 @@ static int evsel_context(struct perf_evsel *evsel)
 		ctx |= CTX_BIT_HV;
 	if (evsel->attr.exclude_host)
 		ctx |= CTX_BIT_HOST;
+	if (evsel->attr.exclude_idle)
+		ctx |= CTX_BIT_IDLE;
 
 	return ctx;
 }

From dd3092075c3263fca7a3e98b143ad2296aab71b4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 15:33:45 +0900
Subject: [PATCH 052/305] perf tools: Document --children option in more detail

As the --children option changes the output of perf report (and perf
top) it sometimes confuses users.  Add more words and examples to help
understanding of the option's behavior - and how to disable it ;-).

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/1429684425-14987-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../callchain-overhead-calculation.txt        | 108 ++++++++++++++++++
 tools/perf/Documentation/perf-report.txt      |   4 +
 tools/perf/Documentation/perf-top.txt         |   3 +-
 3 files changed, 114 insertions(+), 1 deletion(-)
 create mode 100644 tools/perf/Documentation/callchain-overhead-calculation.txt

diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt
new file mode 100644
index 000000000000..1a757927195e
--- /dev/null
+++ b/tools/perf/Documentation/callchain-overhead-calculation.txt
@@ -0,0 +1,108 @@
+Overhead calculation
+--------------------
+The overhead can be shown in two columns as 'Children' and 'Self' when
+perf collects callchains.  The 'self' overhead is simply calculated by
+adding all period values of the entry - usually a function (symbol).
+This is the value that perf shows traditionally and sum of all the
+'self' overhead values should be 100%.
+
+The 'children' overhead is calculated by adding all period values of
+the child functions so that it can show the total overhead of the
+higher level functions even if they don't directly execute much.
+'Children' here means functions that are called from another (parent)
+function.
+
+It might be confusing that the sum of all the 'children' overhead
+values exceeds 100% since each of them is already an accumulation of
+'self' overhead of its child functions.  But with this enabled, users
+can find which function has the most overhead even if samples are
+spread over the children.
+
+Consider the following example; there are three functions like below.
+
+-----------------------
+void foo(void) {
+    /* do something */
+}
+
+void bar(void) {
+    /* do something */
+    foo();
+}
+
+int main(void) {
+    bar()
+    return 0;
+}
+-----------------------
+
+In this case 'foo' is a child of 'bar', and 'bar' is an immediate
+child of 'main' so 'foo' also is a child of 'main'.  In other words,
+'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'.
+
+Suppose all samples are recorded in 'foo' and 'bar' only.  When it's
+recorded with callchains the output will show something like below
+in the usual (self-overhead-only) output of perf report:
+
+----------------------------------
+Overhead  Symbol
+........  .....................
+  60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+
+  40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+----------------------------------
+
+When the --children option is enabled, the 'self' overhead values of
+child functions (i.e. 'foo' and 'bar') are added to the parents to
+calculate the 'children' overhead.  In this case the report could be
+displayed as:
+
+-------------------------------------------
+Children      Self  Symbol
+........  ........  ....................
+ 100.00%     0.00%  __libc_start_main
+          |
+          --- __libc_start_main
+
+ 100.00%     0.00%  main
+          |
+          --- main
+              __libc_start_main
+
+ 100.00%    40.00%  bar
+          |
+          --- bar
+              main
+              __libc_start_main
+
+  60.00%    60.00%  foo
+          |
+          --- foo
+              bar
+              main
+              __libc_start_main
+-------------------------------------------
+
+In the above output, the 'self' overhead of 'foo' (60%) was add to the
+'children' overhead of 'bar', 'main' and '\_\_libc_start_main'.
+Likewise, the 'self' overhead of 'bar' (40%) was added to the
+'children' overhead of 'main' and '\_\_libc_start_main'.
+
+So '\_\_libc_start_main' and 'main' are shown first since they have
+same (100%) 'children' overhead (even though they have zero 'self'
+overhead) and they are the parents of 'foo' and 'bar'.
+
+Since v3.16 the 'children' overhead is shown by default and the output
+is sorted by its values. The 'children' overhead is disabled by
+specifying --no-children option on the command line or by adding
+'report.children = false' or 'top.children = false' in the perf config
+file.
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4879cf638824..896672badba3 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -193,6 +193,7 @@ OPTIONS
 	Accumulate callchain of children to parent entry so that then can
 	show up in the output.  The output will have a new "Children" column
 	and will be sorted on the data.  It requires callchains are recorded.
+	See the `overhead calculation' section for more details.
 
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
@@ -323,6 +324,9 @@ OPTIONS
 --header-only::
 	Show only perf.data header (forces --stdio).
 
+
+include::callchain-overhead-calculation.txt[]
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 3265b1070518..9e5b07eb7d35 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -168,7 +168,7 @@ Default is to monitor all CPUS.
 	Accumulate callchain of children to parent entry so that then can
 	show up in the output.  The output will have a new "Children" column
 	and will be sorted on the data.  It requires -g/--call-graph option
-	enabled.
+	enabled.  See the `overhead calculation' section for more details.
 
 --max-stack::
 	Set the stack depth limit when parsing the callchain, anything
@@ -234,6 +234,7 @@ INTERACTIVE PROMPTING KEYS
 
 Pressing any unmapped key displays a menu, and prompts for input.
 
+include::callchain-overhead-calculation.txt[]
 
 SEE ALSO
 --------

From 762abdc0c6c013425958cd9f5105f4e32268d434 Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Thu, 23 Apr 2015 15:00:16 +0100
Subject: [PATCH 053/305] perf tools: Use getconf to determine number of online
 CPUs

Parsing /proc/cpuinfo is a fiddly, arch-dependent business and a recent
change to get it working for Sparc broke arm and arm64 platforms.

Use sysconf to determine the number of online CPUs only parsing
/proc/cpuinfo when sysconf is not available.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <david.ahern@oracle.com>
Cc: Mark Rutland <Mark.Rutland@arm.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/r/20150423140454.GJ1652@arm.com
[ Made it fall back to parsing /proc when getconf not found ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile b/tools/perf/Makefile
index c699dc35eef9..d31a7bbd7cee 100644
--- a/tools/perf/Makefile
+++ b/tools/perf/Makefile
@@ -24,7 +24,7 @@ unexport MAKEFLAGS
 # (To override it, run 'make JOBS=1' and similar.)
 #
 ifeq ($(JOBS),)
-  JOBS := $(shell egrep -c '^processor|^CPU' /proc/cpuinfo 2>/dev/null)
+  JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null)
   ifeq ($(JOBS),0)
     JOBS := 1
   endif

From b64aa553d8430aabd24f303899cfa4de678e2c3a Mon Sep 17 00:00:00 2001
From: Petr Holasek <pholasek@redhat.com>
Date: Thu, 16 Apr 2015 17:38:18 +0200
Subject: [PATCH 054/305] perf bench numa: Show more stats of particular
 threads in verbose mode

In verbose mode perf bench numa shows also GB/s speed, system and user cpu
time for each particular thread. Using of getrusage() can provide much more
per process or per thread stats in future.

Signed-off-by: Petr Holasek <pholasek@redhat.com>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1429198699-25039-3-git-send-email-pholasek@redhat.com
[ Rename 'usage' variable to not shadow util.h's usage() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/numa.c | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index ebfa163b80b5..0b704c5f6d90 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -23,6 +23,7 @@
 #include <pthread.h>
 #include <sys/mman.h>
 #include <sys/time.h>
+#include <sys/resource.h>
 #include <sys/wait.h>
 #include <sys/prctl.h>
 #include <sys/types.h>
@@ -51,6 +52,9 @@ struct thread_data {
 	unsigned int		loops_done;
 	u64			val;
 	u64			runtime_ns;
+	u64			system_time_ns;
+	u64			user_time_ns;
+	double			speed_gbs;
 	pthread_mutex_t		*process_lock;
 };
 
@@ -1034,6 +1038,7 @@ static void *worker_thread(void *__tdata)
 	u64 bytes_done;
 	long work_done;
 	u32 l;
+	struct rusage rusage;
 
 	bind_to_cpumask(td->bind_cpumask);
 	bind_to_memnode(td->bind_node);
@@ -1186,6 +1191,13 @@ static void *worker_thread(void *__tdata)
 	timersub(&stop, &start0, &diff);
 	td->runtime_ns = diff.tv_sec * 1000000000ULL;
 	td->runtime_ns += diff.tv_usec * 1000ULL;
+	td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9;
+
+	getrusage(RUSAGE_THREAD, &rusage);
+	td->system_time_ns = rusage.ru_stime.tv_sec * 1000000000ULL;
+	td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL;
+	td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL;
+	td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL;
 
 	free_data(thread_data, g->p.bytes_thread);
 
@@ -1412,7 +1424,7 @@ static int __bench_numa(const char *name)
 	double runtime_sec_min;
 	int wait_stat;
 	double bytes;
-	int i, t;
+	int i, t, p;
 
 	if (init())
 		return -1;
@@ -1548,6 +1560,24 @@ static int __bench_numa(const char *name)
 	print_res(name, bytes / runtime_sec_max / 1e9,
 		"GB/sec,", "total-speed",	"GB/sec total speed");
 
+	if (g->p.show_details >= 2) {
+		char tname[32];
+		struct thread_data *td;
+		for (p = 0; p < g->p.nr_proc; p++) {
+			for (t = 0; t < g->p.nr_threads; t++) {
+				memset(tname, 0, 32);
+				td = g->threads + p*g->p.nr_threads + t;
+				snprintf(tname, 32, "process%d:thread%d", p, t);
+				print_res(tname, td->speed_gbs,
+					"GB/sec",	"thread-speed", "GB/sec/thread speed");
+				print_res(tname, td->system_time_ns / 1e9,
+					"secs",	"thread-system-time", "system CPU time/thread");
+				print_res(tname, td->user_time_ns / 1e9,
+					"secs",	"thread-user-time", "user CPU time/thread");
+			}
+		}
+	}
+
 	free(pids);
 
 	deinit();

From d2332098331fffe9358b50cebc8954ecd6560448 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:35 +0530
Subject: [PATCH 055/305] perf probe ppc: Fix symbol fixup issues due to ELF
 type

If using the symbol table, symbol addresses are not being fixed up
properly, resulting in probes being placed at wrong addresses:

  # perf probe do_fork
  Added new event:
    probe:do_fork        (on do_fork)

  You can now use it in all perf tools, such as:

	  perf record -e probe:do_fork -aR sleep 1

  # cat /sys/kernel/debug/tracing/kprobe_events
  p:probe/do_fork _text+635952
  # printf "%x" 635952
  9b430
  # grep do_fork /boot/System.map
  c0000000000ab430 T .do_fork

Fix by checking for ELF type ET_DYN used by ppc64 kernels.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/41392bb856ef62d929995e0b61967689b7915207.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/Build          |  1 +
 tools/perf/arch/powerpc/util/sym-handling.c | 19 +++++++++++++++++++
 tools/perf/util/symbol-elf.c                |  8 ++++++--
 tools/perf/util/symbol.h                    |  4 ++++
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tools/perf/arch/powerpc/util/sym-handling.c

diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build
index 0af6e9b3f728..7b8b0d1a1b62 100644
--- a/tools/perf/arch/powerpc/util/Build
+++ b/tools/perf/arch/powerpc/util/Build
@@ -1,4 +1,5 @@
 libperf-y += header.o
+libperf-y += sym-handling.o
 
 libperf-$(CONFIG_DWARF) += dwarf-regs.o
 libperf-$(CONFIG_DWARF) += skip-callchain-idx.o
diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
new file mode 100644
index 000000000000..c9de0012195f
--- /dev/null
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -0,0 +1,19 @@
+/*
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ *
+ * Copyright (C) 2015 Naveen N. Rao, IBM Corporation
+ */
+
+#include "debug.h"
+#include "symbol.h"
+
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	return ehdr.e_type == ET_EXEC ||
+	       ehdr.e_type == ET_REL ||
+	       ehdr.e_type == ET_DYN;
+}
+#endif
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a7ab6063e038..54347ba17c3e 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -630,6 +630,11 @@ void symsrc__destroy(struct symsrc *ss)
 	close(ss->fd);
 }
 
+bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr)
+{
+	return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL;
+}
+
 int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 		 enum dso_binary_type type)
 {
@@ -711,8 +716,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 						     ".gnu.prelink_undo",
 						     NULL) != NULL);
 	} else {
-		ss->adjust_symbols = ehdr.e_type == ET_EXEC ||
-				     ehdr.e_type == ET_REL;
+		ss->adjust_symbols = elf__needs_adjust_symbols(ehdr);
 	}
 
 	ss->name   = strdup(name);
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 09561500164a..8cb0af48ec93 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -303,4 +303,8 @@ int setup_list(struct strlist **list, const char *list_str,
 int setup_intlist(struct intlist **list, const char *list_str,
 		  const char *list_name);
 
+#ifdef HAVE_LIBELF_SUPPORT
+bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+#endif
+
 #endif /* __PERF_SYMBOL */

From fb6d59423115b10125f5db6acb8471f6f0af4ad7 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:36 +0530
Subject: [PATCH 056/305] perf probe ppc: Use the right prefix when ignoring
 SyS symbols on ppc

Use the proper prefix when ignoring SyS symbols on ppc ABIv1. While at
it, generalize symbol selection so architectures can implement their own
logic.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/adf1f98b121ecaf292777fe5cc69fe1038feabce.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 20 ++++++++++++++++++++
 tools/perf/util/symbol.c                    | 21 ++++++++++++---------
 tools/perf/util/symbol.h                    |  5 +++++
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index c9de0012195f..5522a4000c22 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -17,3 +17,23 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 	       ehdr.e_type == ET_DYN;
 }
 #endif
+
+#if !defined(_CALL_ELF) || _CALL_ELF != 2
+int arch__choose_best_symbol(struct symbol *syma,
+			     struct symbol *symb __maybe_unused)
+{
+	char *sym = syma->name;
+
+	/* Skip over any initial dot */
+	if (*sym == '.')
+		sym++;
+
+	/* Avoid "SyS" kernel syscall aliases */
+	if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3))
+		return SYMBOL_B;
+	if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
+}
+#endif
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 201f6c4ca738..f80575728c64 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -85,8 +85,17 @@ static int prefix_underscores_count(const char *str)
 	return tail - str;
 }
 
-#define SYMBOL_A 0
-#define SYMBOL_B 1
+int __weak arch__choose_best_symbol(struct symbol *syma,
+				    struct symbol *symb __maybe_unused)
+{
+	/* Avoid "SyS" kernel syscall aliases */
+	if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3))
+		return SYMBOL_B;
+	if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10))
+		return SYMBOL_B;
+
+	return SYMBOL_A;
+}
 
 static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 {
@@ -134,13 +143,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb)
 	else if (na < nb)
 		return SYMBOL_B;
 
-	/* Avoid "SyS" kernel syscall aliases */
-	if (na >= 3 && !strncmp(syma->name, "SyS", 3))
-		return SYMBOL_B;
-	if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10))
-		return SYMBOL_B;
-
-	return SYMBOL_A;
+	return arch__choose_best_symbol(syma, symb);
 }
 
 void symbols__fixup_duplicate(struct rb_root *symbols)
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 8cb0af48ec93..bd50ba0dde63 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -307,4 +307,9 @@ int setup_intlist(struct intlist **list, const char *list_str,
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
 #endif
 
+#define SYMBOL_A 0
+#define SYMBOL_B 1
+
+int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb);
+
 #endif /* __PERF_SYMBOL */

From 031b84c407c3153ffbcb4f8f832edf48af988719 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:37 +0530
Subject: [PATCH 057/305] perf probe ppc: Enable matching against dot symbols
 automatically

Allow perf probe to work on ppc ABIv1 without the need to specify the
leading dot '.' for functions. 'perf probe do_fork' works with this
patch.

We do this by changing how symbol name comparison works on ppc ABIv1 -
we simply ignore and skip over the initial dot, if one exists, during
symbol name comparison.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/652a8f3bfa919bd02a1836a128370eaed59b4a34.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 13 +++++++++++++
 tools/perf/util/map.c                       |  5 +++++
 tools/perf/util/map.h                       |  3 ++-
 tools/perf/util/symbol.c                    |  4 ++--
 4 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 5522a4000c22..2de2cc484d6e 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -8,6 +8,7 @@
 
 #include "debug.h"
 #include "symbol.h"
+#include "map.h"
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
@@ -36,4 +37,16 @@ int arch__choose_best_symbol(struct symbol *syma,
 
 	return SYMBOL_A;
 }
+
+/* Allow matching against dot variants */
+int arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	/* Skip over initial dot */
+	if (*namea == '.')
+		namea++;
+	if (*nameb == '.')
+		nameb++;
+
+	return strcmp(namea, nameb);
+}
 #endif
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index a14f08f41686..cd0e335008b4 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -292,6 +292,11 @@ int map__load(struct map *map, symbol_filter_t filter)
 	return 0;
 }
 
+int __weak arch__compare_symbol_names(const char *namea, const char *nameb)
+{
+	return strcmp(namea, nameb);
+}
+
 struct symbol *map__find_symbol(struct map *map, u64 addr,
 				symbol_filter_t filter)
 {
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index ec19c59ca38e..4e0c729841ab 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -124,7 +124,7 @@ struct thread;
  */
 #define __map__for_each_symbol_by_name(map, sym_name, pos, filter)	\
 	for (pos = map__find_symbol_by_name(map, sym_name, filter);	\
-	     pos && strcmp(pos->name, sym_name) == 0;		\
+	     pos && arch__compare_symbol_names(pos->name, sym_name) == 0;	\
 	     pos = symbol__next_by_name(pos))
 
 #define map__for_each_symbol_by_name(map, sym_name, pos)		\
@@ -132,6 +132,7 @@ struct thread;
 
 typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym);
 
+int arch__compare_symbol_names(const char *namea, const char *nameb);
 void map__init(struct map *map, enum map_type type,
 	       u64 start, u64 end, u64 pgoff, struct dso *dso);
 struct map *map__new(struct machine *machine, u64 start, u64 len,
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index f80575728c64..45ba48a7acb3 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -411,7 +411,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 		int cmp;
 
 		s = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		cmp = strcmp(name, s->sym.name);
+		cmp = arch__compare_symbol_names(name, s->sym.name);
 
 		if (cmp < 0)
 			n = n->rb_left;
@@ -429,7 +429,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 		struct symbol_name_rb_node *tmp;
 
 		tmp = rb_entry(n, struct symbol_name_rb_node, rb_node);
-		if (strcmp(tmp->sym.name, s->sym.name))
+		if (arch__compare_symbol_names(tmp->sym.name, s->sym.name))
 			break;
 
 		s = tmp;

From c50fc0a43e33a6c3257c5cbb954cd747d7b9a680 Mon Sep 17 00:00:00 2001
From: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Date: Tue, 28 Apr 2015 17:35:38 +0530
Subject: [PATCH 058/305] perf probe ppc64le: Fix ppc64 ABIv2 symbol decoding

ppc64 ELF ABIv2 has a Global Entry Point (GEP) and a Local Entry Point
(LEP). For purposes of probing, we need the LEP - the offset to which is
encoded in st_other.

Signed-off-by: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/ab9cc5e2b9de4cbaaf50f6ef2346a6a81100bad1.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 7 +++++++
 tools/perf/util/symbol-elf.c                | 4 ++++
 tools/perf/util/symbol.h                    | 1 +
 3 files changed, 12 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 2de2cc484d6e..012a0f8098de 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -17,6 +17,13 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
 	       ehdr.e_type == ET_REL ||
 	       ehdr.e_type == ET_DYN;
 }
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+void arch__elf_sym_adjust(GElf_Sym *sym)
+{
+	sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
+}
+#endif
 #endif
 
 #if !defined(_CALL_ELF) || _CALL_ELF != 2
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 54347ba17c3e..d99b4424d4e1 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -775,6 +775,8 @@ static bool want_demangle(bool is_kernel_sym)
 	return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
 }
 
+void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { }
+
 int dso__load_sym(struct dso *dso, struct map *map,
 		  struct symsrc *syms_ss, struct symsrc *runtime_ss,
 		  symbol_filter_t filter, int kmodule)
@@ -939,6 +941,8 @@ int dso__load_sym(struct dso *dso, struct map *map,
 		    (sym.st_value & 1))
 			--sym.st_value;
 
+		arch__elf_sym_adjust(&sym);
+
 		if (dso->kernel || kmodule) {
 			char dso_name[PATH_MAX];
 
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index bd50ba0dde63..90965296c129 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -305,6 +305,7 @@ int setup_intlist(struct intlist **list, const char *list_str,
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr);
+void arch__elf_sym_adjust(GElf_Sym *sym);
 #endif
 
 #define SYMBOL_A 0

From d5c2e2c17ae1d630ddbceb53a264f24cc99703a4 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:39 +0530
Subject: [PATCH 059/305] perf probe ppc64le: Prefer symbol table lookup over
 DWARF

Use symbol table lookups by default if DWARF is not necessary, since
powerpc ABIv2 encodes local entry points in the symbol table and the
function entry address in DWARF may not be appropriate for kprobes, as
described here:

https://sourceware.org/bugzilla/show_bug.cgi?id=17638

"The DWARF address ranges deliberately include the *whole* function,
both global and local entry points."
...
"If you want to set probes on a local entry point, you should look up
the symbol in the main symbol table (not DWARF), and check the st_other
bits; they will indicate whether the function has a local entry point,
and what its offset from the global entry point is.  Note that GDB does
the same when setting a breakpoint on a function entry."

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/88a10e22f4aaba2aef812824ca4b10d7beeea012.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 8 ++++++++
 tools/perf/util/probe-event.c               | 8 ++++++++
 tools/perf/util/probe-event.h               | 1 +
 3 files changed, 17 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index 012a0f8098de..a1700609e145 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -9,6 +9,7 @@
 #include "debug.h"
 #include "symbol.h"
 #include "map.h"
+#include "probe-event.h"
 
 #ifdef HAVE_LIBELF_SUPPORT
 bool elf__needs_adjust_symbols(GElf_Ehdr ehdr)
@@ -57,3 +58,10 @@ int arch__compare_symbol_names(const char *namea, const char *nameb)
 	return strcmp(namea, nameb);
 }
 #endif
+
+#if defined(_CALL_ELF) && _CALL_ELF == 2
+bool arch__prefers_symtab(void)
+{
+	return true;
+}
+#endif
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 291bf23a5013..4dfb412ff1f4 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2567,6 +2567,8 @@ err_out:
 	goto out;
 }
 
+bool __weak arch__prefers_symtab(void) { return false; }
+
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs,
 					  int max_tevs, const char *target)
@@ -2582,6 +2584,12 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 		}
 	}
 
+	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
+		ret = find_probe_trace_events_from_map(pev, tevs, max_tevs, target);
+		if (ret > 0)
+			return ret; /* Found in symbol table */
+	}
+
 	/* Convert perf_probe_event with debuginfo */
 	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
 	if (ret != 0)
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index d6b783447be9..52bca4bf07ca 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -135,6 +135,7 @@ extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
 			       struct strfilter *filter, bool externs);
 extern int show_available_funcs(const char *module, struct strfilter *filter,
 				bool user);
+bool arch__prefers_symtab(void);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX	1024

From 7b6ff0bdbf4f7f429c2116cca92a6d171217449e Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:40 +0530
Subject: [PATCH 060/305] perf probe ppc64le: Fixup function entry if using
 kallsyms lookup

On powerpc ABIv2, if no debug-info is found and we use kallsyms, we need
to fixup the function entry to point to the local entry point. Use
offset of 8 since current toolchains always generate 2 instructions (8
bytes).

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/92253021e77a104b23b615c8c23bf9501dfe60bf.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/powerpc/util/sym-handling.c | 15 +++++++++++++++
 tools/perf/util/probe-event.c               |  5 +++++
 tools/perf/util/probe-event.h               |  2 ++
 3 files changed, 22 insertions(+)

diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c
index a1700609e145..bbc1a50768dd 100644
--- a/tools/perf/arch/powerpc/util/sym-handling.c
+++ b/tools/perf/arch/powerpc/util/sym-handling.c
@@ -64,4 +64,19 @@ bool arch__prefers_symtab(void)
 {
 	return true;
 }
+
+#define PPC64LE_LEP_OFFSET	8
+
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+			     struct probe_trace_event *tev, struct map *map)
+{
+	/*
+	 * ppc64 ABIv2 local entry point is currently always 2 instructions
+	 * (8 bytes) after the global entry point.
+	 */
+	if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) {
+		tev->point.address += PPC64LE_LEP_OFFSET;
+		tev->point.offset += PPC64LE_LEP_OFFSET;
+	}
+}
 #endif
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 4dfb412ff1f4..eb75a5efcfd7 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2447,6 +2447,10 @@ static int find_probe_functions(struct map *map, char *name)
 #define strdup_or_goto(str, label)	\
 	({ char *__p = strdup(str); if (!__p) goto label; __p; })
 
+void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
+				struct probe_trace_event *tev __maybe_unused,
+				struct map *map __maybe_unused) { }
+
 /*
  * Find probe function addresses from map.
  * Return an error or the number of found probe_trace_event
@@ -2553,6 +2557,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 					strdup_or_goto(pev->args[i].type,
 							nomem_out);
 		}
+		arch__fix_tev_from_maps(pev, tev, map);
 	}
 
 out:
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 52bca4bf07ca..180f142cc170 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -136,6 +136,8 @@ extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
 extern int show_available_funcs(const char *module, struct strfilter *filter,
 				bool user);
 bool arch__prefers_symtab(void);
+void arch__fix_tev_from_maps(struct perf_probe_event *pev,
+			     struct probe_trace_event *tev, struct map *map);
 
 /* Maximum index number of event-name postfix */
 #define MAX_EVENT_INDEX	1024

From b72e74d1f140eb32865540f9e26e9fe0fcf983b9 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 24 Apr 2015 22:29:43 +0300
Subject: [PATCH 061/305] perf evlist: Amend mmap ref counting for the AUX area
 mmap

Reference counting of the mmap buffer does not work correctly when there
is an AUX area mmap also.

In snapshot mode it is not easy to know if the AUX area mmap buffer
contains usefull information. Equally the evlist does not know if the
recording is in sanpshot mode anyway.

Consequently, for now just assume the AUX area mmap always has data,
which will just cause the mmap buffer to remain mmapped for the duration
of the recording.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429903807-20559-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 0ec8c3bc8ab8..7ec1bf93ab28 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -695,7 +695,7 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 
 static bool perf_mmap__empty(struct perf_mmap *md)
 {
-	return perf_mmap__read_head(md) == md->prev;
+	return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base;
 }
 
 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)

From 6d5cdd64f314e03c8606c777dc44b09769f8e038 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 24 Apr 2015 22:29:44 +0300
Subject: [PATCH 062/305] perf script: Always allow fields 'addr' and 'cpu' for
 auxtrace

If a file contains AUX area tracing data then always allow fields 'addr'
and 'cpu' to be selected as options for perf script.  This is necessary
because AUX area decoding may synthesize events with that information.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429903807-20559-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-script.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index cd2f38bf7573..6805098e3751 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -148,9 +148,10 @@ static const char *output_field2str(enum perf_output_field field)
 
 #define PRINT_FIELD(x)  (output[attr->type].fields & PERF_OUTPUT_##x)
 
-static int perf_evsel__check_stype(struct perf_evsel *evsel,
-				   u64 sample_type, const char *sample_msg,
-				   enum perf_output_field field)
+static int perf_evsel__do_check_stype(struct perf_evsel *evsel,
+				      u64 sample_type, const char *sample_msg,
+				      enum perf_output_field field,
+				      bool allow_user_set)
 {
 	struct perf_event_attr *attr = &evsel->attr;
 	int type = attr->type;
@@ -160,6 +161,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
 		return 0;
 
 	if (output[type].user_set) {
+		if (allow_user_set)
+			return 0;
 		evname = perf_evsel__name(evsel);
 		pr_err("Samples for '%s' event do not have %s attribute set. "
 		       "Cannot print '%s' field.\n",
@@ -177,10 +180,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel,
 	return 0;
 }
 
+static int perf_evsel__check_stype(struct perf_evsel *evsel,
+				   u64 sample_type, const char *sample_msg,
+				   enum perf_output_field field)
+{
+	return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field,
+					  false);
+}
+
 static int perf_evsel__check_attr(struct perf_evsel *evsel,
 				  struct perf_session *session)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	bool allow_user_set;
+
+	allow_user_set = perf_header__has_feat(&session->header,
+					       HEADER_AUXTRACE);
 
 	if (PRINT_FIELD(TRACE) &&
 		!perf_session__has_traces(session, "record -R"))
@@ -193,8 +208,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 	}
 
 	if (PRINT_FIELD(ADDR) &&
-		perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
-					PERF_OUTPUT_ADDR))
+		perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR",
+					   PERF_OUTPUT_ADDR, allow_user_set))
 		return -EINVAL;
 
 	if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) {
@@ -231,8 +246,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
 		return -EINVAL;
 
 	if (PRINT_FIELD(CPU) &&
-		perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
-					PERF_OUTPUT_CPU))
+		perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU",
+					   PERF_OUTPUT_CPU, allow_user_set))
 		return -EINVAL;
 
 	if (PRINT_FIELD(PERIOD) &&

From 520a2ebc2f0bd46b65feb7ca73a9ed65ea1a36bd Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 24 Apr 2015 22:29:45 +0300
Subject: [PATCH 063/305] perf report: Add Instruction Tracing support

Add support for decoding an AUX area assuming it contains instruction
tracing data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1429903807-20559-4-git-send-email-adrian.hunter@intel.com
[ Do not use -Z as an alternative to --itrace ]
[ Fixed initialization of itrace_synth_opts struct fields on older gcc versions ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt | 27 ++++++++++++++++++++++++
 tools/perf/builtin-report.c              | 11 ++++++++++
 2 files changed, 38 insertions(+)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 896672badba3..b66f6b41b179 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -327,6 +327,33 @@ OPTIONS
 
 include::callchain-overhead-calculation.txt[]
 
+--itrace::
+	Options for decoding instruction tracing data. The options are:
+
+		i	synthesize instructions events
+		b	synthesize branches events
+		c	synthesize branches events (calls only)
+		r	synthesize branches events (returns only)
+		e	synthesize error events
+		d	create a debug log
+		g	synthesize a call chain for instructions events
+
+	The default is all events i.e. the same as --itrace=ibe
+
+	In addition, the period (default 100000) for instructions events
+	can be specified in units of:
+
+		i	instructions
+		t	ticks
+		ms	milliseconds
+		us	microseconds
+		ns	nanoseconds (default)
+
+	Also the call chain size (default 16, max. 1024) for instructions
+	events can be specified.
+
+	To disable decoding entirely, use --no-itrace.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 476cdf7afcca..18cb0ff39b4e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -36,6 +36,8 @@
 #include "util/data.h"
 #include "arch/common.h"
 
+#include "util/auxtrace.h"
+
 #include <dlfcn.h>
 #include <linux/bitmap.h>
 
@@ -585,6 +587,7 @@ parse_percent_limit(const struct option *opt, const char *str,
 int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	struct perf_session *session;
+	struct itrace_synth_opts itrace_synth_opts = { .set = 0, };
 	struct stat st;
 	bool has_br_stack = false;
 	int branch_mode = -1;
@@ -607,6 +610,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 			.attr		 = perf_event__process_attr,
 			.tracing_data	 = perf_event__process_tracing_data,
 			.build_id	 = perf_event__process_build_id,
+			.id_index	 = perf_event__process_id_index,
+			.auxtrace_info	 = perf_event__process_auxtrace_info,
+			.auxtrace	 = perf_event__process_auxtrace,
 			.ordered_events	 = true,
 			.ordering_requires_timestamps = true,
 		},
@@ -717,6 +723,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Don't show entries under that percent", parse_percent_limit),
 	OPT_CALLBACK(0, "percentage", NULL, "relative|absolute",
 		     "how to display percentage of filtered entries", parse_filter_percentage),
+	OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts",
+			    "Instruction Tracing options",
+			    itrace_parse_synth_opts),
 	OPT_END()
 	};
 	struct perf_data_file file = {
@@ -761,6 +770,8 @@ repeat:
 					       report.queue_size);
 	}
 
+	session->itrace_synth_opts = &itrace_synth_opts;
+
 	report.session = session;
 
 	has_br_stack = perf_header__has_feat(&session->header,

From 468f3d292fe60f0d744253b746e4b5d06bac0826 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Sat, 25 Apr 2015 01:14:46 +0530
Subject: [PATCH 064/305] perf symbols: Warn on build id mismatch

Add a debug message to indicate that the build id didn't match.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1429904686-16516-1-git-send-email-naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol-elf.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index d99b4424d4e1..9d526a5312b1 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -683,6 +683,7 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name,
 		}
 
 		if (!dso__build_id_equal(dso, build_id)) {
+			pr_debug("%s: build id mismatch for %s.\n", __func__, name);
 			dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID;
 			goto out_elf_end;
 		}

From 539f3aa2ebb42a409bc9028e38af091a86087165 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 29 Apr 2015 12:55:00 -0300
Subject: [PATCH 065/305] perf tools: Fix bison-related build failure on CentOS
 6

The YYLTYPE_IS_TRIVIAL is defined in the Build file, but unlike
pmu-bison.c, gcc complained about it for parse-events-bison.c:

    CC       util/parse-events-bison.o
  In file included from util/parse-events.y:16:
  util/parse-events-bison.h:101:1: error: "YYLTYPE_IS_TRIVIAL" redefined
  <command-line>: error: this is the location of the previous definition
  make[3]: *** [util/parse-events-bison.o] Error 1

Comments from Jiri Olsa:

"Reason is the parse error handling that was added just recently: it
adds YYLTYPE type (which is not present in pmu-bison.h), so
YYLTYPE_IS_TRIVIAL gets redefined, which is ok in F20 that handle the
error via '-w' option, but it's not ok for RHEL6 where the '-w' does not
work for this kind of error."

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1430322871-18107-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index b6c3f39f081a..28af8e2825b3 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -118,7 +118,7 @@ $(OUTPUT)util/pmu-bison.c: util/pmu.y
 
 CFLAGS_parse-events-flex.o  += -w
 CFLAGS_pmu-flex.o           += -w
-CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
+CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w
 CFLAGS_pmu-bison.o          += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w
 
 $(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c

From 3099c026002e97b8c173d9d0bbdfc39257d14402 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Tue, 28 Apr 2015 17:35:34 +0530
Subject: [PATCH 066/305] perf probe: Improve detection of file/function name
 in the probe pattern

Currently, perf probe considers patterns including a '.' to be a file.
However, this causes problems on powerpc ABIv1 where all functions have
a leading '.':

  $ perf probe -F | grep schedule_timeout_interruptible
  .schedule_timeout_interruptible
  $ perf probe .schedule_timeout_interruptible
  Semantic error :File always requires line number or lazy pattern.
    Error: Command Parse Error.

Fix this:
- by checking the probe pattern in more detail, and
- skipping leading dot if one exists when creating/deleting events.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Reviewed-by: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: linuxppc-dev@lists.ozlabs.org
Link: http://lkml.kernel.org/r/db680f7cb11c4452b632f908e67151f3aa0f4602.1430217967.git.naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index eb75a5efcfd7..416c10f8fd2a 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1077,6 +1077,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 	struct perf_probe_point *pp = &pev->point;
 	char *ptr, *tmp;
 	char c, nc = 0;
+	bool file_spec = false;
 	/*
 	 * <Syntax>
 	 * perf probe [EVENT=]SRC[:LN|;PTN]
@@ -1105,6 +1106,23 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 		arg = tmp;
 	}
 
+	/*
+	 * Check arg is function or file name and copy it.
+	 *
+	 * We consider arg to be a file spec if and only if it satisfies
+	 * all of the below criteria::
+	 * - it does not include any of "+@%",
+	 * - it includes one of ":;", and
+	 * - it has a period '.' in the name.
+	 *
+	 * Otherwise, we consider arg to be a function specification.
+	 */
+	if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) {
+		/* This is a file spec if it includes a '.' before ; or : */
+		if (memchr(arg, '.', ptr - arg))
+			file_spec = true;
+	}
+
 	ptr = strpbrk(arg, ";:+@%");
 	if (ptr) {
 		nc = *ptr;
@@ -1115,10 +1133,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 	if (tmp == NULL)
 		return -ENOMEM;
 
-	/* Check arg is function or file and copy it */
-	if (strchr(tmp, '.'))	/* File */
+	if (file_spec)
 		pp->file = tmp;
-	else			/* Function */
+	else
 		pp->function = tmp;
 
 	/* Parse other options */
@@ -2265,6 +2282,9 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 {
 	int i, ret;
 
+	if (*base == '.')
+		base++;
+
 	/* Try no suffix */
 	ret = e_snprintf(buf, len, "%s", base);
 	if (ret < 0) {
@@ -2766,6 +2786,9 @@ int del_perf_probe_events(struct strlist *dellist)
 			event = str;
 		}
 
+		if (event && *event == '.')
+			event++;
+
 		ret = e_snprintf(buf, 128, "%s:%s", group, event);
 		if (ret < 0) {
 			pr_err("Failed to copy event.");

From 4e60a2caefd1920867a84b978abc1eac118de596 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 24 Apr 2015 18:47:44 +0900
Subject: [PATCH 067/305] perf tools: Improve strfilter to append additional
 rules

Add strfilter__or/and to append additional rules to existing strfilter.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150424094744.23967.97614.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/strfilter.c | 40 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/strfilter.h | 26 ++++++++++++++++++++++++
 2 files changed, 66 insertions(+)

diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index 79a757a2a15c..f3429cd16b1e 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -170,6 +170,46 @@ struct strfilter *strfilter__new(const char *rules, const char **err)
 	return filter;
 }
 
+static int strfilter__append(struct strfilter *filter, bool _or,
+			     const char *rules, const char **err)
+{
+	struct strfilter_node *right, *root;
+	const char *ep = NULL;
+
+	if (!filter || !rules)
+		return -EINVAL;
+
+	right = strfilter_node__new(rules, &ep);
+	if (!right || *ep != '\0') {
+		if (err)
+			*err = ep;
+		goto error;
+	}
+	root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right);
+	if (!root) {
+		ep = NULL;
+		goto error;
+	}
+
+	filter->root = root;
+	return 0;
+
+error:
+	strfilter_node__delete(right);
+	return ep ? -EINVAL : -ENOMEM;
+}
+
+int strfilter__or(struct strfilter *filter, const char *rules, const char **err)
+{
+	return strfilter__append(filter, true, rules, err);
+}
+
+int strfilter__and(struct strfilter *filter, const char *rules,
+		   const char **err)
+{
+	return strfilter__append(filter, false, rules, err);
+}
+
 static bool strfilter_node__compare(struct strfilter_node *node,
 				    const char *str)
 {
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
index fe611f3c9e39..d007cdc2c68b 100644
--- a/tools/perf/util/strfilter.h
+++ b/tools/perf/util/strfilter.h
@@ -28,6 +28,32 @@ struct strfilter {
  */
 struct strfilter *strfilter__new(const char *rules, const char **err);
 
+/**
+ * strfilter__or - Append an additional rule by logical-or
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-or.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-or.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__or(struct strfilter *filter,
+		  const char *rules, const char **err);
+
+/**
+ * strfilter__add - Append an additional rule by logical-and
+ * @filter: Original string filter
+ * @rules: Filter rule to be appended at left of the root of
+ *         @filter by using logical-and.
+ * @err: Pointer which points an error detected on @rules
+ *
+ * Parse @rules and join it to the @filter by using logical-and.
+ * Return 0 if success, or return the error code.
+ */
+int strfilter__and(struct strfilter *filter,
+		   const char *rules, const char **err);
+
 /**
  * strfilter__compare - compare given string and a string filter
  * @filter: String filter

From 3f51972c599cf95702819bd06a7a5412c523ebfe Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 24 Apr 2015 18:47:46 +0900
Subject: [PATCH 068/305] perf tools: Add strfilter__string to recover rules
 string

Add strfilter__string to recover rules string from strfilter.  This will
be good for debugging.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150424094746.23967.52434.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/strfilter.c | 67 +++++++++++++++++++++++++++++++++++++
 tools/perf/util/strfilter.h |  9 +++++
 2 files changed, 76 insertions(+)

diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c
index f3429cd16b1e..bcae659b6546 100644
--- a/tools/perf/util/strfilter.c
+++ b/tools/perf/util/strfilter.c
@@ -237,3 +237,70 @@ bool strfilter__compare(struct strfilter *filter, const char *str)
 		return false;
 	return strfilter_node__compare(filter->root, str);
 }
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf);
+
+/* sprint node in parenthesis if needed */
+static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf)
+{
+	int len;
+	int pt = node->r ? 2 : 0;	/* don't need to check node->l */
+
+	if (buf && pt)
+		*buf++ = '(';
+	len = strfilter_node__sprint(node, buf);
+	if (len < 0)
+		return len;
+	if (buf && pt)
+		*(buf + len) = ')';
+	return len + pt;
+}
+
+static int strfilter_node__sprint(struct strfilter_node *node, char *buf)
+{
+	int len = 0, rlen;
+
+	if (!node || !node->p)
+		return -EINVAL;
+
+	switch (*node->p) {
+	case '|':
+	case '&':
+		len = strfilter_node__sprint_pt(node->l, buf);
+		if (len < 0)
+			return len;
+	case '!':
+		if (buf) {
+			*(buf + len++) = *node->p;
+			buf += len;
+		} else
+			len++;
+		rlen = strfilter_node__sprint_pt(node->r, buf);
+		if (rlen < 0)
+			return rlen;
+		len += rlen;
+		break;
+	default:
+		len = strlen(node->p);
+		if (buf)
+			strcpy(buf, node->p);
+	}
+
+	return len;
+}
+
+char *strfilter__string(struct strfilter *filter)
+{
+	int len;
+	char *ret = NULL;
+
+	len = strfilter_node__sprint(filter->root, NULL);
+	if (len < 0)
+		return NULL;
+
+	ret = malloc(len + 1);
+	if (ret)
+		strfilter_node__sprint(filter->root, ret);
+
+	return ret;
+}
diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h
index d007cdc2c68b..cff5eda88728 100644
--- a/tools/perf/util/strfilter.h
+++ b/tools/perf/util/strfilter.h
@@ -71,4 +71,13 @@ bool strfilter__compare(struct strfilter *filter, const char *str);
  */
 void strfilter__delete(struct strfilter *filter);
 
+/**
+ * strfilter__string - Reconstruct a rule string from filter
+ * @filter: String filter to reconstruct
+ *
+ * Reconstruct a rule string from @filter. This will be good for
+ * debug messages. Note that returning string must be freed afterward.
+ */
+char *strfilter__string(struct strfilter *filter);
+
 #endif

From 96b55e39237b8bc92d8e6b96f896c106f2d39cf6 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 24 Apr 2015 18:47:48 +0900
Subject: [PATCH 069/305] perf probe: Accept multiple filter options

Accept multiple filter options. Each filters are combined by logical-or.
E.g. --filter abc* --filter *def is same as --filter abc*|*def

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150424094748.23967.63355.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 92dcce065e68..be170757ebd6 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -262,21 +262,25 @@ static int opt_set_filter(const struct option *opt __maybe_unused,
 			  const char *str, int unset __maybe_unused)
 {
 	const char *err;
+	int ret = 0;
 
 	if (str) {
 		pr_debug2("Set filter: %s\n", str);
-		if (params.filter)
-			strfilter__delete(params.filter);
-		params.filter = strfilter__new(str, &err);
 		if (!params.filter) {
+			params.filter = strfilter__new(str, &err);
+			if (!params.filter)
+				ret = err ? -EINVAL : -ENOMEM;
+		} else
+			ret = strfilter__or(params.filter, str, &err);
+
+		if (ret == -EINVAL) {
 			pr_err("Filter parse error at %td.\n", err - str + 1);
 			pr_err("Source: \"%s\"\n", str);
 			pr_err("         %*c\n", (int)(err - str + 1), '^');
-			return -EINVAL;
 		}
 	}
 
-	return 0;
+	return ret;
 }
 
 static int init_params(void)

From b6a896438b3275df434a8f99bee58292b31693bd Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 24 Apr 2015 18:47:50 +0900
Subject: [PATCH 070/305] perf probe: Accept filter argument for --list

Currently, perf-probe --list option ignores given event filter.
  ----
  # ./perf probe -l vfs\*
    probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)
    probe_libc:malloc    (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.17.so)
  ----

This changes --list option to accept the event filter argument as below.
  ----
  # ./perf probe -l vfs\*
    probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)
  # ./perf probe -l \*libc:\*
    probe_libc:malloc    (on __libc_malloc@malloc/malloc.c in /usr/lib64/libc-2.17.so)
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150424094750.23967.53868.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-probe.txt |  6 +--
 tools/perf/builtin-probe.c              | 70 ++++++++++++++++---------
 tools/perf/util/probe-event.c           | 27 ++++++++--
 tools/perf/util/probe-event.h           |  2 +-
 4 files changed, 73 insertions(+), 32 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index a4a3cc76b36b..d0feb8e6eeef 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -14,7 +14,7 @@ or
 or
 'perf probe' [options] --del='[GROUP:]EVENT' [...]
 or
-'perf probe' --list
+'perf probe' --list[=[GROUP:]EVENT]
 or
 'perf probe' [options] --line='LINE'
 or
@@ -66,8 +66,8 @@ OPTIONS
 	classes(e.g. [a-z], [!A-Z]).
 
 -l::
---list::
-	List up current probe events.
+--list[=[GROUP:]EVENT]::
+	List up current probe events. This can also accept filtering patterns of event names.
 
 -L::
 --line=::
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index be170757ebd6..feca3165f957 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -44,6 +44,7 @@
 
 #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*"
 #define DEFAULT_FUNC_FILTER "!_*"
+#define DEFAULT_LIST_FILTER "*:*"
 
 /* Session management structure */
 static struct {
@@ -93,6 +94,28 @@ static int parse_probe_event(const char *str)
 	return ret;
 }
 
+static int params_add_filter(const char *str)
+{
+	const char *err = NULL;
+	int ret = 0;
+
+	pr_debug2("Add filter: %s\n", str);
+	if (!params.filter) {
+		params.filter = strfilter__new(str, &err);
+		if (!params.filter)
+			ret = err ? -EINVAL : -ENOMEM;
+	} else
+		ret = strfilter__or(params.filter, str, &err);
+
+	if (ret == -EINVAL) {
+		pr_err("Filter parse error at %td.\n", err - str + 1);
+		pr_err("Source: \"%s\"\n", str);
+		pr_err("         %*c\n", (int)(err - str + 1), '^');
+	}
+
+	return ret;
+}
+
 static int set_target(const char *ptr)
 {
 	int found = 0;
@@ -180,6 +203,18 @@ static int opt_del_probe_event(const struct option *opt __maybe_unused,
 	return 0;
 }
 
+static int opt_list_probe_event(const struct option *opt __maybe_unused,
+				const char *str, int unset)
+{
+	if (!unset)
+		params.list_events = true;
+
+	if (str)
+		return params_add_filter(str);
+
+	return 0;
+}
+
 static int opt_set_target(const struct option *opt, const char *str,
 			int unset __maybe_unused)
 {
@@ -261,26 +296,10 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
 static int opt_set_filter(const struct option *opt __maybe_unused,
 			  const char *str, int unset __maybe_unused)
 {
-	const char *err;
-	int ret = 0;
+	if (str)
+		return params_add_filter(str);
 
-	if (str) {
-		pr_debug2("Set filter: %s\n", str);
-		if (!params.filter) {
-			params.filter = strfilter__new(str, &err);
-			if (!params.filter)
-				ret = err ? -EINVAL : -ENOMEM;
-		} else
-			ret = strfilter__or(params.filter, str, &err);
-
-		if (ret == -EINVAL) {
-			pr_err("Filter parse error at %td.\n", err - str + 1);
-			pr_err("Source: \"%s\"\n", str);
-			pr_err("         %*c\n", (int)(err - str + 1), '^');
-		}
-	}
-
-	return ret;
+	return 0;
 }
 
 static int init_params(void)
@@ -320,21 +339,22 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		"perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]",
 		"perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]",
 		"perf probe [<options>] --del '[GROUP:]EVENT' ...",
-		"perf probe --list",
+		"perf probe --list [GROUP:]EVENT ...",
 #ifdef HAVE_DWARF_SUPPORT
 		"perf probe [<options>] --line 'LINEDESC'",
 		"perf probe [<options>] --vars 'PROBEPOINT'",
 #endif
 		"perf probe [<options>] --funcs",
 		NULL
-};
+	};
 	struct option options[] = {
 	OPT_INCR('v', "verbose", &verbose,
 		    "be more verbose (show parsed arguments, etc)"),
 	OPT_BOOLEAN('q', "quiet", &params.quiet,
 		    "be quiet (do not show any mesages)"),
-	OPT_BOOLEAN('l', "list", &params.list_events,
-		    "list up current probe events"),
+	OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
+			     "list up probe events", opt_list_probe_event,
+			     DEFAULT_LIST_FILTER),
 	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
 		opt_del_probe_event),
 	OPT_CALLBACK('a', "add", NULL,
@@ -448,7 +468,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 			pr_warning("  Error: Don't use --list with --exec.\n");
 			usage_with_options(probe_usage, options);
 		}
-		ret = show_perf_probe_events();
+		ret = show_perf_probe_events(params.filter);
+		strfilter__delete(params.filter);
+		params.filter = NULL;
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show event list.", ret);
 		return ret;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 416c10f8fd2a..5995d81d2bad 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2146,7 +2146,23 @@ static int show_perf_probe_event(struct perf_probe_event *pev,
 	return ret;
 }
 
-static int __show_perf_probe_events(int fd, bool is_kprobe)
+static bool filter_probe_trace_event(struct probe_trace_event *tev,
+				     struct strfilter *filter)
+{
+	char tmp[128];
+
+	/* At first, check the event name itself */
+	if (strfilter__compare(filter, tev->event))
+		return true;
+
+	/* Next, check the combination of name and group */
+	if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0)
+		return false;
+	return strfilter__compare(filter, tmp);
+}
+
+static int __show_perf_probe_events(int fd, bool is_kprobe,
+				    struct strfilter *filter)
 {
 	int ret = 0;
 	struct probe_trace_event tev;
@@ -2164,12 +2180,15 @@ static int __show_perf_probe_events(int fd, bool is_kprobe)
 	strlist__for_each(ent, rawlist) {
 		ret = parse_probe_trace_command(ent->s, &tev);
 		if (ret >= 0) {
+			if (!filter_probe_trace_event(&tev, filter))
+				goto next;
 			ret = convert_to_perf_probe_event(&tev, &pev,
 								is_kprobe);
 			if (ret >= 0)
 				ret = show_perf_probe_event(&pev,
 							    tev.point.module);
 		}
+next:
 		clear_perf_probe_event(&pev);
 		clear_probe_trace_event(&tev);
 		if (ret < 0)
@@ -2181,7 +2200,7 @@ static int __show_perf_probe_events(int fd, bool is_kprobe)
 }
 
 /* List up current perf-probe events */
-int show_perf_probe_events(void)
+int show_perf_probe_events(struct strfilter *filter)
 {
 	int kp_fd, up_fd, ret;
 
@@ -2193,7 +2212,7 @@ int show_perf_probe_events(void)
 
 	kp_fd = open_kprobe_events(false);
 	if (kp_fd >= 0) {
-		ret = __show_perf_probe_events(kp_fd, true);
+		ret = __show_perf_probe_events(kp_fd, true, filter);
 		close(kp_fd);
 		if (ret < 0)
 			goto out;
@@ -2207,7 +2226,7 @@ int show_perf_probe_events(void)
 	}
 
 	if (up_fd >= 0) {
-		ret = __show_perf_probe_events(up_fd, false);
+		ret = __show_perf_probe_events(up_fd, false, filter);
 		close(up_fd);
 	}
 out:
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 180f142cc170..ec13362d882f 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -127,7 +127,7 @@ extern const char *kernel_get_module_path(const char *module);
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
 				 int max_probe_points, bool force_add);
 extern int del_perf_probe_events(struct strlist *dellist);
-extern int show_perf_probe_events(void);
+extern int show_perf_probe_events(struct strfilter *filter);
 extern int show_line_range(struct line_range *lr, const char *module,
 			   bool user);
 extern int show_available_vars(struct perf_probe_event *pevs, int npevs,

From c9758cc4569955c6d8ad519adf539848e8824c72 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 Apr 2015 13:55:02 +0900
Subject: [PATCH 071/305] perf kmem: Implement stat --page --caller

It is 'perf kmem' support caller statistics for page.  Unlike slab case,
the tracepoints in page allocator don't provide callsite info.  So it
records with callchain and extracts callsite info.

Note that the callchain contains several memory allocation functions
which has no meaning for users.  So skip those functions to get proper
callsites.  I used following regex pattern to skip the allocator
functions:

  ^_?_?(alloc|get_free|get_zeroed)_pages?

This gave me a following list of functions:

  # perf kmem record --page sleep 3
  # perf kmem stat --page -v
  ...
  alloc func: __get_free_pages
  alloc func: get_zeroed_page
  alloc func: alloc_pages_exact
  alloc func: __alloc_pages_direct_compact
  alloc func: __alloc_pages_nodemask
  alloc func: alloc_page_interleave
  alloc func: alloc_pages_current
  alloc func: alloc_pages_vma
  alloc func: alloc_page_buffers
  alloc func: alloc_pages_exact_nid
  ...

The output looks mostly same as --alloc (I also added callsite column
to that) but groups entries by callsite.  Currently, the order,
migrate type and GFP flag info is for the last allocation and not
guaranteed to be same for all allocations from the callsite.

  ---------------------------------------------------------------------------------------------
   Total_alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite
  ---------------------------------------------------------------------------------------------
              1,064 |       266 |     0 | UNMOVABL |  000000d0 | __pollwait
                 52 |        13 |     0 | UNMOVABL |  002084d0 | pte_alloc_one
                 44 |        11 |     0 |  MOVABLE |  000280da | handle_mm_fault
                 20 |         5 |     0 |  MOVABLE |  000200da | do_cow_fault
                 20 |         5 |     0 |  MOVABLE |  000200da | do_wp_page
                 16 |         4 |     0 | UNMOVABL |  000084d0 | __pmd_alloc
                 16 |         4 |     0 | UNMOVABL |  00000200 | __tlb_remove_page
                 12 |         3 |     0 | UNMOVABL |  000084d0 | __pud_alloc
                  8 |         2 |     0 | UNMOVABL |  00000010 | bio_copy_user_iov
                  4 |         1 |     0 | UNMOVABL |  000200d2 | pipe_write
                  4 |         1 |     0 |  MOVABLE |  000280da | do_wp_page
                  4 |         1 |     0 | UNMOVABL |  002084d0 | pgd_alloc
  ---------------------------------------------------------------------------------------------

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1429592107-1807-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 327 +++++++++++++++++++++++++++++++++++---
 1 file changed, 306 insertions(+), 21 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 4f0f38462d97..3649eec6807f 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -10,6 +10,7 @@
 #include "util/header.h"
 #include "util/session.h"
 #include "util/tool.h"
+#include "util/callchain.h"
 
 #include "util/parse-options.h"
 #include "util/trace-event.h"
@@ -21,6 +22,7 @@
 #include <linux/rbtree.h>
 #include <linux/string.h>
 #include <locale.h>
+#include <regex.h>
 
 static int	kmem_slab;
 static int	kmem_page;
@@ -241,6 +243,7 @@ static unsigned long nr_page_fails;
 static unsigned long nr_page_nomatch;
 
 static bool use_pfn;
+static struct perf_session *kmem_session;
 
 #define MAX_MIGRATE_TYPES  6
 #define MAX_PAGE_ORDER     11
@@ -250,6 +253,7 @@ static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES];
 struct page_stat {
 	struct rb_node 	node;
 	u64 		page;
+	u64 		callsite;
 	int 		order;
 	unsigned 	gfp_flags;
 	unsigned 	migrate_type;
@@ -262,8 +266,144 @@ struct page_stat {
 static struct rb_root page_tree;
 static struct rb_root page_alloc_tree;
 static struct rb_root page_alloc_sorted;
+static struct rb_root page_caller_tree;
+static struct rb_root page_caller_sorted;
 
-static struct page_stat *search_page(unsigned long page, bool create)
+struct alloc_func {
+	u64 start;
+	u64 end;
+	char *name;
+};
+
+static int nr_alloc_funcs;
+static struct alloc_func *alloc_func_list;
+
+static int funcmp(const void *a, const void *b)
+{
+	const struct alloc_func *fa = a;
+	const struct alloc_func *fb = b;
+
+	if (fa->start > fb->start)
+		return 1;
+	else
+		return -1;
+}
+
+static int callcmp(const void *a, const void *b)
+{
+	const struct alloc_func *fa = a;
+	const struct alloc_func *fb = b;
+
+	if (fb->start <= fa->start && fa->end < fb->end)
+		return 0;
+
+	if (fa->start > fb->start)
+		return 1;
+	else
+		return -1;
+}
+
+static int build_alloc_func_list(void)
+{
+	int ret;
+	struct map *kernel_map;
+	struct symbol *sym;
+	struct rb_node *node;
+	struct alloc_func *func;
+	struct machine *machine = &kmem_session->machines.host;
+	regex_t alloc_func_regex;
+	const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?";
+
+	ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED);
+	if (ret) {
+		char err[BUFSIZ];
+
+		regerror(ret, &alloc_func_regex, err, sizeof(err));
+		pr_err("Invalid regex: %s\n%s", pattern, err);
+		return -EINVAL;
+	}
+
+	kernel_map = machine->vmlinux_maps[MAP__FUNCTION];
+	if (map__load(kernel_map, NULL) < 0) {
+		pr_err("cannot load kernel map\n");
+		return -ENOENT;
+	}
+
+	map__for_each_symbol(kernel_map, sym, node) {
+		if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0))
+			continue;
+
+		func = realloc(alloc_func_list,
+			       (nr_alloc_funcs + 1) * sizeof(*func));
+		if (func == NULL)
+			return -ENOMEM;
+
+		pr_debug("alloc func: %s\n", sym->name);
+		func[nr_alloc_funcs].start = sym->start;
+		func[nr_alloc_funcs].end   = sym->end;
+		func[nr_alloc_funcs].name  = sym->name;
+
+		alloc_func_list = func;
+		nr_alloc_funcs++;
+	}
+
+	qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp);
+
+	regfree(&alloc_func_regex);
+	return 0;
+}
+
+/*
+ * Find first non-memory allocation function from callchain.
+ * The allocation functions are in the 'alloc_func_list'.
+ */
+static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample)
+{
+	struct addr_location al;
+	struct machine *machine = &kmem_session->machines.host;
+	struct callchain_cursor_node *node;
+
+	if (alloc_func_list == NULL) {
+		if (build_alloc_func_list() < 0)
+			goto out;
+	}
+
+	al.thread = machine__findnew_thread(machine, sample->pid, sample->tid);
+	sample__resolve_callchain(sample, NULL, evsel, &al, 16);
+
+	callchain_cursor_commit(&callchain_cursor);
+	while (true) {
+		struct alloc_func key, *caller;
+		u64 addr;
+
+		node = callchain_cursor_current(&callchain_cursor);
+		if (node == NULL)
+			break;
+
+		key.start = key.end = node->ip;
+		caller = bsearch(&key, alloc_func_list, nr_alloc_funcs,
+				 sizeof(key), callcmp);
+		if (!caller) {
+			/* found */
+			if (node->map)
+				addr = map__unmap_ip(node->map, node->ip);
+			else
+				addr = node->ip;
+
+			return addr;
+		} else
+			pr_debug3("skipping alloc function: %s\n", caller->name);
+
+		callchain_cursor_advance(&callchain_cursor);
+	}
+
+out:
+	pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip);
+	return sample->ip;
+}
+
+static struct page_stat *
+__page_stat__findnew_page(u64 page, bool create)
 {
 	struct rb_node **node = &page_tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -298,6 +438,16 @@ static struct page_stat *search_page(unsigned long page, bool create)
 	return data;
 }
 
+static struct page_stat *page_stat__find_page(u64 page)
+{
+	return __page_stat__findnew_page(page, false);
+}
+
+static struct page_stat *page_stat__findnew_page(u64 page)
+{
+	return __page_stat__findnew_page(page, true);
+}
+
 static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
 {
 	if (a->page > b->page)
@@ -319,7 +469,8 @@ static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
 	return 0;
 }
 
-static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool create)
+static struct page_stat *
+__page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 {
 	struct rb_node **node = &page_alloc_tree.rb_node;
 	struct rb_node *parent = NULL;
@@ -357,6 +508,62 @@ static struct page_stat *search_page_alloc_stat(struct page_stat *pstat, bool cr
 	return data;
 }
 
+static struct page_stat *page_stat__find_alloc(struct page_stat *pstat)
+{
+	return __page_stat__findnew_alloc(pstat, false);
+}
+
+static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
+{
+	return __page_stat__findnew_alloc(pstat, true);
+}
+
+static struct page_stat *
+__page_stat__findnew_caller(u64 callsite, bool create)
+{
+	struct rb_node **node = &page_caller_tree.rb_node;
+	struct rb_node *parent = NULL;
+	struct page_stat *data;
+
+	while (*node) {
+		s64 cmp;
+
+		parent = *node;
+		data = rb_entry(*node, struct page_stat, node);
+
+		cmp = data->callsite - callsite;
+		if (cmp < 0)
+			node = &parent->rb_left;
+		else if (cmp > 0)
+			node = &parent->rb_right;
+		else
+			return data;
+	}
+
+	if (!create)
+		return NULL;
+
+	data = zalloc(sizeof(*data));
+	if (data != NULL) {
+		data->callsite = callsite;
+
+		rb_link_node(&data->node, parent, node);
+		rb_insert_color(&data->node, &page_caller_tree);
+	}
+
+	return data;
+}
+
+static struct page_stat *page_stat__find_caller(u64 callsite)
+{
+	return __page_stat__findnew_caller(callsite, false);
+}
+
+static struct page_stat *page_stat__findnew_caller(u64 callsite)
+{
+	return __page_stat__findnew_caller(callsite, true);
+}
+
 static bool valid_page(u64 pfn_or_page)
 {
 	if (use_pfn && pfn_or_page == -1UL)
@@ -375,6 +582,7 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	unsigned int migrate_type = perf_evsel__intval(evsel, sample,
 						       "migratetype");
 	u64 bytes = kmem_page_size << order;
+	u64 callsite;
 	struct page_stat *pstat;
 	struct page_stat this = {
 		.order = order,
@@ -397,23 +605,38 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 		return 0;
 	}
 
+	callsite = find_callsite(evsel, sample);
+
 	/*
 	 * This is to find the current page (with correct gfp flags and
 	 * migrate type) at free event.
 	 */
-	pstat = search_page(page, true);
+	pstat = page_stat__findnew_page(page);
 	if (pstat == NULL)
 		return -ENOMEM;
 
 	pstat->order = order;
 	pstat->gfp_flags = gfp_flags;
 	pstat->migrate_type = migrate_type;
+	pstat->callsite = callsite;
 
 	this.page = page;
-	pstat = search_page_alloc_stat(&this, true);
+	pstat = page_stat__findnew_alloc(&this);
 	if (pstat == NULL)
 		return -ENOMEM;
 
+	pstat->nr_alloc++;
+	pstat->alloc_bytes += bytes;
+	pstat->callsite = callsite;
+
+	pstat = page_stat__findnew_caller(callsite);
+	if (pstat == NULL)
+		return -ENOMEM;
+
+	pstat->order = order;
+	pstat->gfp_flags = gfp_flags;
+	pstat->migrate_type = migrate_type;
+
 	pstat->nr_alloc++;
 	pstat->alloc_bytes += bytes;
 
@@ -441,7 +664,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	nr_page_frees++;
 	total_page_free_bytes += bytes;
 
-	pstat = search_page(page, false);
+	pstat = page_stat__find_page(page);
 	if (pstat == NULL) {
 		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 			  page, order);
@@ -455,11 +678,19 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	this.page = page;
 	this.gfp_flags = pstat->gfp_flags;
 	this.migrate_type = pstat->migrate_type;
+	this.callsite = pstat->callsite;
 
 	rb_erase(&pstat->node, &page_tree);
 	free(pstat);
 
-	pstat = search_page_alloc_stat(&this, false);
+	pstat = page_stat__find_alloc(&this);
+	if (pstat == NULL)
+		return -ENOENT;
+
+	pstat->nr_free++;
+	pstat->free_bytes += bytes;
+
+	pstat = page_stat__find_caller(this.callsite);
 	if (pstat == NULL)
 		return -ENOENT;
 
@@ -576,41 +807,89 @@ static const char * const migrate_type_str[] = {
 	"UNKNOWN",
 };
 
-static void __print_page_result(struct rb_root *root,
-				struct perf_session *session __maybe_unused,
-				int n_lines)
+static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 {
-	struct rb_node *next = rb_first(root);
+	struct rb_node *next = rb_first(&page_alloc_sorted);
+	struct machine *machine = &session->machines.host;
 	const char *format;
 
-	printf("\n%.80s\n", graph_dotted_line);
-	printf(" %-16s | Total alloc (KB) | Hits      | Order | Mig.type | GFP flags\n",
+	printf("\n%.105s\n", graph_dotted_line);
+	printf(" %-16s | Total alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
 	       use_pfn ? "PFN" : "Page");
-	printf("%.80s\n", graph_dotted_line);
+	printf("%.105s\n", graph_dotted_line);
 
 	if (use_pfn)
-		format = " %16llu | %'16llu | %'9d | %5d | %8s |  %08lx\n";
+		format = " %16llu | %'16llu | %'9d | %5d | %8s |  %08lx | %s\n";
 	else
-		format = " %016llx | %'16llu | %'9d | %5d | %8s |  %08lx\n";
+		format = " %016llx | %'16llu | %'9d | %5d | %8s |  %08lx | %s\n";
 
 	while (next && n_lines--) {
 		struct page_stat *data;
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
 
 		data = rb_entry(next, struct page_stat, node);
+		sym = machine__find_kernel_function(machine, data->callsite,
+						    &map, NULL);
+		if (sym && sym->name)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
 
 		printf(format, (unsigned long long)data->page,
 		       (unsigned long long)data->alloc_bytes / 1024,
 		       data->nr_alloc, data->order,
 		       migrate_type_str[data->migrate_type],
-		       (unsigned long)data->gfp_flags);
+		       (unsigned long)data->gfp_flags, caller);
 
 		next = rb_next(next);
 	}
 
 	if (n_lines == -1)
-		printf(" ...              | ...              | ...       | ...   | ...      | ...     \n");
+		printf(" ...              | ...              | ...       | ...   | ...      | ...       | ...\n");
 
-	printf("%.80s\n", graph_dotted_line);
+	printf("%.105s\n", graph_dotted_line);
+}
+
+static void __print_page_caller_result(struct perf_session *session, int n_lines)
+{
+	struct rb_node *next = rb_first(&page_caller_sorted);
+	struct machine *machine = &session->machines.host;
+
+	printf("\n%.105s\n", graph_dotted_line);
+	printf(" Total alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n");
+	printf("%.105s\n", graph_dotted_line);
+
+	while (next && n_lines--) {
+		struct page_stat *data;
+		struct symbol *sym;
+		struct map *map;
+		char buf[32];
+		char *caller = buf;
+
+		data = rb_entry(next, struct page_stat, node);
+		sym = machine__find_kernel_function(machine, data->callsite,
+						    &map, NULL);
+		if (sym && sym->name)
+			caller = sym->name;
+		else
+			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
+
+		printf(" %'16llu | %'9d | %5d | %8s |  %08lx | %s\n",
+		       (unsigned long long)data->alloc_bytes / 1024,
+		       data->nr_alloc, data->order,
+		       migrate_type_str[data->migrate_type],
+		       (unsigned long)data->gfp_flags, caller);
+
+		next = rb_next(next);
+	}
+
+	if (n_lines == -1)
+		printf(" ...              | ...       | ...   | ...      | ...       | ...\n");
+
+	printf("%.105s\n", graph_dotted_line);
 }
 
 static void print_slab_summary(void)
@@ -682,8 +961,10 @@ static void print_slab_result(struct perf_session *session)
 
 static void print_page_result(struct perf_session *session)
 {
+	if (caller_flag)
+		__print_page_caller_result(session, caller_lines);
 	if (alloc_flag)
-		__print_page_result(&page_alloc_sorted, session, alloc_lines);
+		__print_page_alloc_result(session, alloc_lines);
 	print_page_summary();
 }
 
@@ -802,6 +1083,7 @@ static void sort_result(void)
 	}
 	if (kmem_page) {
 		__sort_page_result(&page_alloc_tree, &page_alloc_sorted);
+		__sort_page_result(&page_caller_tree, &page_caller_sorted);
 	}
 }
 
@@ -1084,7 +1366,7 @@ static int __cmd_record(int argc, const char **argv)
 	if (kmem_slab)
 		rec_argc += ARRAY_SIZE(slab_events);
 	if (kmem_page)
-		rec_argc += ARRAY_SIZE(page_events);
+		rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */
 
 	rec_argv = calloc(rec_argc + 1, sizeof(char *));
 
@@ -1099,6 +1381,8 @@ static int __cmd_record(int argc, const char **argv)
 			rec_argv[i] = strdup(slab_events[j]);
 	}
 	if (kmem_page) {
+		rec_argv[i++] = strdup("-g");
+
 		for (j = 0; j < ARRAY_SIZE(page_events); j++, i++)
 			rec_argv[i] = strdup(page_events[j]);
 	}
@@ -1159,7 +1443,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 
 	file.path = input_name;
 
-	session = perf_session__new(&file, false, &perf_kmem);
+	kmem_session = session = perf_session__new(&file, false, &perf_kmem);
 	if (session == NULL)
 		return -1;
 
@@ -1172,6 +1456,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 		}
 
 		kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent);
+		symbol_conf.use_callchain = true;
 	}
 
 	symbol__init(&session->header.env);

From fb4f313d304b0a5120e870a6cd9ecf90c1023037 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 Apr 2015 13:55:03 +0900
Subject: [PATCH 072/305] perf kmem: Support sort keys on page analysis

Add new sort keys for page: page, order, migtype, gfp - existing
'bytes', 'hit' and 'callsite' sort keys also work for page.  Note that
-s/--sort option should be preceded by either of --slab or --page option
to determine where the sort keys applies.

Now it properly groups and sorts allocation stats - so same
page/caller with different order/migtype/gfp will be printed on a
different line.

 # perf kmem stat --page --caller -l 10 -s order,hit

 -----------------------------------------------------------------------------
 Total alloc (KB) | Hits   | Order | Mig.type | GFP flags | Callsite
 -----------------------------------------------------------------------------
               64 |      4 |     2 |  RECLAIM |  00285250 | new_slab
           50,144 | 12,536 |     0 |  MOVABLE |  0102005a | __page_cache_alloc
               52 |     13 |     0 | UNMOVABL |  002084d0 | pte_alloc_one
               40 |     10 |     0 |  MOVABLE |  000280da | handle_mm_fault
               28 |      7 |     0 | UNMOVABL |  000000d0 | __pollwait
               20 |      5 |     0 |  MOVABLE |  000200da | do_wp_page
               20 |      5 |     0 |  MOVABLE |  000200da | do_cow_fault
               16 |      4 |     0 | UNMOVABL |  00000200 | __tlb_remove_page
               16 |      4 |     0 | UNMOVABL |  000084d0 | __pmd_alloc
                8 |      2 |     0 | UNMOVABL |  000084d0 | __pud_alloc
 ...              | ...    | ...   | ...      | ...       | ...
 -----------------------------------------------------------------------------

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1429592107-1807-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-kmem.txt |   6 +-
 tools/perf/builtin-kmem.c              | 391 +++++++++++++++++++------
 2 files changed, 312 insertions(+), 85 deletions(-)

diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 23219c65c16f..69e181272c51 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -37,7 +37,11 @@ OPTIONS
 
 -s <key[,key2...]>::
 --sort=<key[,key2...]>::
-	Sort the output (default: frag,hit,bytes)
+	Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit'
+	for page).  Available sort keys are 'ptr, callsite, bytes, hit,
+	pingpong, frag' for slab and 'page, callsite, bytes, hit, order,
+	migtype, gfp' for page.  This option should be preceded by one of the
+	mode selection options - i.e. --slab, --page, --alloc and/or --caller.
 
 -l <num>::
 --line=<num>::
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 3649eec6807f..0393a7f3fa35 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -30,7 +30,7 @@ static int	kmem_page;
 static long	kmem_page_size;
 
 struct alloc_stat;
-typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *);
+typedef int (*sort_fn_t)(void *, void *);
 
 static int			alloc_flag;
 static int			caller_flag;
@@ -181,8 +181,8 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel,
 	return ret;
 }
 
-static int ptr_cmp(struct alloc_stat *, struct alloc_stat *);
-static int callsite_cmp(struct alloc_stat *, struct alloc_stat *);
+static int ptr_cmp(void *, void *);
+static int slab_callsite_cmp(void *, void *);
 
 static struct alloc_stat *search_alloc_stat(unsigned long ptr,
 					    unsigned long call_site,
@@ -223,7 +223,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel,
 		s_alloc->pingpong++;
 
 		s_caller = search_alloc_stat(0, s_alloc->call_site,
-					     &root_caller_stat, callsite_cmp);
+					     &root_caller_stat,
+					     slab_callsite_cmp);
 		if (!s_caller)
 			return -1;
 		s_caller->pingpong++;
@@ -448,26 +449,14 @@ static struct page_stat *page_stat__findnew_page(u64 page)
 	return __page_stat__findnew_page(page, true);
 }
 
-static int page_stat_cmp(struct page_stat *a, struct page_stat *b)
-{
-	if (a->page > b->page)
-		return -1;
-	if (a->page < b->page)
-		return 1;
-	if (a->order > b->order)
-		return -1;
-	if (a->order < b->order)
-		return 1;
-	if (a->migrate_type > b->migrate_type)
-		return -1;
-	if (a->migrate_type < b->migrate_type)
-		return 1;
-	if (a->gfp_flags > b->gfp_flags)
-		return -1;
-	if (a->gfp_flags < b->gfp_flags)
-		return 1;
-	return 0;
-}
+struct sort_dimension {
+	const char		name[20];
+	sort_fn_t		cmp;
+	struct list_head	list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
 
 static struct page_stat *
 __page_stat__findnew_alloc(struct page_stat *pstat, bool create)
@@ -475,14 +464,20 @@ __page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 	struct rb_node **node = &page_alloc_tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct page_stat *data;
+	struct sort_dimension *sort;
 
 	while (*node) {
-		s64 cmp;
+		int cmp = 0;
 
 		parent = *node;
 		data = rb_entry(*node, struct page_stat, node);
 
-		cmp = page_stat_cmp(data, pstat);
+		list_for_each_entry(sort, &page_alloc_sort_input, list) {
+			cmp = sort->cmp(pstat, data);
+			if (cmp)
+				break;
+		}
+
 		if (cmp < 0)
 			node = &parent->rb_left;
 		else if (cmp > 0)
@@ -519,19 +514,25 @@ static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat)
 }
 
 static struct page_stat *
-__page_stat__findnew_caller(u64 callsite, bool create)
+__page_stat__findnew_caller(struct page_stat *pstat, bool create)
 {
 	struct rb_node **node = &page_caller_tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct page_stat *data;
+	struct sort_dimension *sort;
 
 	while (*node) {
-		s64 cmp;
+		int cmp = 0;
 
 		parent = *node;
 		data = rb_entry(*node, struct page_stat, node);
 
-		cmp = data->callsite - callsite;
+		list_for_each_entry(sort, &page_caller_sort_input, list) {
+			cmp = sort->cmp(pstat, data);
+			if (cmp)
+				break;
+		}
+
 		if (cmp < 0)
 			node = &parent->rb_left;
 		else if (cmp > 0)
@@ -545,7 +546,10 @@ __page_stat__findnew_caller(u64 callsite, bool create)
 
 	data = zalloc(sizeof(*data));
 	if (data != NULL) {
-		data->callsite = callsite;
+		data->callsite = pstat->callsite;
+		data->order = pstat->order;
+		data->gfp_flags = pstat->gfp_flags;
+		data->migrate_type = pstat->migrate_type;
 
 		rb_link_node(&data->node, parent, node);
 		rb_insert_color(&data->node, &page_caller_tree);
@@ -554,14 +558,14 @@ __page_stat__findnew_caller(u64 callsite, bool create)
 	return data;
 }
 
-static struct page_stat *page_stat__find_caller(u64 callsite)
+static struct page_stat *page_stat__find_caller(struct page_stat *pstat)
 {
-	return __page_stat__findnew_caller(callsite, false);
+	return __page_stat__findnew_caller(pstat, false);
 }
 
-static struct page_stat *page_stat__findnew_caller(u64 callsite)
+static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat)
 {
-	return __page_stat__findnew_caller(callsite, true);
+	return __page_stat__findnew_caller(pstat, true);
 }
 
 static bool valid_page(u64 pfn_or_page)
@@ -629,14 +633,11 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	pstat->alloc_bytes += bytes;
 	pstat->callsite = callsite;
 
-	pstat = page_stat__findnew_caller(callsite);
+	this.callsite = callsite;
+	pstat = page_stat__findnew_caller(&this);
 	if (pstat == NULL)
 		return -ENOMEM;
 
-	pstat->order = order;
-	pstat->gfp_flags = gfp_flags;
-	pstat->migrate_type = migrate_type;
-
 	pstat->nr_alloc++;
 	pstat->alloc_bytes += bytes;
 
@@ -690,7 +691,7 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	pstat->nr_free++;
 	pstat->free_bytes += bytes;
 
-	pstat = page_stat__find_caller(this.callsite);
+	pstat = page_stat__find_caller(&this);
 	if (pstat == NULL)
 		return -ENOENT;
 
@@ -976,14 +977,10 @@ static void print_result(struct perf_session *session)
 		print_page_result(session);
 }
 
-struct sort_dimension {
-	const char		name[20];
-	sort_fn_t		cmp;
-	struct list_head	list;
-};
-
-static LIST_HEAD(caller_sort);
-static LIST_HEAD(alloc_sort);
+static LIST_HEAD(slab_caller_sort);
+static LIST_HEAD(slab_alloc_sort);
+static LIST_HEAD(page_caller_sort);
+static LIST_HEAD(page_alloc_sort);
 
 static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data,
 			     struct list_head *sort_list)
@@ -1032,10 +1029,12 @@ static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted
 	}
 }
 
-static void sort_page_insert(struct rb_root *root, struct page_stat *data)
+static void sort_page_insert(struct rb_root *root, struct page_stat *data,
+			     struct list_head *sort_list)
 {
 	struct rb_node **new = &root->rb_node;
 	struct rb_node *parent = NULL;
+	struct sort_dimension *sort;
 
 	while (*new) {
 		struct page_stat *this;
@@ -1044,8 +1043,11 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
 		this = rb_entry(*new, struct page_stat, node);
 		parent = *new;
 
-		/* TODO: support more sort key */
-		cmp = data->alloc_bytes - this->alloc_bytes;
+		list_for_each_entry(sort, sort_list, list) {
+			cmp = sort->cmp(data, this);
+			if (cmp)
+				break;
+		}
 
 		if (cmp > 0)
 			new = &parent->rb_left;
@@ -1057,7 +1059,8 @@ static void sort_page_insert(struct rb_root *root, struct page_stat *data)
 	rb_insert_color(&data->node, root);
 }
 
-static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted)
+static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted,
+			       struct list_head *sort_list)
 {
 	struct rb_node *node;
 	struct page_stat *data;
@@ -1069,7 +1072,7 @@ static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted
 
 		rb_erase(node, root);
 		data = rb_entry(node, struct page_stat, node);
-		sort_page_insert(root_sorted, data);
+		sort_page_insert(root_sorted, data, sort_list);
 	}
 }
 
@@ -1077,13 +1080,15 @@ static void sort_result(void)
 {
 	if (kmem_slab) {
 		__sort_slab_result(&root_alloc_stat, &root_alloc_sorted,
-				   &alloc_sort);
+				   &slab_alloc_sort);
 		__sort_slab_result(&root_caller_stat, &root_caller_sorted,
-				   &caller_sort);
+				   &slab_caller_sort);
 	}
 	if (kmem_page) {
-		__sort_page_result(&page_alloc_tree, &page_alloc_sorted);
-		__sort_page_result(&page_caller_tree, &page_caller_sorted);
+		__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
+				   &page_alloc_sort);
+		__sort_page_result(&page_caller_tree, &page_caller_sorted,
+				   &page_caller_sort);
 	}
 }
 
@@ -1132,8 +1137,12 @@ out:
 	return err;
 }
 
-static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r)
+/* slab sort keys */
+static int ptr_cmp(void *a, void *b)
 {
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
+
 	if (l->ptr < r->ptr)
 		return -1;
 	else if (l->ptr > r->ptr)
@@ -1146,8 +1155,11 @@ static struct sort_dimension ptr_sort_dimension = {
 	.cmp	= ptr_cmp,
 };
 
-static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int slab_callsite_cmp(void *a, void *b)
 {
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
+
 	if (l->call_site < r->call_site)
 		return -1;
 	else if (l->call_site > r->call_site)
@@ -1157,11 +1169,14 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r)
 
 static struct sort_dimension callsite_sort_dimension = {
 	.name	= "callsite",
-	.cmp	= callsite_cmp,
+	.cmp	= slab_callsite_cmp,
 };
 
-static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int hit_cmp(void *a, void *b)
 {
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
+
 	if (l->hit < r->hit)
 		return -1;
 	else if (l->hit > r->hit)
@@ -1174,8 +1189,11 @@ static struct sort_dimension hit_sort_dimension = {
 	.cmp	= hit_cmp,
 };
 
-static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int bytes_cmp(void *a, void *b)
 {
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
+
 	if (l->bytes_alloc < r->bytes_alloc)
 		return -1;
 	else if (l->bytes_alloc > r->bytes_alloc)
@@ -1188,9 +1206,11 @@ static struct sort_dimension bytes_sort_dimension = {
 	.cmp	= bytes_cmp,
 };
 
-static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int frag_cmp(void *a, void *b)
 {
 	double x, y;
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
 
 	x = fragmentation(l->bytes_req, l->bytes_alloc);
 	y = fragmentation(r->bytes_req, r->bytes_alloc);
@@ -1207,8 +1227,11 @@ static struct sort_dimension frag_sort_dimension = {
 	.cmp	= frag_cmp,
 };
 
-static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r)
+static int pingpong_cmp(void *a, void *b)
 {
+	struct alloc_stat *l = a;
+	struct alloc_stat *r = b;
+
 	if (l->pingpong < r->pingpong)
 		return -1;
 	else if (l->pingpong > r->pingpong)
@@ -1221,7 +1244,135 @@ static struct sort_dimension pingpong_sort_dimension = {
 	.cmp	= pingpong_cmp,
 };
 
-static struct sort_dimension *avail_sorts[] = {
+/* page sort keys */
+static int page_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	if (l->page < r->page)
+		return -1;
+	else if (l->page > r->page)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension page_sort_dimension = {
+	.name	= "page",
+	.cmp	= page_cmp,
+};
+
+static int page_callsite_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	if (l->callsite < r->callsite)
+		return -1;
+	else if (l->callsite > r->callsite)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension page_callsite_sort_dimension = {
+	.name	= "callsite",
+	.cmp	= page_callsite_cmp,
+};
+
+static int page_hit_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	if (l->nr_alloc < r->nr_alloc)
+		return -1;
+	else if (l->nr_alloc > r->nr_alloc)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension page_hit_sort_dimension = {
+	.name	= "hit",
+	.cmp	= page_hit_cmp,
+};
+
+static int page_bytes_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	if (l->alloc_bytes < r->alloc_bytes)
+		return -1;
+	else if (l->alloc_bytes > r->alloc_bytes)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension page_bytes_sort_dimension = {
+	.name	= "bytes",
+	.cmp	= page_bytes_cmp,
+};
+
+static int page_order_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	if (l->order < r->order)
+		return -1;
+	else if (l->order > r->order)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension page_order_sort_dimension = {
+	.name	= "order",
+	.cmp	= page_order_cmp,
+};
+
+static int migrate_type_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	/* for internal use to find free'd page */
+	if (l->migrate_type == -1U)
+		return 0;
+
+	if (l->migrate_type < r->migrate_type)
+		return -1;
+	else if (l->migrate_type > r->migrate_type)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension migrate_type_sort_dimension = {
+	.name	= "migtype",
+	.cmp	= migrate_type_cmp,
+};
+
+static int gfp_flags_cmp(void *a, void *b)
+{
+	struct page_stat *l = a;
+	struct page_stat *r = b;
+
+	/* for internal use to find free'd page */
+	if (l->gfp_flags == -1U)
+		return 0;
+
+	if (l->gfp_flags < r->gfp_flags)
+		return -1;
+	else if (l->gfp_flags > r->gfp_flags)
+		return 1;
+	return 0;
+}
+
+static struct sort_dimension gfp_flags_sort_dimension = {
+	.name	= "gfp",
+	.cmp	= gfp_flags_cmp,
+};
+
+static struct sort_dimension *slab_sorts[] = {
 	&ptr_sort_dimension,
 	&callsite_sort_dimension,
 	&hit_sort_dimension,
@@ -1230,16 +1381,24 @@ static struct sort_dimension *avail_sorts[] = {
 	&pingpong_sort_dimension,
 };
 
-#define NUM_AVAIL_SORTS	((int)ARRAY_SIZE(avail_sorts))
+static struct sort_dimension *page_sorts[] = {
+	&page_sort_dimension,
+	&page_callsite_sort_dimension,
+	&page_hit_sort_dimension,
+	&page_bytes_sort_dimension,
+	&page_order_sort_dimension,
+	&migrate_type_sort_dimension,
+	&gfp_flags_sort_dimension,
+};
 
-static int sort_dimension__add(const char *tok, struct list_head *list)
+static int slab_sort_dimension__add(const char *tok, struct list_head *list)
 {
 	struct sort_dimension *sort;
 	int i;
 
-	for (i = 0; i < NUM_AVAIL_SORTS; i++) {
-		if (!strcmp(avail_sorts[i]->name, tok)) {
-			sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i]));
+	for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) {
+		if (!strcmp(slab_sorts[i]->name, tok)) {
+			sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i]));
 			if (!sort) {
 				pr_err("%s: memdup failed\n", __func__);
 				return -1;
@@ -1252,7 +1411,27 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 	return -1;
 }
 
-static int setup_sorting(struct list_head *sort_list, const char *arg)
+static int page_sort_dimension__add(const char *tok, struct list_head *list)
+{
+	struct sort_dimension *sort;
+	int i;
+
+	for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) {
+		if (!strcmp(page_sorts[i]->name, tok)) {
+			sort = memdup(page_sorts[i], sizeof(*page_sorts[i]));
+			if (!sort) {
+				pr_err("%s: memdup failed\n", __func__);
+				return -1;
+			}
+			list_add_tail(&sort->list, list);
+			return 0;
+		}
+	}
+
+	return -1;
+}
+
+static int setup_slab_sorting(struct list_head *sort_list, const char *arg)
 {
 	char *tok;
 	char *str = strdup(arg);
@@ -1267,8 +1446,34 @@ static int setup_sorting(struct list_head *sort_list, const char *arg)
 		tok = strsep(&pos, ",");
 		if (!tok)
 			break;
-		if (sort_dimension__add(tok, sort_list) < 0) {
-			error("Unknown --sort key: '%s'", tok);
+		if (slab_sort_dimension__add(tok, sort_list) < 0) {
+			error("Unknown slab --sort key: '%s'", tok);
+			free(str);
+			return -1;
+		}
+	}
+
+	free(str);
+	return 0;
+}
+
+static int setup_page_sorting(struct list_head *sort_list, const char *arg)
+{
+	char *tok;
+	char *str = strdup(arg);
+	char *pos = str;
+
+	if (!str) {
+		pr_err("%s: strdup failed\n", __func__);
+		return -1;
+	}
+
+	while (true) {
+		tok = strsep(&pos, ",");
+		if (!tok)
+			break;
+		if (page_sort_dimension__add(tok, sort_list) < 0) {
+			error("Unknown page --sort key: '%s'", tok);
 			free(str);
 			return -1;
 		}
@@ -1284,10 +1489,17 @@ static int parse_sort_opt(const struct option *opt __maybe_unused,
 	if (!arg)
 		return -1;
 
-	if (caller_flag > alloc_flag)
-		return setup_sorting(&caller_sort, arg);
-	else
-		return setup_sorting(&alloc_sort, arg);
+	if (kmem_page > kmem_slab) {
+		if (caller_flag > alloc_flag)
+			return setup_page_sorting(&page_caller_sort, arg);
+		else
+			return setup_page_sorting(&page_alloc_sort, arg);
+	} else {
+		if (caller_flag > alloc_flag)
+			return setup_slab_sorting(&slab_caller_sort, arg);
+		else
+			return setup_slab_sorting(&slab_alloc_sort, arg);
+	}
 
 	return 0;
 }
@@ -1395,7 +1607,8 @@ static int __cmd_record(int argc, const char **argv)
 
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
-	const char * const default_sort_order = "frag,hit,bytes";
+	const char * const default_slab_sort = "frag,hit,bytes";
+	const char * const default_page_sort = "bytes,hit";
 	struct perf_data_file file = {
 		.mode = PERF_DATA_MODE_READ,
 	};
@@ -1408,8 +1621,8 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL,
 			   "show per-allocation statistics", parse_alloc_opt),
 	OPT_CALLBACK('s', "sort", NULL, "key[,key2...]",
-		     "sort by keys: ptr, call_site, bytes, hit, pingpong, frag",
-		     parse_sort_opt),
+		     "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, "
+		     "page, order, migtype, gfp", parse_sort_opt),
 	OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt),
 	OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"),
 	OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"),
@@ -1467,11 +1680,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 		if (cpu__setup_cpunode_map())
 			goto out_delete;
 
-		if (list_empty(&caller_sort))
-			setup_sorting(&caller_sort, default_sort_order);
-		if (list_empty(&alloc_sort))
-			setup_sorting(&alloc_sort, default_sort_order);
+		if (list_empty(&slab_caller_sort))
+			setup_slab_sorting(&slab_caller_sort, default_slab_sort);
+		if (list_empty(&slab_alloc_sort))
+			setup_slab_sorting(&slab_alloc_sort, default_slab_sort);
+		if (list_empty(&page_caller_sort))
+			setup_page_sorting(&page_caller_sort, default_page_sort);
+		if (list_empty(&page_alloc_sort))
+			setup_page_sorting(&page_alloc_sort, default_page_sort);
 
+		if (kmem_page) {
+			setup_page_sorting(&page_alloc_sort_input,
+					   "page,order,migtype,gfp");
+			setup_page_sorting(&page_caller_sort_input,
+					   "callsite,order,migtype,gfp");
+		}
 		ret = __cmd_kmem(session);
 	} else
 		usage_with_options(kmem_usage, kmem_options);

From 2a7ef02c9ca0172cd48945407893f38c2438e754 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 Apr 2015 13:55:04 +0900
Subject: [PATCH 073/305] perf kmem: Add --live option for current allocation
 stat

Currently 'perf kmem stat --page' shows total (page) allocation stat by
default, but sometimes one might want to see live (total alloc-only)
requests/pages only.  The new --live option does this by subtracting freed
allocation from the stat.

E.g.:

 # perf kmem stat --page

 SUMMARY (page allocator)
 ========================
 Total allocation requests     :          988,858   [        4,045,368 KB ]
 Total free requests           :          886,484   [        3,624,996 KB ]

 Total alloc+freed requests    :          885,969   [        3,622,628 KB ]
 Total alloc-only requests     :          102,889   [          422,740 KB ]
 Total free-only requests      :              515   [            2,368 KB ]

 Total allocation failures     :                0   [                0 KB ]

 Order     Unmovable   Reclaimable       Movable      Reserved  CMA/Isolated
 -----  ------------  ------------  ------------  ------------  ------------
     0       172,173         3,083       806,686             .             .
     1           284             .             .             .             .
     2         6,124            58             .             .             .
     3           114           335             .             .             .
     4             .             .             .             .             .
     5             .             .             .             .             .
     6             .             .             .             .             .
     7             .             .             .             .             .
     8             .             .             .             .             .
     9             .             .             1             .             .
    10             .             .             .             .             .
 # perf kmem stat --page --live

 SUMMARY (page allocator)
 ========================
 Total allocation requests     :          988,858   [        4,045,368 KB ]
 Total free requests           :          886,484   [        3,624,996 KB ]

 Total alloc+freed requests    :          885,969   [        3,622,628 KB ]
 Total alloc-only requests     :          102,889   [          422,740 KB ]
 Total free-only requests      :              515   [            2,368 KB ]

 Total allocation failures     :                0   [                0 KB ]

 Order     Unmovable   Reclaimable       Movable      Reserved  CMA/Isolated
 -----  ------------  ------------  ------------  ------------  ------------
     0         2,214         3,025        97,156             .             .
     1            59             .             .             .             .
     2            19            58             .             .             .
     3            23           335             .             .             .
     4             .             .             .             .             .
     5             .             .             .             .             .
     6             .             .             .             .             .
     7             .             .             .             .             .
     8             .             .             .             .             .
     9             .             .             .             .             .
    10             .             .             .             .             .
 #

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1429592107-1807-4-git-send-email-namhyung@kernel.org
[ Added examples to the changeset log ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-kmem.txt |   5 ++
 tools/perf/builtin-kmem.c              | 110 +++++++++++++++----------
 2 files changed, 73 insertions(+), 42 deletions(-)

diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt
index 69e181272c51..ff0f433b3fce 100644
--- a/tools/perf/Documentation/perf-kmem.txt
+++ b/tools/perf/Documentation/perf-kmem.txt
@@ -56,6 +56,11 @@ OPTIONS
 --page::
 	Analyze page allocator events
 
+--live::
+	Show live page stat.  The perf kmem shows total allocation stat by
+	default, but this option shows live (currently allocated) pages
+	instead.  (This option works with --page option only)
+
 SEE ALSO
 --------
 linkperf:perf-record[1]
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 0393a7f3fa35..7ead9423fd7a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -244,6 +244,7 @@ static unsigned long nr_page_fails;
 static unsigned long nr_page_nomatch;
 
 static bool use_pfn;
+static bool live_page;
 static struct perf_session *kmem_session;
 
 #define MAX_MIGRATE_TYPES  6
@@ -264,7 +265,7 @@ struct page_stat {
 	int 		nr_free;
 };
 
-static struct rb_root page_tree;
+static struct rb_root page_live_tree;
 static struct rb_root page_alloc_tree;
 static struct rb_root page_alloc_sorted;
 static struct rb_root page_caller_tree;
@@ -403,10 +404,19 @@ out:
 	return sample->ip;
 }
 
+struct sort_dimension {
+	const char		name[20];
+	sort_fn_t		cmp;
+	struct list_head	list;
+};
+
+static LIST_HEAD(page_alloc_sort_input);
+static LIST_HEAD(page_caller_sort_input);
+
 static struct page_stat *
-__page_stat__findnew_page(u64 page, bool create)
+__page_stat__findnew_page(struct page_stat *pstat, bool create)
 {
-	struct rb_node **node = &page_tree.rb_node;
+	struct rb_node **node = &page_live_tree.rb_node;
 	struct rb_node *parent = NULL;
 	struct page_stat *data;
 
@@ -416,7 +426,7 @@ __page_stat__findnew_page(u64 page, bool create)
 		parent = *node;
 		data = rb_entry(*node, struct page_stat, node);
 
-		cmp = data->page - page;
+		cmp = data->page - pstat->page;
 		if (cmp < 0)
 			node = &parent->rb_left;
 		else if (cmp > 0)
@@ -430,34 +440,28 @@ __page_stat__findnew_page(u64 page, bool create)
 
 	data = zalloc(sizeof(*data));
 	if (data != NULL) {
-		data->page = page;
+		data->page = pstat->page;
+		data->order = pstat->order;
+		data->gfp_flags = pstat->gfp_flags;
+		data->migrate_type = pstat->migrate_type;
 
 		rb_link_node(&data->node, parent, node);
-		rb_insert_color(&data->node, &page_tree);
+		rb_insert_color(&data->node, &page_live_tree);
 	}
 
 	return data;
 }
 
-static struct page_stat *page_stat__find_page(u64 page)
+static struct page_stat *page_stat__find_page(struct page_stat *pstat)
 {
-	return __page_stat__findnew_page(page, false);
+	return __page_stat__findnew_page(pstat, false);
 }
 
-static struct page_stat *page_stat__findnew_page(u64 page)
+static struct page_stat *page_stat__findnew_page(struct page_stat *pstat)
 {
-	return __page_stat__findnew_page(page, true);
+	return __page_stat__findnew_page(pstat, true);
 }
 
-struct sort_dimension {
-	const char		name[20];
-	sort_fn_t		cmp;
-	struct list_head	list;
-};
-
-static LIST_HEAD(page_alloc_sort_input);
-static LIST_HEAD(page_caller_sort_input);
-
 static struct page_stat *
 __page_stat__findnew_alloc(struct page_stat *pstat, bool create)
 {
@@ -615,17 +619,8 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	 * This is to find the current page (with correct gfp flags and
 	 * migrate type) at free event.
 	 */
-	pstat = page_stat__findnew_page(page);
-	if (pstat == NULL)
-		return -ENOMEM;
-
-	pstat->order = order;
-	pstat->gfp_flags = gfp_flags;
-	pstat->migrate_type = migrate_type;
-	pstat->callsite = callsite;
-
 	this.page = page;
-	pstat = page_stat__findnew_alloc(&this);
+	pstat = page_stat__findnew_page(&this);
 	if (pstat == NULL)
 		return -ENOMEM;
 
@@ -633,6 +628,16 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 	pstat->alloc_bytes += bytes;
 	pstat->callsite = callsite;
 
+	if (!live_page) {
+		pstat = page_stat__findnew_alloc(&this);
+		if (pstat == NULL)
+			return -ENOMEM;
+
+		pstat->nr_alloc++;
+		pstat->alloc_bytes += bytes;
+		pstat->callsite = callsite;
+	}
+
 	this.callsite = callsite;
 	pstat = page_stat__findnew_caller(&this);
 	if (pstat == NULL)
@@ -665,7 +670,8 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	nr_page_frees++;
 	total_page_free_bytes += bytes;
 
-	pstat = page_stat__find_page(page);
+	this.page = page;
+	pstat = page_stat__find_page(&this);
 	if (pstat == NULL) {
 		pr_debug2("missing free at page %"PRIx64" (order: %d)\n",
 			  page, order);
@@ -676,20 +682,23 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 		return 0;
 	}
 
-	this.page = page;
 	this.gfp_flags = pstat->gfp_flags;
 	this.migrate_type = pstat->migrate_type;
 	this.callsite = pstat->callsite;
 
-	rb_erase(&pstat->node, &page_tree);
+	rb_erase(&pstat->node, &page_live_tree);
 	free(pstat);
 
-	pstat = page_stat__find_alloc(&this);
-	if (pstat == NULL)
-		return -ENOENT;
+	if (live_page) {
+		order_stats[this.order][this.migrate_type]--;
+	} else {
+		pstat = page_stat__find_alloc(&this);
+		if (pstat == NULL)
+			return -ENOMEM;
 
-	pstat->nr_free++;
-	pstat->free_bytes += bytes;
+		pstat->nr_free++;
+		pstat->free_bytes += bytes;
+	}
 
 	pstat = page_stat__find_caller(&this);
 	if (pstat == NULL)
@@ -698,6 +707,16 @@ static int perf_evsel__process_page_free_event(struct perf_evsel *evsel,
 	pstat->nr_free++;
 	pstat->free_bytes += bytes;
 
+	if (live_page) {
+		pstat->nr_alloc--;
+		pstat->alloc_bytes -= bytes;
+
+		if (pstat->nr_alloc == 0) {
+			rb_erase(&pstat->node, &page_caller_tree);
+			free(pstat);
+		}
+	}
+
 	return 0;
 }
 
@@ -815,8 +834,8 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 	const char *format;
 
 	printf("\n%.105s\n", graph_dotted_line);
-	printf(" %-16s | Total alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
-	       use_pfn ? "PFN" : "Page");
+	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
+	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total");
 	printf("%.105s\n", graph_dotted_line);
 
 	if (use_pfn)
@@ -860,7 +879,8 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 	struct machine *machine = &session->machines.host;
 
 	printf("\n%.105s\n", graph_dotted_line);
-	printf(" Total alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n");
+	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
+	       live_page ? "Live" : "Total");
 	printf("%.105s\n", graph_dotted_line);
 
 	while (next && n_lines--) {
@@ -1085,8 +1105,13 @@ static void sort_result(void)
 				   &slab_caller_sort);
 	}
 	if (kmem_page) {
-		__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
-				   &page_alloc_sort);
+		if (live_page)
+			__sort_page_result(&page_live_tree, &page_alloc_sorted,
+					   &page_alloc_sort);
+		else
+			__sort_page_result(&page_alloc_tree, &page_alloc_sorted,
+					   &page_alloc_sort);
+
 		__sort_page_result(&page_caller_tree, &page_caller_sorted,
 				   &page_caller_sort);
 	}
@@ -1630,6 +1655,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 			   parse_slab_opt),
 	OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator",
 			   parse_page_opt),
+	OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"),
 	OPT_END()
 	};
 	const char *const kmem_subcommands[] = { "record", "stat", NULL };

From 0e11115644b39ff9e986eb308b6c44ca75cd475f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 Apr 2015 13:55:05 +0900
Subject: [PATCH 074/305] perf kmem: Print gfp flags in human readable string

Save libtraceevent output and print it in the header.

  # perf kmem stat --page --caller
  #
  # GFP flags
  # ---------
  # 00000010:       NI: GFP_NOIO
  # 000000d0:        K: GFP_KERNEL
  # 00000200:      NWR: GFP_NOWARN
  # 000084d0:    K|R|Z: GFP_KERNEL|GFP_REPEAT|GFP_ZERO
  # 000200d2:       HU: GFP_HIGHUSER
  # 000200da:      HUM: GFP_HIGHUSER_MOVABLE
  # 000280da:    HUM|Z: GFP_HIGHUSER_MOVABLE|GFP_ZERO
  # 002084d0: K|R|Z|NT: GFP_KERNEL|GFP_REPEAT|GFP_ZERO|GFP_NOTRACK
  # 0102005a:  NF|HW|M: GFP_NOFS|GFP_HARDWALL|GFP_MOVABLE

  ---------------------------------------------------------------------------------------------------------
   Total alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite
  ---------------------------------------------------------------------------------------------------------
                 60 |        15 |     0 | UNMOVABL | K|R|Z|NT  | pte_alloc_one
                 40 |        10 |     0 |  MOVABLE | HUM|Z     | handle_mm_fault
                 24 |         6 |     0 |  MOVABLE | HUM       | do_wp_page
                 24 |         6 |     0 | UNMOVABL | K         | __pollwait
   ...

Requested-by: Joonsoo Kim <js1304@gmail.com>
Suggested-by: Minchan Kim <minchan@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1429592107-1807-5-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 222 +++++++++++++++++++++++++++++++++++---
 1 file changed, 209 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 7ead9423fd7a..1c668953c7ec 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -581,6 +581,176 @@ static bool valid_page(u64 pfn_or_page)
 	return true;
 }
 
+struct gfp_flag {
+	unsigned int flags;
+	char *compact_str;
+	char *human_readable;
+};
+
+static struct gfp_flag *gfps;
+static int nr_gfps;
+
+static int gfpcmp(const void *a, const void *b)
+{
+	const struct gfp_flag *fa = a;
+	const struct gfp_flag *fb = b;
+
+	return fa->flags - fb->flags;
+}
+
+/* see include/trace/events/gfpflags.h */
+static const struct {
+	const char *original;
+	const char *compact;
+} gfp_compact_table[] = {
+	{ "GFP_TRANSHUGE",		"THP" },
+	{ "GFP_HIGHUSER_MOVABLE",	"HUM" },
+	{ "GFP_HIGHUSER",		"HU" },
+	{ "GFP_USER",			"U" },
+	{ "GFP_TEMPORARY",		"TMP" },
+	{ "GFP_KERNEL",			"K" },
+	{ "GFP_NOFS",			"NF" },
+	{ "GFP_ATOMIC",			"A" },
+	{ "GFP_NOIO",			"NI" },
+	{ "GFP_HIGH",			"H" },
+	{ "GFP_WAIT",			"W" },
+	{ "GFP_IO",			"I" },
+	{ "GFP_COLD",			"CO" },
+	{ "GFP_NOWARN",			"NWR" },
+	{ "GFP_REPEAT",			"R" },
+	{ "GFP_NOFAIL",			"NF" },
+	{ "GFP_NORETRY",		"NR" },
+	{ "GFP_COMP",			"C" },
+	{ "GFP_ZERO",			"Z" },
+	{ "GFP_NOMEMALLOC",		"NMA" },
+	{ "GFP_MEMALLOC",		"MA" },
+	{ "GFP_HARDWALL",		"HW" },
+	{ "GFP_THISNODE",		"TN" },
+	{ "GFP_RECLAIMABLE",		"RC" },
+	{ "GFP_MOVABLE",		"M" },
+	{ "GFP_NOTRACK",		"NT" },
+	{ "GFP_NO_KSWAPD",		"NK" },
+	{ "GFP_OTHER_NODE",		"ON" },
+	{ "GFP_NOWAIT",			"NW" },
+};
+
+static size_t max_gfp_len;
+
+static char *compact_gfp_flags(char *gfp_flags)
+{
+	char *orig_flags = strdup(gfp_flags);
+	char *new_flags = NULL;
+	char *str, *pos;
+	size_t len = 0;
+
+	if (orig_flags == NULL)
+		return NULL;
+
+	str = strtok_r(orig_flags, "|", &pos);
+	while (str) {
+		size_t i;
+		char *new;
+		const char *cpt;
+
+		for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) {
+			if (strcmp(gfp_compact_table[i].original, str))
+				continue;
+
+			cpt = gfp_compact_table[i].compact;
+			new = realloc(new_flags, len + strlen(cpt) + 2);
+			if (new == NULL) {
+				free(new_flags);
+				return NULL;
+			}
+
+			new_flags = new;
+
+			if (!len) {
+				strcpy(new_flags, cpt);
+			} else {
+				strcat(new_flags, "|");
+				strcat(new_flags, cpt);
+				len++;
+			}
+
+			len += strlen(cpt);
+		}
+
+		str = strtok_r(NULL, "|", &pos);
+	}
+
+	if (max_gfp_len < len)
+		max_gfp_len = len;
+
+	free(orig_flags);
+	return new_flags;
+}
+
+static char *compact_gfp_string(unsigned long gfp_flags)
+{
+	struct gfp_flag key = {
+		.flags = gfp_flags,
+	};
+	struct gfp_flag *gfp;
+
+	gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+	if (gfp)
+		return gfp->compact_str;
+
+	return NULL;
+}
+
+static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
+			   unsigned int gfp_flags)
+{
+	struct pevent_record record = {
+		.cpu = sample->cpu,
+		.data = sample->raw_data,
+		.size = sample->raw_size,
+	};
+	struct trace_seq seq;
+	char *str, *pos;
+
+	if (nr_gfps) {
+		struct gfp_flag key = {
+			.flags = gfp_flags,
+		};
+
+		if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp))
+			return 0;
+	}
+
+	trace_seq_init(&seq);
+	pevent_event_info(&seq, evsel->tp_format, &record);
+
+	str = strtok_r(seq.buffer, " ", &pos);
+	while (str) {
+		if (!strncmp(str, "gfp_flags=", 10)) {
+			struct gfp_flag *new;
+
+			new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps));
+			if (new == NULL)
+				return -ENOMEM;
+
+			gfps = new;
+			new += nr_gfps++;
+
+			new->flags = gfp_flags;
+			new->human_readable = strdup(str + 10);
+			new->compact_str = compact_gfp_flags(str + 10);
+			if (!new->human_readable || !new->compact_str)
+				return -ENOMEM;
+
+			qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp);
+		}
+
+		str = strtok_r(NULL, " ", &pos);
+	}
+
+	trace_seq_destroy(&seq);
+	return 0;
+}
+
 static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 						struct perf_sample *sample)
 {
@@ -613,6 +783,9 @@ static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel,
 		return 0;
 	}
 
+	if (parse_gfp_flags(evsel, sample, gfp_flags) < 0)
+		return -1;
+
 	callsite = find_callsite(evsel, sample);
 
 	/*
@@ -832,16 +1005,18 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 	struct rb_node *next = rb_first(&page_alloc_sorted);
 	struct machine *machine = &session->machines.host;
 	const char *format;
+	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
 
 	printf("\n%.105s\n", graph_dotted_line);
-	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
-	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total");
+	printf(" %-16s | %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+	       use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total",
+	       gfp_len, "GFP flags");
 	printf("%.105s\n", graph_dotted_line);
 
 	if (use_pfn)
-		format = " %16llu | %'16llu | %'9d | %5d | %8s |  %08lx | %s\n";
+		format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
 	else
-		format = " %016llx | %'16llu | %'9d | %5d | %8s |  %08lx | %s\n";
+		format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n";
 
 	while (next && n_lines--) {
 		struct page_stat *data;
@@ -862,13 +1037,15 @@ static void __print_page_alloc_result(struct perf_session *session, int n_lines)
 		       (unsigned long long)data->alloc_bytes / 1024,
 		       data->nr_alloc, data->order,
 		       migrate_type_str[data->migrate_type],
-		       (unsigned long)data->gfp_flags, caller);
+		       gfp_len, compact_gfp_string(data->gfp_flags), caller);
 
 		next = rb_next(next);
 	}
 
-	if (n_lines == -1)
-		printf(" ...              | ...              | ...       | ...   | ...      | ...       | ...\n");
+	if (n_lines == -1) {
+		printf(" ...              | ...              | ...       | ...   | ...      | %-*s | ...\n",
+		       gfp_len, "...");
+	}
 
 	printf("%.105s\n", graph_dotted_line);
 }
@@ -877,10 +1054,11 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 {
 	struct rb_node *next = rb_first(&page_caller_sorted);
 	struct machine *machine = &session->machines.host;
+	int gfp_len = max(strlen("GFP flags"), max_gfp_len);
 
 	printf("\n%.105s\n", graph_dotted_line);
-	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | GFP flags | Callsite\n",
-	       live_page ? "Live" : "Total");
+	printf(" %5s alloc (KB) | Hits      | Order | Mig.type | %-*s | Callsite\n",
+	       live_page ? "Live" : "Total", gfp_len, "GFP flags");
 	printf("%.105s\n", graph_dotted_line);
 
 	while (next && n_lines--) {
@@ -898,21 +1076,37 @@ static void __print_page_caller_result(struct perf_session *session, int n_lines
 		else
 			scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite);
 
-		printf(" %'16llu | %'9d | %5d | %8s |  %08lx | %s\n",
+		printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n",
 		       (unsigned long long)data->alloc_bytes / 1024,
 		       data->nr_alloc, data->order,
 		       migrate_type_str[data->migrate_type],
-		       (unsigned long)data->gfp_flags, caller);
+		       gfp_len, compact_gfp_string(data->gfp_flags), caller);
 
 		next = rb_next(next);
 	}
 
-	if (n_lines == -1)
-		printf(" ...              | ...       | ...   | ...      | ...       | ...\n");
+	if (n_lines == -1) {
+		printf(" ...              | ...       | ...   | ...      | %-*s | ...\n",
+		       gfp_len, "...");
+	}
 
 	printf("%.105s\n", graph_dotted_line);
 }
 
+static void print_gfp_flags(void)
+{
+	int i;
+
+	printf("#\n");
+	printf("# GFP flags\n");
+	printf("# ---------\n");
+	for (i = 0; i < nr_gfps; i++) {
+		printf("# %08x: %*s: %s\n", gfps[i].flags,
+		       (int) max_gfp_len, gfps[i].compact_str,
+		       gfps[i].human_readable);
+	}
+}
+
 static void print_slab_summary(void)
 {
 	printf("\nSUMMARY (SLAB allocator)");
@@ -982,6 +1176,8 @@ static void print_slab_result(struct perf_session *session)
 
 static void print_page_result(struct perf_session *session)
 {
+	if (caller_flag || alloc_flag)
+		print_gfp_flags();
 	if (caller_flag)
 		__print_page_caller_result(session, caller_lines);
 	if (alloc_flag)

From 0c160d495b5616e071bb4f873812e8f473128149 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 21 Apr 2015 13:55:06 +0900
Subject: [PATCH 075/305] perf kmem: Add kmem.default config option

Currently perf kmem command will select --slab if neither --slab nor
--page is given for backward compatibility.  Add kmem.default config
option to select the default value ('page' or 'slab').

  # cat ~/.perfconfig
  [kmem]
  	default = page

  # perf kmem stat

  SUMMARY (page allocator)
  ========================
  Total allocation requests     :            1,518   [            6,096 KB ]
  Total free requests           :            1,431   [            5,748 KB ]

  Total alloc+freed requests    :            1,330   [            5,344 KB ]
  Total alloc-only requests     :              188   [              752 KB ]
  Total free-only requests      :              101   [              404 KB ]

  Total allocation failures     :                0   [                0 KB ]
  ...

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Taeung Song <treeze.taeung@gmail.com>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1429592107-1807-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 32 +++++++++++++++++++++++++++++---
 1 file changed, 29 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 1c668953c7ec..828b7284e547 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -28,6 +28,10 @@ static int	kmem_slab;
 static int	kmem_page;
 
 static long	kmem_page_size;
+static enum {
+	KMEM_SLAB,
+	KMEM_PAGE,
+} kmem_default = KMEM_SLAB;  /* for backward compatibility */
 
 struct alloc_stat;
 typedef int (*sort_fn_t)(void *, void *);
@@ -1710,7 +1714,8 @@ static int parse_sort_opt(const struct option *opt __maybe_unused,
 	if (!arg)
 		return -1;
 
-	if (kmem_page > kmem_slab) {
+	if (kmem_page > kmem_slab ||
+	    (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) {
 		if (caller_flag > alloc_flag)
 			return setup_page_sorting(&page_caller_sort, arg);
 		else
@@ -1826,6 +1831,22 @@ static int __cmd_record(int argc, const char **argv)
 	return cmd_record(i, rec_argv, NULL);
 }
 
+static int kmem_config(const char *var, const char *value, void *cb)
+{
+	if (!strcmp(var, "kmem.default")) {
+		if (!strcmp(value, "slab"))
+			kmem_default = KMEM_SLAB;
+		else if (!strcmp(value, "page"))
+			kmem_default = KMEM_PAGE;
+		else
+			pr_err("invalid default value ('slab' or 'page' required): %s\n",
+			       value);
+		return 0;
+	}
+
+	return perf_default_config(var, value, cb);
+}
+
 int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 {
 	const char * const default_slab_sort = "frag,hit,bytes";
@@ -1862,14 +1883,19 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	struct perf_session *session;
 	int ret = -1;
 
+	perf_config(kmem_config, NULL);
 	argc = parse_options_subcommand(argc, argv, kmem_options,
 					kmem_subcommands, kmem_usage, 0);
 
 	if (!argc)
 		usage_with_options(kmem_usage, kmem_options);
 
-	if (kmem_slab == 0 && kmem_page == 0)
-		kmem_slab = 1;  /* for backward compatibility */
+	if (kmem_slab == 0 && kmem_page == 0) {
+		if (kmem_default == KMEM_SLAB)
+			kmem_slab = 1;
+		else
+			kmem_page = 1;
+	}
 
 	if (!strncmp(argv[0], "rec", 3)) {
 		symbol__init(NULL);

From 64a7e61f11a22de3571b7f43f231e9b3aab00f1e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:24 +0300
Subject: [PATCH 076/305] perf report: Fix placement of itrace option in
 documentation

Unwittingly the itrace options for perf report ended up below the
Overhead Calculation section. Move it back with the other options.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-report.txt | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index b66f6b41b179..b771340d193d 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -324,9 +324,6 @@ OPTIONS
 --header-only::
 	Show only perf.data header (forces --stdio).
 
-
-include::callchain-overhead-calculation.txt[]
-
 --itrace::
 	Options for decoding instruction tracing data. The options are:
 
@@ -354,6 +351,9 @@ include::callchain-overhead-calculation.txt[]
 
 	To disable decoding entirely, use --no-itrace.
 
+
+include::callchain-overhead-calculation.txt[]
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-annotate[1]

From 99fa298453495ee23801ab500a5fe0138c260edb Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:25 +0300
Subject: [PATCH 077/305] perf tools: Add AUX area tracing index

Add an index of AUX area tracing events within a perf.data file.

perf record uses a special user event PERF_RECORD_FINISHED_ROUND to
enable sorting of events in chunks instead of having to sort all events
altogether.

AUX area tracing events contain data that can span back to the very
beginning of the recording period. i.e. they do not obey the rules of
PERF_RECORD_FINISHED_ROUND.

By adding an index, AUX area tracing events can be found in advance and
the PERF_RECORD_FINISHED_ROUND approach works as usual.

The index is recorded with the auxtrace feature in the perf.data file.
A session reads the index but does not process it.  An AUX area decoder
can queue all the AUX area data in advance using
auxtrace_queues__process_index() or otherwise process the index in some
custom manner.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c |  15 +++
 tools/perf/builtin-record.c |  15 +++
 tools/perf/util/auxtrace.c  | 215 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/auxtrace.h  |  35 ++++++
 tools/perf/util/header.c    |  31 +++++-
 tools/perf/util/session.c   |   2 +
 tools/perf/util/session.h   |   1 +
 7 files changed, 310 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index c5f6515f0723..6d4bbde066fd 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -122,6 +122,18 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 						  tool);
 	int ret;
 
+	if (!inject->output.is_pipe) {
+		off_t offset;
+
+		offset = lseek(inject->output.fd, 0, SEEK_CUR);
+		if (offset == -1)
+			return -errno;
+		ret = auxtrace_index__auxtrace_event(&session->auxtrace_index,
+						     event, offset);
+		if (ret < 0)
+			return ret;
+	}
+
 	if (perf_data_file__is_pipe(session->file) || !session->one_mmap) {
 		ret = output_bytes(inject, event, event->header.size);
 		if (ret < 0)
@@ -487,6 +499,9 @@ static int __cmd_inject(struct perf_inject *inject)
 		output_data_offset = 4096;
 	}
 
+	if (!inject->itrace_synth_opts.set)
+		auxtrace_index__free(&session->auxtrace_index);
+
 	if (!file_out->is_pipe)
 		lseek(fd, output_data_offset, SEEK_SET);
 
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4c9aaa1f688a..c8c784c430b6 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -117,9 +117,24 @@ static int record__process_auxtrace(struct perf_tool *tool,
 				    size_t len1, void *data2, size_t len2)
 {
 	struct record *rec = container_of(tool, struct record, tool);
+	struct perf_data_file *file = &rec->file;
 	size_t padding;
 	u8 pad[8] = {0};
 
+	if (!perf_data_file__is_pipe(file)) {
+		off_t file_offset;
+		int fd = perf_data_file__fd(file);
+		int err;
+
+		file_offset = lseek(fd, 0, SEEK_CUR);
+		if (file_offset == -1)
+			return -1;
+		err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index,
+						     event, file_offset);
+		if (err)
+			return err;
+	}
+
 	/* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */
 	padding = (len1 + len2) & 7;
 	if (padding)
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 3cd89eca1e88..28ce134a61ad 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -344,6 +344,33 @@ out_err:
 	return err;
 }
 
+static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues,
+					      struct perf_session *session,
+					      off_t file_offset, size_t sz)
+{
+	union perf_event *event;
+	int err;
+	char buf[PERF_SAMPLE_MAX_SIZE];
+
+	err = perf_session__peek_event(session, file_offset, buf,
+				       PERF_SAMPLE_MAX_SIZE, &event, NULL);
+	if (err)
+		return err;
+
+	if (event->header.type == PERF_RECORD_AUXTRACE) {
+		if (event->header.size < sizeof(struct auxtrace_event) ||
+		    event->header.size != sz) {
+			err = -EINVAL;
+			goto out;
+		}
+		file_offset += event->header.size;
+		err = auxtrace_queues__add_event(queues, session, event,
+						 file_offset, NULL);
+	}
+out:
+	return err;
+}
+
 void auxtrace_queues__free(struct auxtrace_queues *queues)
 {
 	unsigned int i;
@@ -500,6 +527,194 @@ auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
 	return NULL;
 }
 
+static int auxtrace_index__alloc(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+
+	auxtrace_index = malloc(sizeof(struct auxtrace_index));
+	if (!auxtrace_index)
+		return -ENOMEM;
+
+	auxtrace_index->nr = 0;
+	INIT_LIST_HEAD(&auxtrace_index->list);
+
+	list_add_tail(&auxtrace_index->list, head);
+
+	return 0;
+}
+
+void auxtrace_index__free(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index, *n;
+
+	list_for_each_entry_safe(auxtrace_index, n, head, list) {
+		list_del(&auxtrace_index->list);
+		free(auxtrace_index);
+	}
+}
+
+static struct auxtrace_index *auxtrace_index__last(struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+	int err;
+
+	if (list_empty(head)) {
+		err = auxtrace_index__alloc(head);
+		if (err)
+			return NULL;
+	}
+
+	auxtrace_index = list_entry(head->prev, struct auxtrace_index, list);
+
+	if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) {
+		err = auxtrace_index__alloc(head);
+		if (err)
+			return NULL;
+		auxtrace_index = list_entry(head->prev, struct auxtrace_index,
+					    list);
+	}
+
+	return auxtrace_index;
+}
+
+int auxtrace_index__auxtrace_event(struct list_head *head,
+				   union perf_event *event, off_t file_offset)
+{
+	struct auxtrace_index *auxtrace_index;
+	size_t nr;
+
+	auxtrace_index = auxtrace_index__last(head);
+	if (!auxtrace_index)
+		return -ENOMEM;
+
+	nr = auxtrace_index->nr;
+	auxtrace_index->entries[nr].file_offset = file_offset;
+	auxtrace_index->entries[nr].sz = event->header.size;
+	auxtrace_index->nr += 1;
+
+	return 0;
+}
+
+static int auxtrace_index__do_write(int fd,
+				    struct auxtrace_index *auxtrace_index)
+{
+	struct auxtrace_index_entry ent;
+	size_t i;
+
+	for (i = 0; i < auxtrace_index->nr; i++) {
+		ent.file_offset = auxtrace_index->entries[i].file_offset;
+		ent.sz = auxtrace_index->entries[i].sz;
+		if (writen(fd, &ent, sizeof(ent)) != sizeof(ent))
+			return -errno;
+	}
+	return 0;
+}
+
+int auxtrace_index__write(int fd, struct list_head *head)
+{
+	struct auxtrace_index *auxtrace_index;
+	u64 total = 0;
+	int err;
+
+	list_for_each_entry(auxtrace_index, head, list)
+		total += auxtrace_index->nr;
+
+	if (writen(fd, &total, sizeof(total)) != sizeof(total))
+		return -errno;
+
+	list_for_each_entry(auxtrace_index, head, list) {
+		err = auxtrace_index__do_write(fd, auxtrace_index);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int auxtrace_index__process_entry(int fd, struct list_head *head,
+					 bool needs_swap)
+{
+	struct auxtrace_index *auxtrace_index;
+	struct auxtrace_index_entry ent;
+	size_t nr;
+
+	if (readn(fd, &ent, sizeof(ent)) != sizeof(ent))
+		return -1;
+
+	auxtrace_index = auxtrace_index__last(head);
+	if (!auxtrace_index)
+		return -1;
+
+	nr = auxtrace_index->nr;
+	if (needs_swap) {
+		auxtrace_index->entries[nr].file_offset =
+						bswap_64(ent.file_offset);
+		auxtrace_index->entries[nr].sz = bswap_64(ent.sz);
+	} else {
+		auxtrace_index->entries[nr].file_offset = ent.file_offset;
+		auxtrace_index->entries[nr].sz = ent.sz;
+	}
+
+	auxtrace_index->nr = nr + 1;
+
+	return 0;
+}
+
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap)
+{
+	struct list_head *head = &session->auxtrace_index;
+	u64 nr;
+
+	if (readn(fd, &nr, sizeof(u64)) != sizeof(u64))
+		return -1;
+
+	if (needs_swap)
+		nr = bswap_64(nr);
+
+	if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size)
+		return -1;
+
+	while (nr--) {
+		int err;
+
+		err = auxtrace_index__process_entry(fd, head, needs_swap);
+		if (err)
+			return -1;
+	}
+
+	return 0;
+}
+
+static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues,
+						struct perf_session *session,
+						struct auxtrace_index_entry *ent)
+{
+	return auxtrace_queues__add_indexed_event(queues, session,
+						  ent->file_offset, ent->sz);
+}
+
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session)
+{
+	struct auxtrace_index *auxtrace_index;
+	struct auxtrace_index_entry *ent;
+	size_t i;
+	int err;
+
+	list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) {
+		for (i = 0; i < auxtrace_index->nr; i++) {
+			ent = &auxtrace_index->entries[i];
+			err = auxtrace_queues__process_index_entry(queues,
+								   session,
+								   ent);
+			if (err)
+				return err;
+		}
+	}
+	return 0;
+}
+
 struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
 					      struct auxtrace_buffer *buffer)
 {
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 53b60a64a693..b9e4b9d66f5e 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -79,6 +79,32 @@ struct itrace_synth_opts {
 	enum itrace_period_type	period_type;
 };
 
+/**
+ * struct auxtrace_index_entry - indexes a AUX area tracing event within a
+ *                               perf.data file.
+ * @file_offset: offset within the perf.data file
+ * @sz: size of the event
+ */
+struct auxtrace_index_entry {
+	u64			file_offset;
+	u64			sz;
+};
+
+#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256
+
+/**
+ * struct auxtrace_index - index of AUX area tracing events within a perf.data
+ *                         file.
+ * @list: linking a number of arrays of entries
+ * @nr: number of entries
+ * @entries: array of entries
+ */
+struct auxtrace_index {
+	struct list_head	list;
+	size_t			nr;
+	struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT];
+};
+
 /**
  * struct auxtrace - session callbacks to allow AUX area data decoding.
  * @process_event: lets the decoder see all session events
@@ -321,6 +347,8 @@ int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 			       union perf_event *event, off_t data_offset,
 			       struct auxtrace_buffer **buffer_ptr);
 void auxtrace_queues__free(struct auxtrace_queues *queues);
+int auxtrace_queues__process_index(struct auxtrace_queues *queues,
+				   struct perf_session *session);
 struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue,
 					      struct auxtrace_buffer *buffer);
 void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd);
@@ -361,6 +389,13 @@ int auxtrace_record__info_fill(struct auxtrace_record *itr,
 void auxtrace_record__free(struct auxtrace_record *itr);
 u64 auxtrace_record__reference(struct auxtrace_record *itr);
 
+int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,
+				   off_t file_offset);
+int auxtrace_index__write(int fd, struct list_head *head);
+int auxtrace_index__process(int fd, u64 size, struct perf_session *session,
+			    bool needs_swap);
+void auxtrace_index__free(struct list_head *head);
+
 void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type,
 			  int code, int cpu, pid_t pid, pid_t tid, u64 ip,
 			  const char *msg);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 589c28028379..3f0d809d853a 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -869,11 +869,18 @@ static int write_branch_stack(int fd __maybe_unused,
 	return 0;
 }
 
-static int write_auxtrace(int fd __maybe_unused,
-			  struct perf_header *h __maybe_unused,
+static int write_auxtrace(int fd, struct perf_header *h,
 			  struct perf_evlist *evlist __maybe_unused)
 {
-	return 0;
+	struct perf_session *session;
+	int err;
+
+	session = container_of(h, struct perf_session, header);
+
+	err = auxtrace_index__write(fd, &session->auxtrace_index);
+	if (err < 0)
+		pr_err("Failed to write auxtrace index\n");
+	return err;
 }
 
 static void print_hostname(struct perf_header *ph, int fd __maybe_unused,
@@ -1834,6 +1841,22 @@ out_free:
 	return ret;
 }
 
+static int process_auxtrace(struct perf_file_section *section,
+			    struct perf_header *ph, int fd,
+			    void *data __maybe_unused)
+{
+	struct perf_session *session;
+	int err;
+
+	session = container_of(ph, struct perf_session, header);
+
+	err = auxtrace_index__process(fd, section->size, session,
+				      ph->needs_swap);
+	if (err < 0)
+		pr_err("Failed to process auxtrace index\n");
+	return err;
+}
+
 struct feature_ops {
 	int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist);
 	void (*print)(struct perf_header *h, int fd, FILE *fp);
@@ -1874,7 +1897,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = {
 	FEAT_OPA(HEADER_BRANCH_STACK,	branch_stack),
 	FEAT_OPP(HEADER_PMU_MAPPINGS,	pmu_mappings),
 	FEAT_OPP(HEADER_GROUP_DESC,	group_desc),
-	FEAT_OPA(HEADER_AUXTRACE,	auxtrace),
+	FEAT_OPP(HEADER_AUXTRACE,	auxtrace),
 };
 
 struct header_print_data {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 90fa5674ccb4..b6972b118bc2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -120,6 +120,7 @@ struct perf_session *perf_session__new(struct perf_data_file *file,
 
 	session->repipe = repipe;
 	session->tool   = tool;
+	INIT_LIST_HEAD(&session->auxtrace_index);
 	machines__init(&session->machines);
 	ordered_events__init(&session->ordered_events, ordered_events__deliver_event);
 
@@ -187,6 +188,7 @@ static void perf_session_env__delete(struct perf_session_env *env)
 void perf_session__delete(struct perf_session *session)
 {
 	auxtrace__free(session);
+	auxtrace_index__free(&session->auxtrace_index);
 	perf_session__destroy_kernel_maps(session);
 	perf_session__delete_threads(session);
 	perf_session_env__delete(&session->header.env);
diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h
index 9ed51353a3d7..b44afc75d1cc 100644
--- a/tools/perf/util/session.h
+++ b/tools/perf/util/session.h
@@ -24,6 +24,7 @@ struct perf_session {
 	struct perf_evlist	*evlist;
 	struct auxtrace		*auxtrace;
 	struct itrace_synth_opts *itrace_synth_opts;
+	struct list_head	auxtrace_index;
 	struct trace_event	tevent;
 	bool			repipe;
 	bool			one_mmap;

From cd10b289520577a56c5d369b9a2e7bbee5698a4b Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:26 +0300
Subject: [PATCH 078/305] perf tools: Hit all build ids when AUX area tracing

We need to include all buildids when a perf.data file contains AUX area
tracing data because we do not decode the trace for that purpose because
it would take too long.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-buildid-list.c |  9 +++++++++
 tools/perf/builtin-inject.c       |  8 +++++++-
 tools/perf/builtin-record.c       | 10 +++++++++-
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c
index feb420f74c2d..9fe93c8d4fcf 100644
--- a/tools/perf/builtin-buildid-list.c
+++ b/tools/perf/builtin-buildid-list.c
@@ -69,6 +69,15 @@ static int perf_session__list_build_ids(bool force, bool with_hits)
 	session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops);
 	if (session == NULL)
 		return -1;
+
+	/*
+	 * We take all buildids when the file contains AUX area tracing data
+	 * because we do not decode the trace because it would take too long.
+	 */
+	if (!perf_data_file__is_pipe(&file) &&
+	    perf_header__has_feat(&session->header, HEADER_AUXTRACE))
+		with_hits = false;
+
 	/*
 	 * in pipe-mode, the only way to get the buildids is to parse
 	 * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 6d4bbde066fd..cc905f1d3124 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -27,6 +27,7 @@ struct perf_inject {
 	struct perf_session	*session;
 	bool			build_ids;
 	bool			sched_stat;
+	bool			have_auxtrace;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -122,6 +123,8 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 						  tool);
 	int ret;
 
+	inject->have_auxtrace = true;
+
 	if (!inject->output.is_pipe) {
 		off_t offset;
 
@@ -508,9 +511,12 @@ static int __cmd_inject(struct perf_inject *inject)
 	ret = perf_session__process_events(session);
 
 	if (!file_out->is_pipe) {
-		if (inject->build_ids)
+		if (inject->build_ids) {
 			perf_header__set_feat(&session->header,
 					      HEADER_BUILD_ID);
+			if (inject->have_auxtrace)
+				dsos__hit_all(session);
+		}
 		/*
 		 * The AUX areas have been removed and replaced with
 		 * synthesized hardware events, so clear the feature flag.
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index c8c784c430b6..4d0ebbb944aa 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -620,8 +620,16 @@ out_child:
 	if (!err && !file->is_pipe) {
 		rec->session->header.data_size += rec->bytes_written;
 
-		if (!rec->no_buildid)
+		if (!rec->no_buildid) {
 			process_buildids(rec);
+			/*
+			 * We take all buildids when the file contains
+			 * AUX area tracing data because we do not decode the
+			 * trace because it would take too long.
+			 */
+			if (rec->opts.full_auxtrace)
+				dsos__hit_all(rec->session);
+		}
 		perf_session__write_header(rec->session, rec->evlist, fd, true);
 	}
 

From e31f0d017ea19fce9f12f084e1c750a0e3b43680 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:27 +0300
Subject: [PATCH 079/305] perf tools: Add build option NO_AUXTRACE to exclude
 AUX area tracing

Add build option NO_AUXTRACE to exclude compiling support for AUX area
tracing. Support for both recording and processing is excluded and by
implication any future additions such as Intel PT and Intel BTS will
also not be compiled in with this option.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf    |   2 +
 tools/perf/builtin-inject.c |  53 +++++++++------
 tools/perf/builtin-record.c |  13 ++++
 tools/perf/config/Makefile  |   5 ++
 tools/perf/tests/make       |   4 +-
 tools/perf/util/Build       |   2 +-
 tools/perf/util/auxtrace.h  | 128 ++++++++++++++++++++++++++++++++++++
 7 files changed, 186 insertions(+), 21 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index c43a20517591..03409cc02117 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -73,6 +73,8 @@ include config/utilities.mak
 # for CTF data format.
 #
 # Define NO_LZMA if you do not want to support compressed (xz) kernel modules
+#
+# Define NO_AUXTRACE if you do not want AUX area tracing support
 
 ifeq ($(srctree),)
 srctree := $(patsubst %/,%,$(dir $(shell pwd)))
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index cc905f1d3124..7c7a7eea900d 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -53,25 +53,6 @@ static int output_bytes(struct perf_inject *inject, void *buf, size_t sz)
 	return 0;
 }
 
-static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
-{
-	char buf[4096];
-	ssize_t ssz;
-	int ret;
-
-	while (size > 0) {
-		ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
-		if (ssz < 0)
-			return -errno;
-		ret = output_bytes(inject, buf, ssz);
-		if (ret)
-			return ret;
-		size -= ssz;
-	}
-
-	return 0;
-}
-
 static int perf_event__repipe_synth(struct perf_tool *tool,
 				    union perf_event *event)
 {
@@ -114,6 +95,27 @@ static int perf_event__repipe_attr(struct perf_tool *tool,
 	return perf_event__repipe_synth(tool, event);
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+
+static int copy_bytes(struct perf_inject *inject, int fd, off_t size)
+{
+	char buf[4096];
+	ssize_t ssz;
+	int ret;
+
+	while (size > 0) {
+		ssz = read(fd, buf, min(size, (off_t)sizeof(buf)));
+		if (ssz < 0)
+			return -errno;
+		ret = output_bytes(inject, buf, ssz);
+		if (ret)
+			return ret;
+		size -= ssz;
+	}
+
+	return 0;
+}
+
 static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 				       union perf_event *event,
 				       struct perf_session *session
@@ -153,6 +155,19 @@ static s64 perf_event__repipe_auxtrace(struct perf_tool *tool,
 	return event->auxtrace.size;
 }
 
+#else
+
+static s64
+perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_session *session __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+#endif
+
 static int perf_event__repipe(struct perf_tool *tool,
 			      union perf_event *event,
 			      struct perf_sample *sample __maybe_unused,
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4d0ebbb944aa..dbb2c02cd706 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -112,6 +112,8 @@ out:
 	return rc;
 }
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+
 static int record__process_auxtrace(struct perf_tool *tool,
 				    union perf_event *event, void *data1,
 				    size_t len1, void *data2, size_t len2)
@@ -165,6 +167,17 @@ static int record__auxtrace_mmap_read(struct record *rec,
 	return 0;
 }
 
+#else
+
+static inline
+int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
+			       struct auxtrace_mmap *mm __maybe_unused)
+{
+	return 0;
+}
+
+#endif
+
 static volatile int done = 0;
 static volatile int signr = -1;
 static volatile int child_finished = 0;
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 59a98c643240..435b6ca85b1f 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -610,6 +610,11 @@ ifdef LIBBABELTRACE
   endif
 endif
 
+ifndef NO_AUXTRACE
+  $(call detected,CONFIG_AUXTRACE)
+  CFLAGS += -DHAVE_AUXTRACE_SUPPORT
+endif
+
 # Among the variables below, these:
 #   perfexecdir
 #   template_dir
diff --git a/tools/perf/tests/make b/tools/perf/tests/make
index 901ec9191359..65280d28662e 100644
--- a/tools/perf/tests/make
+++ b/tools/perf/tests/make
@@ -32,6 +32,7 @@ make_no_backtrace   := NO_BACKTRACE=1
 make_no_libnuma     := NO_LIBNUMA=1
 make_no_libaudit    := NO_LIBAUDIT=1
 make_no_libbionic   := NO_LIBBIONIC=1
+make_no_auxtrace    := NO_AUXTRACE=1
 make_tags           := tags
 make_cscope         := cscope
 make_help           := help
@@ -52,7 +53,7 @@ make_static         := LDFLAGS=-static
 make_minimal        := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1
 make_minimal        += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1
 make_minimal        += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1
-make_minimal        += NO_LIBDW_DWARF_UNWIND=1
+make_minimal        += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1
 
 # $(run) contains all available tests
 run := make_pure
@@ -74,6 +75,7 @@ run += make_no_backtrace
 run += make_no_libnuma
 run += make_no_libaudit
 run += make_no_libbionic
+run += make_no_auxtrace
 run += make_help
 run += make_doc
 run += make_perf_o
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 28af8e2825b3..d552203aead0 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -74,7 +74,7 @@ libperf-y += data.o
 libperf-$(CONFIG_X86) += tsc.o
 libperf-y += cloexec.o
 libperf-y += thread-stack.o
-libperf-y += auxtrace.o
+libperf-$(CONFIG_AUXTRACE) += auxtrace.o
 
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index b9e4b9d66f5e..77b46994e8ea 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -26,6 +26,7 @@
 #include "../perf.h"
 #include "event.h"
 #include "session.h"
+#include "debug.h"
 
 union perf_event;
 struct perf_session;
@@ -290,6 +291,8 @@ struct auxtrace_record {
 	int (*read_finish)(struct auxtrace_record *itr, int idx);
 };
 
+#ifdef HAVE_AUXTRACE_SUPPORT
+
 static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->userpg;
@@ -458,4 +461,129 @@ static inline void auxtrace__free(struct perf_session *session)
 	return session->auxtrace->free(session);
 }
 
+#else
+
+static inline struct auxtrace_record *
+auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
+		      int *err __maybe_unused)
+{
+	*err = 0;
+	return NULL;
+}
+
+static inline
+void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused)
+{
+}
+
+static inline int
+perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused,
+				     struct perf_tool *tool __maybe_unused,
+				     struct perf_session *session __maybe_unused,
+				     perf_event__handler_t process __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused,
+			     struct perf_evlist *evlist __maybe_unused,
+			     struct record_opts *opts __maybe_unused)
+{
+	return 0;
+}
+
+#define perf_event__process_auxtrace_info		0
+#define perf_event__process_auxtrace			0
+#define perf_event__process_auxtrace_error		0
+
+static inline
+void perf_session__auxtrace_error_inc(struct perf_session *session
+				      __maybe_unused,
+				      union perf_event *event
+				      __maybe_unused)
+{
+}
+
+static inline
+void events_stats__auxtrace_error_warn(const struct events_stats *stats
+				       __maybe_unused)
+{
+}
+
+static inline
+int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
+			    const char *str __maybe_unused,
+			    int unset __maybe_unused)
+{
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
+static inline
+int auxtrace__process_event(struct perf_session *session __maybe_unused,
+			    union perf_event *event __maybe_unused,
+			    struct perf_sample *sample __maybe_unused,
+			    struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+int auxtrace__flush_events(struct perf_session *session __maybe_unused,
+			   struct perf_tool *tool __maybe_unused)
+{
+	return 0;
+}
+
+static inline
+void auxtrace__free_events(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused)
+{
+}
+
+static inline
+void auxtrace__free(struct perf_session *session __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_index__write(int fd __maybe_unused,
+			  struct list_head *head __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+int auxtrace_index__process(int fd __maybe_unused,
+			    u64 size __maybe_unused,
+			    struct perf_session *session __maybe_unused,
+			    bool needs_swap __maybe_unused)
+{
+	return -EINVAL;
+}
+
+static inline
+void auxtrace_index__free(struct list_head *head __maybe_unused)
+{
+}
+
+int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
+			struct auxtrace_mmap_params *mp,
+			void *userpg, int fd);
+void auxtrace_mmap__munmap(struct auxtrace_mmap *mm);
+void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp,
+				off_t auxtrace_offset,
+				unsigned int auxtrace_pages,
+				bool auxtrace_overwrite);
+void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp,
+				   struct perf_evlist *evlist, int idx,
+				   bool per_cpu);
+
+#endif
+
 #endif

From 53c76b0e9ed5ed1f00371160f0be60402acba165 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:28 +0300
Subject: [PATCH 080/305] perf auxtrace: Add option to synthesize events for
 transactions

Add AUX area tracing option 'x' to synthesize events for transactions.
This will be used by Intel PT to synthesize an event record for each TSX
start, commit or abort.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-6-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-inject.txt | 9 +++++----
 tools/perf/Documentation/perf-report.txt | 9 +++++----
 tools/perf/Documentation/perf-script.txt | 9 +++++----
 tools/perf/util/auxtrace.c               | 5 ++++-
 tools/perf/util/auxtrace.h               | 2 ++
 5 files changed, 21 insertions(+), 13 deletions(-)

diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index bb1f655e1361..b876ae312699 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -52,11 +52,12 @@ OPTIONS
 		b	synthesize branches events
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
+		x	synthesize transactions events
 		e	synthesize error events
 		d	create a debug log
-		g	synthesize a call chain for instructions events
+		g	synthesize a call chain (use with i or x)
 
-	The default is all events i.e. the same as --itrace=ibe
+	The default is all events i.e. the same as --itrace=ibxe
 
 	In addition, the period (default 100000) for instructions events
 	can be specified in units of:
@@ -67,8 +68,8 @@ OPTIONS
 		us	microseconds
 		ns	nanoseconds (default)
 
-	Also the call chain size (default 16, max. 1024) for instructions
-	events can be specified.
+	Also the call chain size (default 16, max. 1024) for instructions or
+	transactions events can be specified.
 
 SEE ALSO
 --------
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index b771340d193d..27190ed06f9c 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -331,11 +331,12 @@ OPTIONS
 		b	synthesize branches events
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
+		x	synthesize transactions events
 		e	synthesize error events
 		d	create a debug log
-		g	synthesize a call chain for instructions events
+		g	synthesize a call chain (use with i or x)
 
-	The default is all events i.e. the same as --itrace=ibe
+	The default is all events i.e. the same as --itrace=ibxe
 
 	In addition, the period (default 100000) for instructions events
 	can be specified in units of:
@@ -346,8 +347,8 @@ OPTIONS
 		us	microseconds
 		ns	nanoseconds (default)
 
-	Also the call chain size (default 16, max. 1024) for instructions
-	events can be specified.
+	Also the call chain size (default 16, max. 1024) for instructions or
+	transactions events can be specified.
 
 	To disable decoding entirely, use --no-itrace.
 
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index b29cd2f17d13..c82df572fac2 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -235,11 +235,12 @@ OPTIONS
 		b	synthesize branches events
 		c	synthesize branches events (calls only)
 		r	synthesize branches events (returns only)
+		x	synthesize transactions events
 		e	synthesize error events
 		d	create a debug log
-		g	synthesize a call chain for instructions events
+		g	synthesize a call chain (use with i or x)
 
-	The default is all events i.e. the same as --itrace=ibe
+	The default is all events i.e. the same as --itrace=ibxe
 
 	In addition, the period (default 100000) for instructions events
 	can be specified in units of:
@@ -250,8 +251,8 @@ OPTIONS
 		us	microseconds
 		ns	nanoseconds (default)
 
-	Also the call chain size (default 16, max. 1024) for instructions
-	events can be specified.
+	Also the call chain size (default 16, max. 1024) for instructions or
+	transactions events can be specified.
 
 	To disable decoding entirely, use --no-itrace.
 
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 28ce134a61ad..129371048fc1 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -888,6 +888,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
 {
 	synth_opts->instructions = true;
 	synth_opts->branches = true;
+	synth_opts->transactions = true;
 	synth_opts->errors = true;
 	synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE;
 	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
@@ -960,6 +961,9 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 		case 'b':
 			synth_opts->branches = true;
 			break;
+		case 'x':
+			synth_opts->transactions = true;
+			break;
 		case 'e':
 			synth_opts->errors = true;
 			break;
@@ -975,7 +979,6 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
 			synth_opts->returns = true;
 			break;
 		case 'g':
-			synth_opts->instructions = true;
 			synth_opts->callchain = true;
 			synth_opts->callchain_sz =
 					PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 77b46994e8ea..8c6cbb123fe5 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -54,6 +54,7 @@ enum itrace_period_type {
  *          because 'perf inject' will write it out
  * @instructions: whether to synthesize 'instructions' events
  * @branches: whether to synthesize 'branches' events
+ * @transactions: whether to synthesize events for transactions
  * @errors: whether to synthesize decoder error events
  * @dont_decode: whether to skip decoding entirely
  * @log: write a decoding log
@@ -69,6 +70,7 @@ struct itrace_synth_opts {
 	bool			inject;
 	bool			instructions;
 	bool			branches;
+	bool			transactions;
 	bool			errors;
 	bool			dont_decode;
 	bool			log;

From 4a96f7a02eb52b1b618ab610e689bd82770f00b0 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:29 +0300
Subject: [PATCH 081/305] perf tools: Add support for PERF_RECORD_AUX

Add support for the PERF_RECORD_AUX event type.

PERF_RECORD_AUX is a new kernel event that records when new data lands
in the AUX buffer. Currently it is assumed that AUX data follows the
same ring buffer conventions used by the perf events buffer, and
consequently the AUX event is not processed during recording.

It is processed during session processing so that the information in the
'flags' member is made available.

The format of PERF_RECORD_AUX is outlined in the linux/perf_events.h
header file. The 'flags' are also enumerated.

Intel PT and Intel BTS use the flag named PERF_AUX_FLAG_TRUNCATED to
determine if data has been lost because the buffer became full as perf
was not able to empty it fast enough.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-7-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c |  1 +
 tools/perf/util/event.c     | 21 +++++++++++++++++++++
 tools/perf/util/event.h     | 13 +++++++++++++
 tools/perf/util/machine.c   | 10 ++++++++++
 tools/perf/util/machine.h   |  2 ++
 tools/perf/util/session.c   | 15 +++++++++++++++
 tools/perf/util/tool.h      |  1 +
 7 files changed, 63 insertions(+)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 7c7a7eea900d..3e8f730c7dee 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -558,6 +558,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.fork		= perf_event__repipe,
 			.exit		= perf_event__repipe,
 			.lost		= perf_event__repipe,
+			.aux		= perf_event__repipe,
 			.read		= perf_event__repipe_sample,
 			.throttle	= perf_event__repipe,
 			.unthrottle	= perf_event__repipe,
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 32323e5d7b25..724ffde8e7d7 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -23,6 +23,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_FORK]			= "FORK",
 	[PERF_RECORD_READ]			= "READ",
 	[PERF_RECORD_SAMPLE]			= "SAMPLE",
+	[PERF_RECORD_AUX]			= "AUX",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -695,6 +696,14 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused,
 	return machine__process_lost_event(machine, event, sample);
 }
 
+int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
+			    union perf_event *event,
+			    struct perf_sample *sample __maybe_unused,
+			    struct machine *machine)
+{
+	return machine__process_aux_event(machine, event);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -758,6 +767,15 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused,
 	return machine__process_exit_event(machine, event, sample);
 }
 
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n",
+		       event->aux.aux_offset, event->aux.aux_size,
+		       event->aux.flags,
+		       event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "",
+		       event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -777,6 +795,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_MMAP2:
 		ret += perf_event__fprintf_mmap2(event, fp);
 		break;
+	case PERF_RECORD_AUX:
+		ret += perf_event__fprintf_aux(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 80e9f5969a39..b3c350ed4768 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -323,6 +323,13 @@ struct auxtrace_error_event {
 	char msg[MAX_AUXTRACE_ERROR_MSG];
 };
 
+struct aux_event {
+	struct perf_event_header header;
+	u64	aux_offset;
+	u64	aux_size;
+	u64	flags;
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -341,6 +348,7 @@ union perf_event {
 	struct auxtrace_info_event	auxtrace_info;
 	struct auxtrace_event		auxtrace;
 	struct auxtrace_error_event	auxtrace_error;
+	struct aux_event		aux;
 };
 
 void perf_event__print_totals(void);
@@ -376,6 +384,10 @@ int perf_event__process_lost(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
+int perf_event__process_aux(struct perf_tool *tool,
+			    union perf_event *event,
+			    struct perf_sample *sample,
+			    struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
@@ -433,6 +445,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 527e032e24f6..a7ad51100b55 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -486,6 +486,14 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
 	return dso;
 }
 
+int machine__process_aux_event(struct machine *machine __maybe_unused,
+			       union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_aux(event, stdout);
+	return 0;
+}
+
 struct map *machine__new_module(struct machine *machine, u64 start,
 				const char *filename)
 {
@@ -1331,6 +1339,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 		ret = machine__process_exit_event(machine, event, sample); break;
 	case PERF_RECORD_LOST:
 		ret = machine__process_lost_event(machine, event, sample); break;
+	case PERF_RECORD_AUX:
+		ret = machine__process_aux_event(machine, event); break;
 	default:
 		ret = -1;
 		break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 6d64cedb9d1e..fc5432aa66b5 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 				struct perf_sample *sample);
 int machine__process_lost_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
+int machine__process_aux_event(struct machine *machine,
+			       union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index b6972b118bc2..0b4646cd5245 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->exit = process_event_stub;
 	if (tool->lost == NULL)
 		tool->lost = perf_event__process_lost;
+	if (tool->aux == NULL)
+		tool->aux = perf_event__process_aux;
 	if (tool->read == NULL)
 		tool->read = process_event_sample_stub;
 	if (tool->throttle == NULL)
@@ -443,6 +445,16 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all)
 		swap_sample_id_all(event, &event->read + 1);
 }
 
+static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
+{
+	event->aux.aux_offset = bswap_64(event->aux.aux_offset);
+	event->aux.aux_size   = bswap_64(event->aux.aux_size);
+	event->aux.flags      = bswap_64(event->aux.flags);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->aux + 1);
+}
+
 static void perf_event__throttle_swap(union perf_event *event,
 				      bool sample_id_all)
 {
@@ -580,6 +592,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_THROTTLE]		  = perf_event__throttle_swap,
 	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
 	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
+	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1029,6 +1042,8 @@ static int machines__deliver_event(struct machines *machines,
 		return tool->throttle(tool, event, sample, machine);
 	case PERF_RECORD_UNTHROTTLE:
 		return tool->unthrottle(tool, event, sample, machine);
+	case PERF_RECORD_AUX:
+		return tool->aux(tool, event, sample, machine);
 	default:
 		++evlist->stats.nr_unknown_events;
 		return -1;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 20afe19a8eb0..8288caf0836e 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -43,6 +43,7 @@ struct perf_tool {
 			fork,
 			exit,
 			lost,
+			aux,
 			throttle,
 			unthrottle;
 	event_attr_op	attr;

From 0ad21f6869222fd7fd7c63f02febea082e801fc2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:30 +0300
Subject: [PATCH 082/305] perf tools: Add support for PERF_RECORD_ITRACE_START

Add support for the PERF_RECORD_ITRACE_START event type.  This event can
be used to determine the pid and tid that are running when Instruction
Tracing starts.  Generally that information would come from a
sched_switch event but, at the start, no sched_switch events may yet
have been recorded.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-8-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-inject.c |  1 +
 tools/perf/util/event.c     | 18 ++++++++++++++++++
 tools/perf/util/event.h     | 11 +++++++++++
 tools/perf/util/machine.c   | 11 +++++++++++
 tools/perf/util/machine.h   |  2 ++
 tools/perf/util/session.c   | 15 +++++++++++++++
 tools/perf/util/tool.h      |  1 +
 7 files changed, 59 insertions(+)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 3e8f730c7dee..d6a47e854b2b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -559,6 +559,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 			.exit		= perf_event__repipe,
 			.lost		= perf_event__repipe,
 			.aux		= perf_event__repipe,
+			.itrace_start	= perf_event__repipe,
 			.read		= perf_event__repipe_sample,
 			.throttle	= perf_event__repipe,
 			.unthrottle	= perf_event__repipe,
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 724ffde8e7d7..db526091f580 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -24,6 +24,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_READ]			= "READ",
 	[PERF_RECORD_SAMPLE]			= "SAMPLE",
 	[PERF_RECORD_AUX]			= "AUX",
+	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -704,6 +705,14 @@ int perf_event__process_aux(struct perf_tool *tool __maybe_unused,
 	return machine__process_aux_event(machine, event);
 }
 
+int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample __maybe_unused,
+				     struct machine *machine)
+{
+	return machine__process_itrace_start_event(machine, event);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
@@ -776,6 +785,12 @@ size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp)
 		       event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? "O" : "");
 }
 
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp)
+{
+	return fprintf(fp, " pid: %u tid: %u\n",
+		       event->itrace_start.pid, event->itrace_start.tid);
+}
+
 size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 {
 	size_t ret = fprintf(fp, "PERF_RECORD_%s",
@@ -798,6 +813,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp)
 	case PERF_RECORD_AUX:
 		ret += perf_event__fprintf_aux(event, fp);
 		break;
+	case PERF_RECORD_ITRACE_START:
+		ret += perf_event__fprintf_itrace_start(event, fp);
+		break;
 	default:
 		ret += fprintf(fp, "\n");
 	}
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index b3c350ed4768..7eecd5e23d77 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -330,6 +330,11 @@ struct aux_event {
 	u64	flags;
 };
 
+struct itrace_start_event {
+	struct perf_event_header header;
+	u32 pid, tid;
+};
+
 union perf_event {
 	struct perf_event_header	header;
 	struct mmap_event		mmap;
@@ -349,6 +354,7 @@ union perf_event {
 	struct auxtrace_event		auxtrace;
 	struct auxtrace_error_event	auxtrace_error;
 	struct aux_event		aux;
+	struct itrace_start_event	itrace_start;
 };
 
 void perf_event__print_totals(void);
@@ -388,6 +394,10 @@ int perf_event__process_aux(struct perf_tool *tool,
 			    union perf_event *event,
 			    struct perf_sample *sample,
 			    struct machine *machine);
+int perf_event__process_itrace_start(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
 int perf_event__process_mmap(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
@@ -446,6 +456,7 @@ size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_task(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp);
+size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf(union perf_event *event, FILE *fp);
 
 u64 kallsyms__get_function_start(const char *kallsyms_filename,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index a7ad51100b55..2f471105efb1 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -494,6 +494,14 @@ int machine__process_aux_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
+int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
+					union perf_event *event)
+{
+	if (dump_trace)
+		perf_event__fprintf_itrace_start(event, stdout);
+	return 0;
+}
+
 struct map *machine__new_module(struct machine *machine, u64 start,
 				const char *filename)
 {
@@ -1341,6 +1349,9 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 		ret = machine__process_lost_event(machine, event, sample); break;
 	case PERF_RECORD_AUX:
 		ret = machine__process_aux_event(machine, event); break;
+	case PERF_RECORD_ITRACE_START:
+		ret = machine__process_itrace_start_event(machine, event);
+		break;
 	default:
 		ret = -1;
 		break;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index fc5432aa66b5..1d992961d5d1 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -83,6 +83,8 @@ int machine__process_lost_event(struct machine *machine, union perf_event *event
 				struct perf_sample *sample);
 int machine__process_aux_event(struct machine *machine,
 			       union perf_event *event);
+int machine__process_itrace_start_event(struct machine *machine,
+					union perf_event *event);
 int machine__process_mmap_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
 int machine__process_mmap2_event(struct machine *machine, union perf_event *event,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 0b4646cd5245..e722107f932a 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -327,6 +327,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->lost = perf_event__process_lost;
 	if (tool->aux == NULL)
 		tool->aux = perf_event__process_aux;
+	if (tool->itrace_start == NULL)
+		tool->itrace_start = perf_event__process_itrace_start;
 	if (tool->read == NULL)
 		tool->read = process_event_sample_stub;
 	if (tool->throttle == NULL)
@@ -455,6 +457,16 @@ static void perf_event__aux_swap(union perf_event *event, bool sample_id_all)
 		swap_sample_id_all(event, &event->aux + 1);
 }
 
+static void perf_event__itrace_start_swap(union perf_event *event,
+					  bool sample_id_all)
+{
+	event->itrace_start.pid	 = bswap_32(event->itrace_start.pid);
+	event->itrace_start.tid	 = bswap_32(event->itrace_start.tid);
+
+	if (sample_id_all)
+		swap_sample_id_all(event, &event->itrace_start + 1);
+}
+
 static void perf_event__throttle_swap(union perf_event *event,
 				      bool sample_id_all)
 {
@@ -593,6 +605,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_UNTHROTTLE]	  = perf_event__throttle_swap,
 	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
 	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
+	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1044,6 +1057,8 @@ static int machines__deliver_event(struct machines *machines,
 		return tool->unthrottle(tool, event, sample, machine);
 	case PERF_RECORD_AUX:
 		return tool->aux(tool, event, sample, machine);
+	case PERF_RECORD_ITRACE_START:
+		return tool->itrace_start(tool, event, sample, machine);
 	default:
 		++evlist->stats.nr_unknown_events;
 		return -1;
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 8288caf0836e..7f282ad1d2bd 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -44,6 +44,7 @@ struct perf_tool {
 			exit,
 			lost,
 			aux,
+			itrace_start,
 			throttle,
 			unthrottle;
 	event_attr_op	attr;

From d20031bb63dd6dde35feb7845eaf17c620eef120 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:31 +0300
Subject: [PATCH 083/305] perf tools: Add AUX area tracing Snapshot Mode

Add support for making snapshots of AUX area tracing data.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-9-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/perf.h          |  3 ++
 tools/perf/util/auxtrace.c | 85 +++++++++++++++++++++++++++++++++-----
 tools/perf/util/auxtrace.h | 41 ++++++++++++++++++
 3 files changed, 119 insertions(+), 10 deletions(-)

diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index 5042093d5213..aa79fb8a16d4 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -55,6 +55,7 @@ struct record_opts {
 	bool	     sample_intr_regs;
 	bool	     running_time;
 	bool	     full_auxtrace;
+	bool	     auxtrace_snapshot_mode;
 	unsigned int freq;
 	unsigned int mmap_pages;
 	unsigned int auxtrace_mmap_pages;
@@ -62,6 +63,8 @@ struct record_opts {
 	u64          branch_stack;
 	u64	     default_interval;
 	u64	     user_interval;
+	size_t	     auxtrace_snapshot_size;
+	const char   *auxtrace_snapshot_opts;
 	bool	     sample_transaction;
 	unsigned     initial_delay;
 	bool         use_clockid;
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 129371048fc1..df66966cfde7 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -504,6 +504,29 @@ void auxtrace_record__free(struct auxtrace_record *itr)
 		itr->free(itr);
 }
 
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr)
+{
+	if (itr && itr->snapshot_start)
+		return itr->snapshot_start(itr);
+	return 0;
+}
+
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr)
+{
+	if (itr && itr->snapshot_finish)
+		return itr->snapshot_finish(itr);
+	return 0;
+}
+
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old)
+{
+	if (itr && itr->find_snapshot)
+		return itr->find_snapshot(itr, idx, mm, data, head, old);
+	return 0;
+}
+
 int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct perf_evlist *evlist,
 			     struct record_opts *opts)
@@ -520,6 +543,19 @@ u64 auxtrace_record__reference(struct auxtrace_record *itr)
 	return 0;
 }
 
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts, const char *str)
+{
+	if (!str)
+		return 0;
+
+	if (itr)
+		return itr->parse_snapshot_options(itr, opts, str);
+
+	pr_err("No AUX area tracing to snapshot\n");
+	return -EINVAL;
+}
+
 struct auxtrace_record *__weak
 auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err)
 {
@@ -1077,16 +1113,26 @@ int perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused,
 	return 0;
 }
 
-int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
-			struct perf_tool *tool, process_auxtrace_t fn)
+static int __auxtrace_mmap__read(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 bool snapshot, size_t snapshot_size)
 {
-	u64 head = auxtrace_mmap__read_head(mm);
-	u64 old = mm->prev, offset, ref;
+	u64 head, old = mm->prev, offset, ref;
 	unsigned char *data = mm->base;
 	size_t size, head_off, old_off, len1, len2, padding;
 	union perf_event ev;
 	void *data1, *data2;
 
+	if (snapshot) {
+		head = auxtrace_mmap__read_snapshot_head(mm);
+		if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data,
+						   &head, &old))
+			return -1;
+	} else {
+		head = auxtrace_mmap__read_head(mm);
+	}
+
 	if (old == head)
 		return 0;
 
@@ -1106,6 +1152,9 @@ int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 	else
 		size = mm->len - (old_off - head_off);
 
+	if (snapshot && size > snapshot_size)
+		size = snapshot_size;
+
 	ref = auxtrace_record__reference(itr);
 
 	if (head > old || size <= head || mm->mask) {
@@ -1153,18 +1202,34 @@ int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 
 	mm->prev = head;
 
-	auxtrace_mmap__write_tail(mm, head);
-	if (itr->read_finish) {
-		int err;
+	if (!snapshot) {
+		auxtrace_mmap__write_tail(mm, head);
+		if (itr->read_finish) {
+			int err;
 
-		err = itr->read_finish(itr, mm->idx);
-		if (err < 0)
-			return err;
+			err = itr->read_finish(itr, mm->idx);
+			if (err < 0)
+				return err;
+		}
 	}
 
 	return 1;
 }
 
+int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
+			struct perf_tool *tool, process_auxtrace_t fn)
+{
+	return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0);
+}
+
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size)
+{
+	return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size);
+}
+
 /**
  * struct auxtrace_cache - hash table to implement a cache
  * @hashtable: the hashtable
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index 8c6cbb123fe5..c2c677e62733 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -276,6 +276,10 @@ struct auxtrace_mmap_params {
  * @info_priv_size: return the size of the private data in auxtrace_info_event
  * @info_fill: fill-in the private data in auxtrace_info_event
  * @free: free this auxtrace record structure
+ * @snapshot_start: starting a snapshot
+ * @snapshot_finish: finishing a snapshot
+ * @find_snapshot: find data to snapshot within auxtrace mmap
+ * @parse_snapshot_options: parse snapshot options
  * @reference: provide a 64-bit reference number for auxtrace_event
  * @read_finish: called after reading from an auxtrace mmap
  */
@@ -289,12 +293,36 @@ struct auxtrace_record {
 			 struct auxtrace_info_event *auxtrace_info,
 			 size_t priv_size);
 	void (*free)(struct auxtrace_record *itr);
+	int (*snapshot_start)(struct auxtrace_record *itr);
+	int (*snapshot_finish)(struct auxtrace_record *itr);
+	int (*find_snapshot)(struct auxtrace_record *itr, int idx,
+			     struct auxtrace_mmap *mm, unsigned char *data,
+			     u64 *head, u64 *old);
+	int (*parse_snapshot_options)(struct auxtrace_record *itr,
+				      struct record_opts *opts,
+				      const char *str);
 	u64 (*reference)(struct auxtrace_record *itr);
 	int (*read_finish)(struct auxtrace_record *itr, int idx);
 };
 
 #ifdef HAVE_AUXTRACE_SUPPORT
 
+/*
+ * In snapshot mode the mmapped page is read-only which makes using
+ * __sync_val_compare_and_swap() problematic.  However, snapshot mode expects
+ * the buffer is not updated while the snapshot is made (e.g. Intel PT disables
+ * the event) so there is not a race anyway.
+ */
+static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm)
+{
+	struct perf_event_mmap_page *pc = mm->userpg;
+	u64 head = ACCESS_ONCE(pc->aux_head);
+
+	/* Ensure all reads are done after we read the head */
+	rmb();
+	return head;
+}
+
 static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm)
 {
 	struct perf_event_mmap_page *pc = mm->userpg;
@@ -346,6 +374,11 @@ typedef int (*process_auxtrace_t)(struct perf_tool *tool,
 int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr,
 			struct perf_tool *tool, process_auxtrace_t fn);
 
+int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm,
+				 struct auxtrace_record *itr,
+				 struct perf_tool *tool, process_auxtrace_t fn,
+				 size_t snapshot_size);
+
 int auxtrace_queues__init(struct auxtrace_queues *queues);
 int auxtrace_queues__add_event(struct auxtrace_queues *queues,
 			       struct perf_session *session,
@@ -383,6 +416,9 @@ void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key);
 struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
 					      int *err);
 
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr,
+				    struct record_opts *opts,
+				    const char *str);
 int auxtrace_record__options(struct auxtrace_record *itr,
 			     struct perf_evlist *evlist,
 			     struct record_opts *opts);
@@ -392,6 +428,11 @@ int auxtrace_record__info_fill(struct auxtrace_record *itr,
 			       struct auxtrace_info_event *auxtrace_info,
 			       size_t priv_size);
 void auxtrace_record__free(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr);
+int auxtrace_record__snapshot_finish(struct auxtrace_record *itr);
+int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx,
+				   struct auxtrace_mmap *mm,
+				   unsigned char *data, u64 *head, u64 *old);
 u64 auxtrace_record__reference(struct auxtrace_record *itr);
 
 int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event,

From 2dd6d8a10a942c5fd8950d1046e172237d009c8e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 30 Apr 2015 17:37:32 +0300
Subject: [PATCH 084/305] perf record: Add AUX area tracing Snapshot Mode
 support

Add a new option and support for Instruction Tracing Snapshot Mode.
When the new option is selected, no AUX area tracing data is captured
until a signal (SIGUSR2) is received.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1430404667-10593-10-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt |   7 ++
 tools/perf/builtin-record.c              | 152 +++++++++++++++++++----
 tools/perf/util/auxtrace.h               |  11 ++
 tools/perf/util/parse-options.h          |   4 +
 4 files changed, 149 insertions(+), 25 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 76a141e326da..57dd57bcef95 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -259,6 +259,13 @@ records. See clock_gettime(). In particular CLOCK_MONOTONIC and
 CLOCK_MONOTONIC_RAW are supported, some events might also allow
 CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI.
 
+-S::
+--snapshot::
+Select AUX area tracing Snapshot Mode. This option is valid only with an
+AUX area tracing event. Optionally the number of bytes to capture per
+snapshot can be specified. In Snapshot Mode, trace data is captured only when
+signal SIGUSR2 is received.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index dbb2c02cd706..5dfe91395617 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -112,6 +112,32 @@ out:
 	return rc;
 }
 
+static volatile int done;
+static volatile int signr = -1;
+static volatile int child_finished;
+static volatile int auxtrace_snapshot_enabled;
+static volatile int auxtrace_snapshot_err;
+static volatile int auxtrace_record__snapshot_started;
+
+static void sig_handler(int sig)
+{
+	if (sig == SIGCHLD)
+		child_finished = 1;
+	else
+		signr = sig;
+
+	done = 1;
+}
+
+static void record__sig_exit(void)
+{
+	if (signr == -1)
+		return;
+
+	signal(signr, SIG_DFL);
+	raise(signr);
+}
+
 #ifdef HAVE_AUXTRACE_SUPPORT
 
 static int record__process_auxtrace(struct perf_tool *tool,
@@ -167,6 +193,56 @@ static int record__auxtrace_mmap_read(struct record *rec,
 	return 0;
 }
 
+static int record__auxtrace_mmap_read_snapshot(struct record *rec,
+					       struct auxtrace_mmap *mm)
+{
+	int ret;
+
+	ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool,
+					   record__process_auxtrace,
+					   rec->opts.auxtrace_snapshot_size);
+	if (ret < 0)
+		return ret;
+
+	if (ret)
+		rec->samples++;
+
+	return 0;
+}
+
+static int record__auxtrace_read_snapshot_all(struct record *rec)
+{
+	int i;
+	int rc = 0;
+
+	for (i = 0; i < rec->evlist->nr_mmaps; i++) {
+		struct auxtrace_mmap *mm =
+				&rec->evlist->mmap[i].auxtrace_mmap;
+
+		if (!mm->base)
+			continue;
+
+		if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) {
+			rc = -1;
+			goto out;
+		}
+	}
+out:
+	return rc;
+}
+
+static void record__read_auxtrace_snapshot(struct record *rec)
+{
+	pr_debug("Recording AUX area tracing snapshot\n");
+	if (record__auxtrace_read_snapshot_all(rec) < 0) {
+		auxtrace_snapshot_err = -1;
+	} else {
+		auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr);
+		if (!auxtrace_snapshot_err)
+			auxtrace_snapshot_enabled = 1;
+	}
+}
+
 #else
 
 static inline
@@ -176,31 +252,19 @@ int record__auxtrace_mmap_read(struct record *rec __maybe_unused,
 	return 0;
 }
 
+static inline
+void record__read_auxtrace_snapshot(struct record *rec __maybe_unused)
+{
+}
+
+static inline
+int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused)
+{
+	return 0;
+}
+
 #endif
 
-static volatile int done = 0;
-static volatile int signr = -1;
-static volatile int child_finished = 0;
-
-static void sig_handler(int sig)
-{
-	if (sig == SIGCHLD)
-		child_finished = 1;
-	else
-		signr = sig;
-
-	done = 1;
-}
-
-static void record__sig_exit(void)
-{
-	if (signr == -1)
-		return;
-
-	signal(signr, SIG_DFL);
-	raise(signr);
-}
-
 static int record__open(struct record *rec)
 {
 	char msg[512];
@@ -238,7 +302,8 @@ try_again:
 	}
 
 	if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false,
-				 opts->auxtrace_mmap_pages, false) < 0) {
+				 opts->auxtrace_mmap_pages,
+				 opts->auxtrace_snapshot_mode) < 0) {
 		if (errno == EPERM) {
 			pr_err("Permission error mapping pages.\n"
 			       "Consider increasing "
@@ -349,7 +414,7 @@ static int record__mmap_read_all(struct record *rec)
 			}
 		}
 
-		if (mm->base &&
+		if (mm->base && !rec->opts.auxtrace_snapshot_mode &&
 		    record__auxtrace_mmap_read(rec, mm) != 0) {
 			rc = -1;
 			goto out;
@@ -404,6 +469,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused,
 	child_finished = 1;
 }
 
+static void snapshot_sig_handler(int sig);
+
 static int __cmd_record(struct record *rec, int argc, const char **argv)
 {
 	int err;
@@ -424,6 +491,10 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	signal(SIGCHLD, sig_handler);
 	signal(SIGINT, sig_handler);
 	signal(SIGTERM, sig_handler);
+	if (rec->opts.auxtrace_snapshot_mode)
+		signal(SIGUSR2, snapshot_sig_handler);
+	else
+		signal(SIGUSR2, SIG_IGN);
 
 	session = perf_session__new(file, false, tool);
 	if (session == NULL) {
@@ -563,14 +634,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		perf_evlist__enable(rec->evlist);
 	}
 
+	auxtrace_snapshot_enabled = 1;
 	for (;;) {
 		int hits = rec->samples;
 
 		if (record__mmap_read_all(rec) < 0) {
+			auxtrace_snapshot_enabled = 0;
 			err = -1;
 			goto out_child;
 		}
 
+		if (auxtrace_record__snapshot_started) {
+			auxtrace_record__snapshot_started = 0;
+			if (!auxtrace_snapshot_err)
+				record__read_auxtrace_snapshot(rec);
+			if (auxtrace_snapshot_err) {
+				pr_err("AUX area tracing snapshot failed\n");
+				err = -1;
+				goto out_child;
+			}
+		}
+
 		if (hits == rec->samples) {
 			if (done || draining)
 				break;
@@ -593,10 +677,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 		 * disable events in this case.
 		 */
 		if (done && !disabled && !target__none(&opts->target)) {
+			auxtrace_snapshot_enabled = 0;
 			perf_evlist__disable(rec->evlist);
 			disabled = true;
 		}
 	}
+	auxtrace_snapshot_enabled = 0;
 
 	if (forks && workload_exec_errno) {
 		char msg[STRERR_BUFSIZE];
@@ -1068,6 +1154,8 @@ struct option __record_options[] = {
 	OPT_CALLBACK('k', "clockid", &record.opts,
 	"clockid", "clockid to use for events, see clock_gettime()",
 	parse_clockid),
+	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
+			  "opts", "AUX area tracing Snapshot Mode", ""),
 	OPT_END()
 };
 
@@ -1102,6 +1190,11 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 			return err;
 	}
 
+	err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts,
+					      rec->opts.auxtrace_snapshot_opts);
+	if (err)
+		return err;
+
 	err = -ENOMEM;
 
 	symbol__init(NULL);
@@ -1165,3 +1258,12 @@ out_symbol_exit:
 	auxtrace_record__free(rec->itr);
 	return err;
 }
+
+static void snapshot_sig_handler(int sig __maybe_unused)
+{
+	if (!auxtrace_snapshot_enabled)
+		return;
+	auxtrace_snapshot_enabled = 0;
+	auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr);
+	auxtrace_record__snapshot_started = 1;
+}
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index c2c677e62733..a171abbe7301 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -563,6 +563,17 @@ int itrace_parse_synth_opts(const struct option *opt __maybe_unused,
 	return -EINVAL;
 }
 
+static inline
+int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused,
+				    struct record_opts *opts __maybe_unused,
+				    const char *str)
+{
+	if (!str)
+		return 0;
+	pr_err("AUX area tracing not supported\n");
+	return -EINVAL;
+}
+
 static inline
 int auxtrace__process_event(struct perf_session *session __maybe_unused,
 			    union perf_event *event __maybe_unused,
diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h
index 59561fd86278..367d8b816cc7 100644
--- a/tools/perf/util/parse-options.h
+++ b/tools/perf/util/parse-options.h
@@ -123,6 +123,10 @@ struct option {
 #define OPT_LONG(s, l, v, h)        { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) }
 #define OPT_U64(s, l, v, h)         { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) }
 #define OPT_STRING(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) }
+#define OPT_STRING_OPTARG(s, l, v, a, h, d) \
+	{ .type = OPTION_STRING,  .short_name = (s), .long_name = (l), \
+	  .value = check_vtype(v, const char **), (a), .help = (h), \
+	  .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) }
 #define OPT_STRING_NOEMPTY(s, l, v, a, h)   { .type = OPTION_STRING,  .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY}
 #define OPT_DATE(s, l, v, h) \
 	{ .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb }

From 307a464b2342a502da492f0ada8cefd6ab7f63a7 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 5 May 2015 11:29:48 +0900
Subject: [PATCH 085/305] perf probe: Allow to use filter on --del command

This makes perf-probe --del option to accept filter rules
not only simple glob pattern. This simplifies the code and
improve the flexibility.

E.g. if we remove 2 different pattern events, we need 2
     -d options.
  ----
  # ./perf probe -d vfs\* -d malloc
  Removed event: probe_libc:malloc
  Removed event: probe:vfs_read
  ----

  This allows you to joint the 2 patterns with '|'.

  ----
  # ./perf probe -d 'vfs*|malloc'
  Removed event: probe:vfs_read
  Removed event: probe_libc:malloc
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150505022948.23399.4197.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c    |  16 ++----
 tools/perf/util/probe-event.c | 102 +++++++++++-----------------------
 tools/perf/util/probe-event.h |   2 +-
 3 files changed, 39 insertions(+), 81 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index feca3165f957..1f41b4eb0c5a 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -55,12 +55,12 @@ static struct {
 	bool show_ext_vars;
 	bool show_funcs;
 	bool mod_events;
+	bool del_events;
 	bool uprobes;
 	bool quiet;
 	bool target_used;
 	int nevents;
 	struct perf_probe_event events[MAX_PROBES];
-	struct strlist *dellist;
 	struct line_range line_range;
 	char *target;
 	int max_probe_points;
@@ -195,10 +195,8 @@ static int opt_del_probe_event(const struct option *opt __maybe_unused,
 			       const char *str, int unset __maybe_unused)
 {
 	if (str) {
-		params.mod_events = true;
-		if (!params.dellist)
-			params.dellist = strlist__new(true, NULL);
-		strlist__add(params.dellist, str);
+		params.del_events = true;
+		return params_add_filter(str);
 	}
 	return 0;
 }
@@ -313,8 +311,6 @@ static void cleanup_params(void)
 
 	for (i = 0; i < params.nevents; i++)
 		clear_perf_probe_event(params.events + i);
-	if (params.dellist)
-		strlist__delete(params.dellist);
 	line_range__clear(&params.line_range);
 	free(params.target);
 	if (params.filter)
@@ -454,7 +450,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (params.max_probe_points == 0)
 		params.max_probe_points = MAX_PROBES;
 
-	if ((!params.nevents && !params.dellist && !params.list_events &&
+	if ((!params.nevents && !params.del_events && !params.list_events &&
 	     !params.show_lines && !params.show_funcs))
 		usage_with_options(probe_usage, options);
 
@@ -514,8 +510,8 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	}
 #endif
 
-	if (params.dellist) {
-		ret = del_perf_probe_events(params.dellist);
+	if (params.del_events) {
+		ret = del_perf_probe_events(params.filter);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Failed to delete events.", ret);
 			return ret;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 5995d81d2bad..abf5845a2acc 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2734,40 +2734,39 @@ error:
 	return ret;
 }
 
-static int del_trace_probe_event(int fd, const char *buf,
-						  struct strlist *namelist)
+static int del_trace_probe_events(int fd, struct strfilter *filter,
+				  struct strlist *namelist)
 {
-	struct str_node *ent, *n;
+	struct str_node *ent;
+	const char *p;
 	int ret = -ENOENT;
 
-	if (strpbrk(buf, "*?")) { /* Glob-exp */
-		strlist__for_each_safe(ent, n, namelist)
-			if (strglobmatch(ent->s, buf)) {
-				ret = __del_trace_probe_event(fd, ent);
-				if (ret < 0)
-					break;
-				strlist__remove(namelist, ent);
-			}
-	} else {
-		ent = strlist__find(namelist, buf);
-		if (ent) {
+	if (!namelist)
+		return -ENOENT;
+
+	strlist__for_each(ent, namelist) {
+		p = strchr(ent->s, ':');
+		if ((p && strfilter__compare(filter, p + 1)) ||
+		    strfilter__compare(filter, ent->s)) {
 			ret = __del_trace_probe_event(fd, ent);
-			if (ret >= 0)
-				strlist__remove(namelist, ent);
+			if (ret < 0)
+				break;
 		}
 	}
 
 	return ret;
 }
 
-int del_perf_probe_events(struct strlist *dellist)
+int del_perf_probe_events(struct strfilter *filter)
 {
-	int ret = -1, ret2, ufd = -1, kfd = -1;
-	char buf[128];
-	const char *group, *event;
-	char *p, *str;
-	struct str_node *ent;
+	int ret, ret2, ufd = -1, kfd = -1;
 	struct strlist *namelist = NULL, *unamelist = NULL;
+	char *str = strfilter__string(filter);
+
+	if (!str)
+		return -EINVAL;
+
+	pr_debug("Delete filter: \'%s\'\n", str);
 
 	/* Get current event names */
 	kfd = open_kprobe_events(true);
@@ -2780,59 +2779,21 @@ int del_perf_probe_events(struct strlist *dellist)
 
 	if (kfd < 0 && ufd < 0) {
 		print_both_open_warning(kfd, ufd);
+		ret = kfd;
 		goto error;
 	}
 
-	if (namelist == NULL && unamelist == NULL) {
-		ret = -ENOENT;
+	ret = del_trace_probe_events(kfd, filter, namelist);
+	if (ret < 0 && ret != -ENOENT)
 		goto error;
-	}
 
-	strlist__for_each(ent, dellist) {
-		str = strdup(ent->s);
-		if (str == NULL) {
-			ret = -ENOMEM;
-			goto error;
-		}
-		pr_debug("Parsing: %s\n", str);
-		p = strchr(str, ':');
-		if (p) {
-			group = str;
-			*p = '\0';
-			event = p + 1;
-		} else {
-			group = "*";
-			event = str;
-		}
-
-		if (event && *event == '.')
-			event++;
-
-		ret = e_snprintf(buf, 128, "%s:%s", group, event);
-		if (ret < 0) {
-			pr_err("Failed to copy event.");
-			free(str);
-			goto error;
-		}
-
-		pr_debug("Group: %s, Event: %s\n", group, event);
-		free(str);
-
-		ret = ret2 = -ENOENT;
-		if (namelist)
-			ret = del_trace_probe_event(kfd, buf, namelist);
-
-		if ((ret >= 0 || ret == -ENOENT) && unamelist)
-			ret2 = del_trace_probe_event(ufd, buf, unamelist);
-
-		/* Since we can remove probes which already removed, don't check it */
-		if (ret == -ENOENT && ret2 == -ENOENT)
-			pr_debug("Event \"%s\" does not exist.\n", buf);
-		else if (ret < 0 || ret2 < 0) {
-			if (ret >= 0)
-				ret = ret2;
-			break;
-		}
+	ret2 = del_trace_probe_events(ufd, filter, unamelist);
+	if (ret2 < 0 && ret2 != -ENOENT)
+		ret = ret2;
+	else if (ret == -ENOENT && ret2 == -ENOENT) {
+		pr_debug("\"%s\" does not hit any event.\n", str);
+		/* Note that this is silently ignored */
+		ret = 0;
 	}
 
 error:
@@ -2845,6 +2806,7 @@ error:
 		strlist__delete(unamelist);
 		close(ufd);
 	}
+	free(str);
 
 	return ret;
 }
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index ec13362d882f..e10aedc34570 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -126,7 +126,7 @@ extern const char *kernel_get_module_path(const char *module);
 
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
 				 int max_probe_points, bool force_add);
-extern int del_perf_probe_events(struct strlist *dellist);
+extern int del_perf_probe_events(struct strfilter *filter);
 extern int show_perf_probe_events(struct strfilter *filter);
 extern int show_line_range(struct line_range *lr, const char *module,
 			   bool user);

From 9f7811d08dcf7b3e900cbc0d8384b713a86b034f Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 5 May 2015 11:29:50 +0900
Subject: [PATCH 086/305] perf probe: Accept filter argument for --funcs

This allows the user to pass the filter pattern directly to the --funcs
option as below:

  ----
  # ./perf probe -F *kmalloc
  __kmalloc
  devm_kmalloc
  mempool_kmalloc
  sg_kmalloc
  sock_kmalloc
  ----

We previously needed to use the --filter option for that.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150505022950.23399.22435.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-probe.txt |  3 ++-
 tools/perf/builtin-probe.c              | 19 ++++++++++++++-----
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index d0feb8e6eeef..a272f2e9a1cf 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -84,9 +84,10 @@ OPTIONS
 	variables.
 
 -F::
---funcs::
+--funcs[=FILTER]::
 	Show available functions in given module or kernel. With -x/--exec,
 	can also list functions in a user space executable / shared library.
+	This also can accept a FILTER rule argument.
 
 --filter=FILTER::
 	(Only for --vars and --funcs) Set filter. FILTER is a combination of glob
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 1f41b4eb0c5a..5a0e8f1b2c36 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -290,6 +290,17 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
 	return ret;
 }
 #endif
+static int opt_show_funcs(const struct option *opt __maybe_unused,
+			  const char *str, int unset)
+{
+	if (!unset)
+		params.show_funcs = true;
+
+	if (str)
+		return params_add_filter(str);
+
+	return 0;
+}
 
 static int opt_set_filter(const struct option *opt __maybe_unused,
 			  const char *str, int unset __maybe_unused)
@@ -399,8 +410,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT__DRY_RUN(&probe_event_dry_run),
 	OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
 		 "Set how many probe points can be found for a probe."),
-	OPT_BOOLEAN('F', "funcs", &params.show_funcs,
-		    "Show potential probe-able functions."),
+	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
+			     "Show potential probe-able functions.",
+			     opt_show_funcs, DEFAULT_FUNC_FILTER),
 	OPT_CALLBACK('\0', "filter", NULL,
 		     "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
 		     "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
@@ -472,9 +484,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		return ret;
 	}
 	if (params.show_funcs) {
-		if (!params.filter)
-			params.filter = strfilter__new(DEFAULT_FUNC_FILTER,
-						       NULL);
 		ret = show_available_funcs(params.target, params.filter,
 					params.uprobes);
 		strfilter__delete(params.filter);

From 3da166b7b5253aaa6b36410f5d4c4a996ee5915d Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 5 May 2015 11:29:52 +0900
Subject: [PATCH 087/305] perf probe: Remove redundant cleanup of params.filter

Since params.filter will be released in cleanup_params, we don't need to
clear it in each command.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150505022952.23399.58072.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 5a0e8f1b2c36..08c948138424 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -477,8 +477,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 			usage_with_options(probe_usage, options);
 		}
 		ret = show_perf_probe_events(params.filter);
-		strfilter__delete(params.filter);
-		params.filter = NULL;
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show event list.", ret);
 		return ret;
@@ -486,8 +484,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (params.show_funcs) {
 		ret = show_available_funcs(params.target, params.filter,
 					params.uprobes);
-		strfilter__delete(params.filter);
-		params.filter = NULL;
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show functions.", ret);
 		return ret;
@@ -511,8 +507,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 					  params.target,
 					  params.filter,
 					  params.show_ext_vars);
-		strfilter__delete(params.filter);
-		params.filter = NULL;
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show vars.", ret);
 		return ret;

From b1019d5e6e605190d008003a382407f23e19f807 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 00:22:57 +0900
Subject: [PATCH 088/305] perf probe: Cleanup and consolidate command parsers

To simplify the 'perf probe' command code, consolidate some similar
functions and use command short-name for command classification, instead
of separate booleans.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150505152257.18790.41548.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c | 110 ++++++++++++++-----------------------
 1 file changed, 42 insertions(+), 68 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 08c948138424..53d475b1422e 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -48,14 +48,10 @@
 
 /* Session management structure */
 static struct {
+	int command;	/* Command short_name */
 	bool list_events;
 	bool force_add;
-	bool show_lines;
-	bool show_vars;
 	bool show_ext_vars;
-	bool show_funcs;
-	bool mod_events;
-	bool del_events;
 	bool uprobes;
 	bool quiet;
 	bool target_used;
@@ -175,44 +171,11 @@ static int parse_probe_event_argv(int argc, const char **argv)
 
 		len += sprintf(&buf[len], "%s ", argv[i]);
 	}
-	params.mod_events = true;
 	ret = parse_probe_event(buf);
 	free(buf);
 	return ret;
 }
 
-static int opt_add_probe_event(const struct option *opt __maybe_unused,
-			      const char *str, int unset __maybe_unused)
-{
-	if (str) {
-		params.mod_events = true;
-		return parse_probe_event(str);
-	} else
-		return 0;
-}
-
-static int opt_del_probe_event(const struct option *opt __maybe_unused,
-			       const char *str, int unset __maybe_unused)
-{
-	if (str) {
-		params.del_events = true;
-		return params_add_filter(str);
-	}
-	return 0;
-}
-
-static int opt_list_probe_event(const struct option *opt __maybe_unused,
-				const char *str, int unset)
-{
-	if (!unset)
-		params.list_events = true;
-
-	if (str)
-		return params_add_filter(str);
-
-	return 0;
-}
-
 static int opt_set_target(const struct option *opt, const char *str,
 			int unset __maybe_unused)
 {
@@ -250,8 +213,10 @@ static int opt_set_target(const struct option *opt, const char *str,
 	return ret;
 }
 
+/* Command option callbacks */
+
 #ifdef HAVE_DWARF_SUPPORT
-static int opt_show_lines(const struct option *opt __maybe_unused,
+static int opt_show_lines(const struct option *opt,
 			  const char *str, int unset __maybe_unused)
 {
 	int ret = 0;
@@ -259,19 +224,19 @@ static int opt_show_lines(const struct option *opt __maybe_unused,
 	if (!str)
 		return 0;
 
-	if (params.show_lines) {
+	if (params.command == 'L') {
 		pr_warning("Warning: more than one --line options are"
 			   " detected. Only the first one is valid.\n");
 		return 0;
 	}
 
-	params.show_lines = true;
+	params.command = opt->short_name;
 	ret = parse_line_range_desc(str, &params.line_range);
 
 	return ret;
 }
 
-static int opt_show_vars(const struct option *opt __maybe_unused,
+static int opt_show_vars(const struct option *opt,
 			 const char *str, int unset __maybe_unused)
 {
 	struct perf_probe_event *pev = &params.events[params.nevents];
@@ -285,16 +250,27 @@ static int opt_show_vars(const struct option *opt __maybe_unused,
 		pr_err("  Error: '--vars' doesn't accept arguments.\n");
 		return -EINVAL;
 	}
-	params.show_vars = true;
+	params.command = opt->short_name;
 
 	return ret;
 }
 #endif
-static int opt_show_funcs(const struct option *opt __maybe_unused,
-			  const char *str, int unset)
+static int opt_add_probe_event(const struct option *opt,
+			      const char *str, int unset __maybe_unused)
+{
+	if (str) {
+		params.command = opt->short_name;
+		return parse_probe_event(str);
+	}
+
+	return 0;
+}
+
+static int opt_set_filter_with_command(const struct option *opt,
+				       const char *str, int unset)
 {
 	if (!unset)
-		params.show_funcs = true;
+		params.command = opt->short_name;
 
 	if (str)
 		return params_add_filter(str);
@@ -360,10 +336,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN('q', "quiet", &params.quiet,
 		    "be quiet (do not show any mesages)"),
 	OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT",
-			     "list up probe events", opt_list_probe_event,
-			     DEFAULT_LIST_FILTER),
+			     "list up probe events",
+			     opt_set_filter_with_command, DEFAULT_LIST_FILTER),
 	OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.",
-		opt_del_probe_event),
+		     opt_set_filter_with_command),
 	OPT_CALLBACK('a', "add", NULL,
 #ifdef HAVE_DWARF_SUPPORT
 		"[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT"
@@ -412,7 +388,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		 "Set how many probe points can be found for a probe."),
 	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
 			     "Show potential probe-able functions.",
-			     opt_show_funcs, DEFAULT_FUNC_FILTER),
+			     opt_set_filter_with_command, DEFAULT_FUNC_FILTER),
 	OPT_CALLBACK('\0', "filter", NULL,
 		     "[!]FILTER", "Set a filter (with --vars/funcs only)\n"
 		     "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n"
@@ -444,11 +420,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 			pr_warning("  Error: '-' is not supported.\n");
 			usage_with_options(probe_usage, options);
 		}
+		if (params.command && params.command != 'a') {
+			pr_warning("  Error: another command except --add is set.\n");
+			usage_with_options(probe_usage, options);
+		}
 		ret = parse_probe_event_argv(argc, argv);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Command Parse Error.", ret);
 			return ret;
 		}
+		params.command = 'a';
 	}
 
 	if (params.quiet) {
@@ -462,16 +443,13 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (params.max_probe_points == 0)
 		params.max_probe_points = MAX_PROBES;
 
-	if ((!params.nevents && !params.del_events && !params.list_events &&
-	     !params.show_lines && !params.show_funcs))
-		usage_with_options(probe_usage, options);
-
 	/*
 	 * Only consider the user's kernel image path if given.
 	 */
 	symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL);
 
-	if (params.list_events) {
+	switch (params.command) {
+	case 'l':
 		if (params.uprobes) {
 			pr_warning("  Error: Don't use --list with --exec.\n");
 			usage_with_options(probe_usage, options);
@@ -480,24 +458,20 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show event list.", ret);
 		return ret;
-	}
-	if (params.show_funcs) {
+	case 'F':
 		ret = show_available_funcs(params.target, params.filter,
 					params.uprobes);
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show functions.", ret);
 		return ret;
-	}
-
 #ifdef HAVE_DWARF_SUPPORT
-	if (params.show_lines) {
+	case 'L':
 		ret = show_line_range(&params.line_range, params.target,
 				      params.uprobes);
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show lines.", ret);
 		return ret;
-	}
-	if (params.show_vars) {
+	case 'V':
 		if (!params.filter)
 			params.filter = strfilter__new(DEFAULT_VAR_FILTER,
 						       NULL);
@@ -510,18 +484,15 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show vars.", ret);
 		return ret;
-	}
 #endif
-
-	if (params.del_events) {
+	case 'd':
 		ret = del_perf_probe_events(params.filter);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Failed to delete events.", ret);
 			return ret;
 		}
-	}
-
-	if (params.nevents) {
+		break;
+	case 'a':
 		/* Ensure the last given target is used */
 		if (params.target && !params.target_used) {
 			pr_warning("  Error: -x/-m must follow the probe definitions.\n");
@@ -535,6 +506,9 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 			pr_err_with_code("  Error: Failed to add events.", ret);
 			return ret;
 		}
+		break;
+	default:
+		usage_with_options(probe_usage, options);
 	}
 	return 0;
 }

From a923e2c4b14f99f70692f82ee7bd63717604b738 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 5 May 2015 23:52:52 +0900
Subject: [PATCH 089/305] perf kmem: Show warning when trying to run stat
 without record

Sometimes one can mistakenly run 'perf kmem stat' without running 'perf
kmem record' before or with a different configuration like recording
--slab and stat --page.  Show a warning message like the one below to
inform the user:

  # perf kmem stat --page --caller
  No page allocation events found.  Have you run 'perf kmem record --page'?

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Pekka Enberg <penberg@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Joonsoo Kim <js1304@gmail.com>
Cc: Minchan Kim <minchan@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: linux-mm@kvack.org
Link: http://lkml.kernel.org/r/1430837572-31395-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 828b7284e547..e628bf1a0c24 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -1882,6 +1882,7 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	};
 	struct perf_session *session;
 	int ret = -1;
+	const char errmsg[] = "No %s allocation events found.  Have you run 'perf kmem record --%s'?\n";
 
 	perf_config(kmem_config, NULL);
 	argc = parse_options_subcommand(argc, argv, kmem_options,
@@ -1908,11 +1909,21 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (session == NULL)
 		return -1;
 
-	if (kmem_page) {
-		struct perf_evsel *evsel = perf_evlist__first(session->evlist);
+	if (kmem_slab) {
+		if (!perf_evlist__find_tracepoint_by_name(session->evlist,
+							  "kmem:kmalloc")) {
+			pr_err(errmsg, "slab", "slab");
+			return -1;
+		}
+	}
 
-		if (evsel == NULL || evsel->tp_format == NULL) {
-			pr_err("invalid event found.. aborting\n");
+	if (kmem_page) {
+		struct perf_evsel *evsel;
+
+		evsel = perf_evlist__find_tracepoint_by_name(session->evlist,
+							     "kmem:mm_page_alloc");
+		if (evsel == NULL) {
+			pr_err(errmsg, "page", "page");
 			return -1;
 		}
 

From 297508216556fbf4e3f70fb97d03280741b4a709 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:12 +0900
Subject: [PATCH 090/305] perf tools: Move TUI-specific fields into unnamed
 union

Since perf diff only supports stdio output, TUI fields are only accessed
from perf report (or perf top).  So add a new unnamed union and move
struct hist_entry_tui and those TUI-specific fields.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index de3303fe726d..7f0c0a8d615d 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -93,18 +93,24 @@ struct hist_entry {
 	s32			cpu;
 	u8			cpumode;
 
-	struct hist_entry_diff	diff;
-
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
 
-	/* XXX These two should move to some tree widget lib */
-	u16			row_offset;
-	u16			nr_rows;
-
 	bool			init_have_children;
 	char			level;
 	u8			filtered;
+	union {
+		/*
+		 * Since perf diff only supports the stdio output, TUI
+		 * fields are only accessed from perf report (or perf
+		 * top).  So make it an union to reduce memory usage.
+		 */
+		struct hist_entry_diff	diff;
+		struct /* for TUI */ {
+			u16	row_offset;
+			u16	nr_rows;
+		};
+	};
 	char			*srcline;
 	struct symbol		*parent;
 	struct rb_root		sorted_chain;

From d8a0f80042efc4ba08977f3d66fc4678037fe456 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:13 +0900
Subject: [PATCH 091/305] perf tools: Move init_have_children field to the
 unnamed union

The init_have_children is used to init callchain info only for TUI.  So
it'd be better to move it to the TUI-specific unnamed union member.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 7f0c0a8d615d..4d923e6e0069 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -96,7 +96,6 @@ struct hist_entry {
 	/* We are added by hists__add_dummy_entry. */
 	bool			dummy;
 
-	bool			init_have_children;
 	char			level;
 	u8			filtered;
 	union {
@@ -109,6 +108,7 @@ struct hist_entry {
 		struct /* for TUI */ {
 			u16	row_offset;
 			u16	nr_rows;
+			bool	init_have_children;
 		};
 	};
 	char			*srcline;

From f2b487db45f2aa203892384f6a08f0a761edad5d Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:14 +0900
Subject: [PATCH 092/305] perf hists browser: Fix possible memory leak

The options array saves strings for each popup menu item.  The number of
items can be vary according to the currently selected item.  So it can
leak some memory if it's exited from a small item.  Fix it by freeing
all items when loop terminates.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 995b7a8596b1..0972d4722297 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1424,7 +1424,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	struct hist_browser *browser = hist_browser__new(hists);
 	struct branch_info *bi;
 	struct pstack *fstack;
-	char *options[16];
+#define MAX_OPTIONS  16
+	char *options[MAX_OPTIONS];
 	int nr_options = 0;
 	int key = -1;
 	char buf[64];
@@ -1691,7 +1692,8 @@ skip_annotation:
 				"Switch to another data file in PWD") > 0)
 			switch_data = nr_options++;
 add_exit_option:
-		options[nr_options++] = (char *)"Exit";
+		if (asprintf(&options[nr_options], "Exit") > 0)
+			nr_options++;
 retry_popup_menu:
 		choice = ui__popup_menu(nr_options, options);
 
@@ -1812,7 +1814,7 @@ out_free_stack:
 	pstack__delete(fstack);
 out:
 	hist_browser__delete(browser);
-	free_popup_options(options, nr_options - 1);
+	free_popup_options(options, MAX_OPTIONS);
 	return key;
 }
 

From c2a51ab802d17c572cd0a940fd97538b75aa7889 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:15 +0900
Subject: [PATCH 093/305] perf hists browser: Save hist_browser_timer pointer
 in hist_browser

The struct hist_browser_timer is to carry perf-top related info
throughout the hist browser code.  So it'd be better to keep in the
struct hist_browser.  This is a preparation to later change.

Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-5-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 0972d4722297..0847623f42e0 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -25,6 +25,7 @@ struct hist_browser {
 	struct hists	    *hists;
 	struct hist_entry   *he_selection;
 	struct map_symbol   *selection;
+	struct hist_browser_timer *hbt;
 	int		     print_seq;
 	bool		     show_dso;
 	bool		     show_headers;
@@ -406,11 +407,11 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
 		"Or reduce the sampling frequency.");
 }
 
-static int hist_browser__run(struct hist_browser *browser,
-			     struct hist_browser_timer *hbt)
+static int hist_browser__run(struct hist_browser *browser)
 {
 	int key;
 	char title[160];
+	struct hist_browser_timer *hbt = browser->hbt;
 	int delay_secs = hbt ? hbt->refresh : 0;
 
 	browser->b.entries = &browser->hists->entries;
@@ -1195,7 +1196,8 @@ static int hist_browser__dump(struct hist_browser *browser)
 	return 0;
 }
 
-static struct hist_browser *hist_browser__new(struct hists *hists)
+static struct hist_browser *hist_browser__new(struct hists *hists,
+					      struct hist_browser_timer *hbt)
 {
 	struct hist_browser *browser = zalloc(sizeof(*browser));
 
@@ -1206,6 +1208,7 @@ static struct hist_browser *hist_browser__new(struct hists *hists)
 		browser->b.seek = ui_browser__hists_seek;
 		browser->b.use_navkeypressed = true;
 		browser->show_headers = symbol_conf.show_hist_headers;
+		browser->hbt = hbt;
 	}
 
 	return browser;
@@ -1421,7 +1424,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    struct perf_session_env *env)
 {
 	struct hists *hists = evsel__hists(evsel);
-	struct hist_browser *browser = hist_browser__new(hists);
+	struct hist_browser *browser = hist_browser__new(hists, hbt);
 	struct branch_info *bi;
 	struct pstack *fstack;
 #define MAX_OPTIONS  16
@@ -1499,7 +1502,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser, hbt);
+		key = hist_browser__run(browser);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);

From 01f00a1cd158ecd86d5f561ded271597d0550313 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:16 +0900
Subject: [PATCH 094/305] perf hists browser: Save pstack in the hist_browser

The struct pstack is to save currently applied thread and/or dso filters
in the browser.  So it'd be better to keep in the struct hist_browser.
This is a preparation to later change.

Suggested-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-6-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 0847623f42e0..26d5548b796e 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -26,6 +26,7 @@ struct hist_browser {
 	struct hist_entry   *he_selection;
 	struct map_symbol   *selection;
 	struct hist_browser_timer *hbt;
+	struct pstack	    *pstack;
 	int		     print_seq;
 	bool		     show_dso;
 	bool		     show_headers;
@@ -1426,7 +1427,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	struct hists *hists = evsel__hists(evsel);
 	struct hist_browser *browser = hist_browser__new(hists, hbt);
 	struct branch_info *bi;
-	struct pstack *fstack;
 #define MAX_OPTIONS  16
 	char *options[MAX_OPTIONS];
 	int nr_options = 0;
@@ -1477,8 +1477,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		hist_browser__update_nr_entries(browser);
 	}
 
-	fstack = pstack__new(2);
-	if (fstack == NULL)
+	browser->pstack = pstack__new(2);
+	if (browser->pstack == NULL)
 		goto out;
 
 	ui_helpline__push(helpline);
@@ -1587,7 +1587,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		case K_LEFT: {
 			const void *top;
 
-			if (pstack__empty(fstack)) {
+			if (pstack__empty(browser->pstack)) {
 				/*
 				 * Go back to the perf_evsel_menu__run or other user
 				 */
@@ -1595,7 +1595,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 					goto out_free_stack;
 				continue;
 			}
-			top = pstack__pop(fstack);
+			top = pstack__pop(browser->pstack);
 			if (top == &browser->hists->dso_filter)
 				goto zoom_out_dso;
 			if (top == &browser->hists->thread_filter)
@@ -1753,7 +1753,7 @@ do_annotate:
 		else if (choice == zoom_dso) {
 zoom_dso:
 			if (browser->hists->dso_filter) {
-				pstack__remove(fstack, &browser->hists->dso_filter);
+				pstack__remove(browser->pstack, &browser->hists->dso_filter);
 zoom_out_dso:
 				ui_helpline__pop();
 				browser->hists->dso_filter = NULL;
@@ -1765,14 +1765,14 @@ zoom_out_dso:
 						   dso->kernel ? "the Kernel" : dso->short_name);
 				browser->hists->dso_filter = dso;
 				perf_hpp__set_elide(HISTC_DSO, true);
-				pstack__push(fstack, &browser->hists->dso_filter);
+				pstack__push(browser->pstack, &browser->hists->dso_filter);
 			}
 			hists__filter_by_dso(hists);
 			hist_browser__reset(browser);
 		} else if (choice == zoom_thread) {
 zoom_thread:
 			if (browser->hists->thread_filter) {
-				pstack__remove(fstack, &browser->hists->thread_filter);
+				pstack__remove(browser->pstack, &browser->hists->thread_filter);
 zoom_out_thread:
 				ui_helpline__pop();
 				thread__zput(browser->hists->thread_filter);
@@ -1783,7 +1783,7 @@ zoom_out_thread:
 						   thread->tid);
 				browser->hists->thread_filter = thread__get(thread);
 				perf_hpp__set_elide(HISTC_THREAD, false);
-				pstack__push(fstack, &browser->hists->thread_filter);
+				pstack__push(browser->pstack, &browser->hists->thread_filter);
 			}
 			hists__filter_by_thread(hists);
 			hist_browser__reset(browser);
@@ -1814,7 +1814,7 @@ do_data_switch:
 		}
 	}
 out_free_stack:
-	pstack__delete(fstack);
+	pstack__delete(browser->pstack);
 out:
 	hist_browser__delete(browser);
 	free_popup_options(options, MAX_OPTIONS);

From b1a9ceef724341ce05b125d39abf9cfc7059b949 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:17 +0900
Subject: [PATCH 095/305] perf hists browser: Save perf_session_env in the
 hist_browser

The perf_session_env is to save system informantion at the recording
time to be refered in the hist browser.  So it'd be better to keep in
the struct hist_browser.  This is a preparation to later change.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-7-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 26d5548b796e..45704d6383e4 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -27,6 +27,7 @@ struct hist_browser {
 	struct map_symbol   *selection;
 	struct hist_browser_timer *hbt;
 	struct pstack	    *pstack;
+	struct perf_session_env *env;
 	int		     print_seq;
 	bool		     show_dso;
 	bool		     show_headers;
@@ -1198,7 +1199,8 @@ static int hist_browser__dump(struct hist_browser *browser)
 }
 
 static struct hist_browser *hist_browser__new(struct hists *hists,
-					      struct hist_browser_timer *hbt)
+					      struct hist_browser_timer *hbt,
+					      struct perf_session_env *env)
 {
 	struct hist_browser *browser = zalloc(sizeof(*browser));
 
@@ -1210,6 +1212,7 @@ static struct hist_browser *hist_browser__new(struct hists *hists,
 		browser->b.use_navkeypressed = true;
 		browser->show_headers = symbol_conf.show_hist_headers;
 		browser->hbt = hbt;
+		browser->env = env;
 	}
 
 	return browser;
@@ -1425,7 +1428,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				    struct perf_session_env *env)
 {
 	struct hists *hists = evsel__hists(evsel);
-	struct hist_browser *browser = hist_browser__new(hists, hbt);
+	struct hist_browser *browser = hist_browser__new(hists, hbt, env);
 	struct branch_info *bi;
 #define MAX_OPTIONS  16
 	char *options[MAX_OPTIONS];

From bc7cad429bcdda6f112525c17db9577a1be4c8aa Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:18 +0900
Subject: [PATCH 096/305] perf hists browser: Split popup menu actions

Currently perf_evsel__hists_browse() function spins on a huge loop and
handles many key actions.  Since it's hard to read and modify, let's
split it out into small helper functions.

This patch introduces do_XXX() functions which corresponds to each goto
label.  This way we can call such functions both from key press actions
and popup menu actions.

No functional change intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-8-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 242 +++++++++++++++++++++------------
 1 file changed, 156 insertions(+), 86 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 45704d6383e4..7d88a1cdf04b 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1402,6 +1402,120 @@ close_file_and_continue:
 	return ret;
 }
 
+static int
+do_annotate(struct hist_browser *browser, struct map_symbol *ms)
+{
+	struct perf_evsel *evsel;
+	struct annotation *notes;
+	struct hist_entry *he;
+	int err;
+
+	if (!objdump_path && perf_session_env__lookup_objdump(browser->env))
+		return 0;
+
+	notes = symbol__annotation(ms->sym);
+	if (!notes->src)
+		return 0;
+
+	evsel = hists_to_evsel(browser->hists);
+	err = map_symbol__tui_annotate(ms, evsel, browser->hbt);
+	he = hist_browser__selected_entry(browser);
+	/*
+	 * offer option to annotate the other branch source or target
+	 * (if they exists) when returning from annotate
+	 */
+	if ((err == 'q' || err == CTRL('c')) && he->branch_info)
+		return 1;
+
+	ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
+	if (err)
+		ui_browser__handle_resize(&browser->b);
+	return 0;
+}
+
+static int
+do_zoom_thread(struct hist_browser *browser, struct thread *thread)
+{
+	if (browser->hists->thread_filter) {
+		pstack__remove(browser->pstack, &browser->hists->thread_filter);
+		perf_hpp__set_elide(HISTC_THREAD, false);
+		thread__zput(browser->hists->thread_filter);
+		ui_helpline__pop();
+	} else {
+		ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
+				   thread->comm_set ? thread__comm_str(thread) : "",
+				   thread->tid);
+		browser->hists->thread_filter = thread__get(thread);
+		perf_hpp__set_elide(HISTC_THREAD, false);
+		pstack__push(browser->pstack, &browser->hists->thread_filter);
+	}
+
+	hists__filter_by_thread(browser->hists);
+	hist_browser__reset(browser);
+	return 0;
+}
+
+static int
+do_zoom_dso(struct hist_browser *browser, struct dso *dso)
+{
+	if (browser->hists->dso_filter) {
+		pstack__remove(browser->pstack, &browser->hists->dso_filter);
+		perf_hpp__set_elide(HISTC_DSO, false);
+		browser->hists->dso_filter = NULL;
+		ui_helpline__pop();
+	} else {
+		if (dso == NULL)
+			return 0;
+		ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
+				   dso->kernel ? "the Kernel" : dso->short_name);
+		browser->hists->dso_filter = dso;
+		perf_hpp__set_elide(HISTC_DSO, true);
+		pstack__push(browser->pstack, &browser->hists->dso_filter);
+	}
+
+	hists__filter_by_dso(browser->hists);
+	hist_browser__reset(browser);
+	return 0;
+}
+
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused, struct map *map)
+{
+	map__browse(map);
+	return 0;
+}
+
+static int
+do_run_script(struct hist_browser *browser __maybe_unused,
+	      struct thread *thread, struct symbol *sym)
+{
+	char script_opt[64];
+	memset(script_opt, 0, sizeof(script_opt));
+
+	if (thread) {
+		scnprintf(script_opt, sizeof(script_opt), " -c %s ",
+			  thread__comm_str(thread));
+	} else if (sym) {
+		scnprintf(script_opt, sizeof(script_opt), " -S %s ",
+			  sym->name);
+	}
+
+	script_browse(script_opt);
+	return 0;
+}
+
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused, int key)
+{
+	if (switch_data_file()) {
+		ui__warning("Won't switch the data files due to\n"
+			    "no valid data file get selected!\n");
+		return key;
+	}
+
+	return K_SWITCH_INPUT_DATA;
+}
+
 static void hist_browser__update_nr_entries(struct hist_browser *hb)
 {
 	u64 nr_entries = 0;
@@ -1435,7 +1549,6 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	int nr_options = 0;
 	int key = -1;
 	char buf[64];
-	char script_opt[64];
 	int delay_secs = hbt ? hbt->refresh : 0;
 	struct perf_hpp_fmt *fmt;
 
@@ -1496,7 +1609,8 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 	while (1) {
 		struct thread *thread = NULL;
-		const struct dso *dso = NULL;
+		struct dso *dso = NULL;
+		struct map_symbol ms;
 		int choice = 0,
 		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
 		    annotate_f = -2, annotate_t = -2, browse_map = -2;
@@ -1533,17 +1647,24 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			    browser->selection->sym == NULL ||
 			    browser->selection->map->dso->annotate_warned)
 				continue;
-			goto do_annotate;
+
+			ms.map = browser->selection->map;
+			ms.sym = browser->selection->sym;
+
+			do_annotate(browser, &ms);
+			continue;
 		case 'P':
 			hist_browser__dump(browser);
 			continue;
 		case 'd':
-			goto zoom_dso;
+			do_zoom_dso(browser, dso);
+			continue;
 		case 'V':
 			browser->show_dso = !browser->show_dso;
 			continue;
 		case 't':
-			goto zoom_thread;
+			do_zoom_thread(browser, thread);
+			continue;
 		case '/':
 			if (ui_browser__input_window("Symbol to show",
 					"Please enter the name of symbol you want to see",
@@ -1556,11 +1677,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			continue;
 		case 'r':
 			if (is_report_browser(hbt))
-				goto do_scripts;
+				do_run_script(browser, NULL, NULL);
 			continue;
 		case 's':
-			if (is_report_browser(hbt))
-				goto do_data_switch;
+			if (is_report_browser(hbt)) {
+				key = do_switch_data(browser, key);
+				if (key == K_SWITCH_INPUT_DATA)
+					goto out_free_stack;
+			}
 			continue;
 		case 'i':
 			/* env->arch is NULL for live-mode (i.e. perf top) */
@@ -1599,10 +1723,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 				continue;
 			}
 			top = pstack__pop(browser->pstack);
-			if (top == &browser->hists->dso_filter)
-				goto zoom_out_dso;
-			if (top == &browser->hists->thread_filter)
-				goto zoom_out_thread;
+			if (top == &browser->hists->dso_filter) {
+				perf_hpp__set_elide(HISTC_DSO, false);
+				browser->hists->dso_filter = NULL;
+				hists__filter_by_dso(browser->hists);
+			}
+			if (top == &browser->hists->thread_filter) {
+				perf_hpp__set_elide(HISTC_THREAD, false);
+				thread__zput(browser->hists->thread_filter);
+				hists__filter_by_thread(browser->hists);
+			}
+			ui_helpline__pop();
+			hist_browser__reset(browser);
 			continue;
 		}
 		case K_ESC:
@@ -1713,12 +1845,6 @@ retry_popup_menu:
 
 		if (choice == annotate || choice == annotate_t || choice == annotate_f) {
 			struct hist_entry *he;
-			struct annotation *notes;
-			struct map_symbol ms;
-			int err;
-do_annotate:
-			if (!objdump_path && perf_session_env__lookup_objdump(env))
-				continue;
 
 			he = hist_browser__selected_entry(browser);
 			if (he == NULL)
@@ -1734,86 +1860,30 @@ do_annotate:
 				ms = *browser->selection;
 			}
 
-			notes = symbol__annotation(ms.sym);
-			if (!notes->src)
-				continue;
-
-			err = map_symbol__tui_annotate(&ms, evsel, hbt);
-			/*
-			 * offer option to annotate the other branch source or target
-			 * (if they exists) when returning from annotate
-			 */
-			if ((err == 'q' || err == CTRL('c'))
-			    && annotate_t != -2 && annotate_f != -2)
+			if (do_annotate(browser, &ms) == 1)
 				goto retry_popup_menu;
-
-			ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries);
-			if (err)
-				ui_browser__handle_resize(&browser->b);
-
-		} else if (choice == browse_map)
-			map__browse(browser->selection->map);
-		else if (choice == zoom_dso) {
-zoom_dso:
-			if (browser->hists->dso_filter) {
-				pstack__remove(browser->pstack, &browser->hists->dso_filter);
-zoom_out_dso:
-				ui_helpline__pop();
-				browser->hists->dso_filter = NULL;
-				perf_hpp__set_elide(HISTC_DSO, false);
-			} else {
-				if (dso == NULL)
-					continue;
-				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"",
-						   dso->kernel ? "the Kernel" : dso->short_name);
-				browser->hists->dso_filter = dso;
-				perf_hpp__set_elide(HISTC_DSO, true);
-				pstack__push(browser->pstack, &browser->hists->dso_filter);
-			}
-			hists__filter_by_dso(hists);
-			hist_browser__reset(browser);
+		} else if (choice == browse_map) {
+			do_browse_map(browser, browser->selection->map);
+		} else if (choice == zoom_dso) {
+			do_zoom_dso(browser, dso);
 		} else if (choice == zoom_thread) {
-zoom_thread:
-			if (browser->hists->thread_filter) {
-				pstack__remove(browser->pstack, &browser->hists->thread_filter);
-zoom_out_thread:
-				ui_helpline__pop();
-				thread__zput(browser->hists->thread_filter);
-				perf_hpp__set_elide(HISTC_THREAD, false);
-			} else {
-				ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"",
-						   thread->comm_set ? thread__comm_str(thread) : "",
-						   thread->tid);
-				browser->hists->thread_filter = thread__get(thread);
-				perf_hpp__set_elide(HISTC_THREAD, false);
-				pstack__push(browser->pstack, &browser->hists->thread_filter);
-			}
-			hists__filter_by_thread(hists);
-			hist_browser__reset(browser);
+			do_zoom_thread(browser, thread);
 		}
 		/* perf scripts support */
 		else if (choice == scripts_all || choice == scripts_comm ||
 				choice == scripts_symbol) {
-do_scripts:
-			memset(script_opt, 0, 64);
-
 			if (choice == scripts_comm)
-				sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread));
-
+				do_run_script(browser, browser->he_selection->thread, NULL);
 			if (choice == scripts_symbol)
-				sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name);
-
-			script_browse(script_opt);
+				do_run_script(browser, NULL, browser->he_selection->ms.sym);
+			if (choice == scripts_all)
+				do_run_script(browser, NULL, NULL);
 		}
 		/* Switch to another data file */
 		else if (choice == switch_data) {
-do_data_switch:
-			if (!switch_data_file()) {
-				key = K_SWITCH_INPUT_DATA;
+			key = do_switch_data(browser, key);
+			if (key == K_SWITCH_INPUT_DATA)
 				break;
-			} else
-				ui__warning("Won't switch the data files due to\n"
-					"no valid data file get selected!\n");
 		}
 	}
 out_free_stack:

From ea7cd59233097984850adc0e4119644f089be734 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Wed, 22 Apr 2015 16:18:19 +0900
Subject: [PATCH 097/305] perf hists browser: Split popup menu actions - part 2

Currently perf_evsel__hists_browse() function spins on a huge loop and
handles many key actions.  Since it's hard to read and modify, let's
split it out into small helper functions.

The add_XXX_opt() functions are to register popup menu item on the
selected entry.  When it adds an item, it also saves related data into
struct popup_action and returns 1 so that it can increase the number of
items (nr_options).

With this change, we can simplify the code just to call selected
callback function without considering various conditions.  A callback
function named do_XXX is called with saved data when the item is
selected by user.

No functional change intended.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-9-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 356 ++++++++++++++++++++-------------
 1 file changed, 215 insertions(+), 141 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 7d88a1cdf04b..9bd7b38de64c 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1402,8 +1402,16 @@ close_file_and_continue:
 	return ret;
 }
 
+struct popup_action {
+	struct thread 		*thread;
+	struct dso		*dso;
+	struct map_symbol 	ms;
+
+	int (*fn)(struct hist_browser *browser, struct popup_action *act);
+};
+
 static int
-do_annotate(struct hist_browser *browser, struct map_symbol *ms)
+do_annotate(struct hist_browser *browser, struct popup_action *act)
 {
 	struct perf_evsel *evsel;
 	struct annotation *notes;
@@ -1413,12 +1421,12 @@ do_annotate(struct hist_browser *browser, struct map_symbol *ms)
 	if (!objdump_path && perf_session_env__lookup_objdump(browser->env))
 		return 0;
 
-	notes = symbol__annotation(ms->sym);
+	notes = symbol__annotation(act->ms.sym);
 	if (!notes->src)
 		return 0;
 
 	evsel = hists_to_evsel(browser->hists);
-	err = map_symbol__tui_annotate(ms, evsel, browser->hbt);
+	err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt);
 	he = hist_browser__selected_entry(browser);
 	/*
 	 * offer option to annotate the other branch source or target
@@ -1434,8 +1442,27 @@ do_annotate(struct hist_browser *browser, struct map_symbol *ms)
 }
 
 static int
-do_zoom_thread(struct hist_browser *browser, struct thread *thread)
+add_annotate_opt(struct hist_browser *browser __maybe_unused,
+		 struct popup_action *act, char **optstr,
+		 struct map *map, struct symbol *sym)
 {
+	if (sym == NULL || map->dso->annotate_warned)
+		return 0;
+
+	if (asprintf(optstr, "Annotate %s", sym->name) < 0)
+		return 0;
+
+	act->ms.map = map;
+	act->ms.sym = sym;
+	act->fn = do_annotate;
+	return 1;
+}
+
+static int
+do_zoom_thread(struct hist_browser *browser, struct popup_action *act)
+{
+	struct thread *thread = act->thread;
+
 	if (browser->hists->thread_filter) {
 		pstack__remove(browser->pstack, &browser->hists->thread_filter);
 		perf_hpp__set_elide(HISTC_THREAD, false);
@@ -1456,8 +1483,28 @@ do_zoom_thread(struct hist_browser *browser, struct thread *thread)
 }
 
 static int
-do_zoom_dso(struct hist_browser *browser, struct dso *dso)
+add_thread_opt(struct hist_browser *browser, struct popup_action *act,
+	       char **optstr, struct thread *thread)
 {
+	if (thread == NULL)
+		return 0;
+
+	if (asprintf(optstr, "Zoom %s %s(%d) thread",
+		     browser->hists->thread_filter ? "out of" : "into",
+		     thread->comm_set ? thread__comm_str(thread) : "",
+		     thread->tid) < 0)
+		return 0;
+
+	act->thread = thread;
+	act->fn = do_zoom_thread;
+	return 1;
+}
+
+static int
+do_zoom_dso(struct hist_browser *browser, struct popup_action *act)
+{
+	struct dso *dso = act->dso;
+
 	if (browser->hists->dso_filter) {
 		pstack__remove(browser->pstack, &browser->hists->dso_filter);
 		perf_hpp__set_elide(HISTC_DSO, false);
@@ -1479,25 +1526,58 @@ do_zoom_dso(struct hist_browser *browser, struct dso *dso)
 }
 
 static int
-do_browse_map(struct hist_browser *browser __maybe_unused, struct map *map)
+add_dso_opt(struct hist_browser *browser, struct popup_action *act,
+	    char **optstr, struct dso *dso)
 {
-	map__browse(map);
+	if (dso == NULL)
+		return 0;
+
+	if (asprintf(optstr, "Zoom %s %s DSO",
+		     browser->hists->dso_filter ? "out of" : "into",
+		     dso->kernel ? "the Kernel" : dso->short_name) < 0)
+		return 0;
+
+	act->dso = dso;
+	act->fn = do_zoom_dso;
+	return 1;
+}
+
+static int
+do_browse_map(struct hist_browser *browser __maybe_unused,
+	      struct popup_action *act)
+{
+	map__browse(act->ms.map);
 	return 0;
 }
 
+static int
+add_map_opt(struct hist_browser *browser __maybe_unused,
+	    struct popup_action *act, char **optstr, struct map *map)
+{
+	if (map == NULL)
+		return 0;
+
+	if (asprintf(optstr, "Browse map details") < 0)
+		return 0;
+
+	act->ms.map = map;
+	act->fn = do_browse_map;
+	return 1;
+}
+
 static int
 do_run_script(struct hist_browser *browser __maybe_unused,
-	      struct thread *thread, struct symbol *sym)
+	      struct popup_action *act)
 {
 	char script_opt[64];
 	memset(script_opt, 0, sizeof(script_opt));
 
-	if (thread) {
+	if (act->thread) {
 		scnprintf(script_opt, sizeof(script_opt), " -c %s ",
-			  thread__comm_str(thread));
-	} else if (sym) {
+			  thread__comm_str(act->thread));
+	} else if (act->ms.sym) {
 		scnprintf(script_opt, sizeof(script_opt), " -S %s ",
-			  sym->name);
+			  act->ms.sym->name);
 	}
 
 	script_browse(script_opt);
@@ -1505,17 +1585,74 @@ do_run_script(struct hist_browser *browser __maybe_unused,
 }
 
 static int
-do_switch_data(struct hist_browser *browser __maybe_unused, int key)
+add_script_opt(struct hist_browser *browser __maybe_unused,
+	       struct popup_action *act, char **optstr,
+	       struct thread *thread, struct symbol *sym)
+{
+	if (thread) {
+		if (asprintf(optstr, "Run scripts for samples of thread [%s]",
+			     thread__comm_str(thread)) < 0)
+			return 0;
+	} else if (sym) {
+		if (asprintf(optstr, "Run scripts for samples of symbol [%s]",
+			     sym->name) < 0)
+			return 0;
+	} else {
+		if (asprintf(optstr, "Run scripts for all samples") < 0)
+			return 0;
+	}
+
+	act->thread = thread;
+	act->ms.sym = sym;
+	act->fn = do_run_script;
+	return 1;
+}
+
+static int
+do_switch_data(struct hist_browser *browser __maybe_unused,
+	       struct popup_action *act __maybe_unused)
 {
 	if (switch_data_file()) {
 		ui__warning("Won't switch the data files due to\n"
 			    "no valid data file get selected!\n");
-		return key;
+		return 0;
 	}
 
 	return K_SWITCH_INPUT_DATA;
 }
 
+static int
+add_switch_opt(struct hist_browser *browser,
+	       struct popup_action *act, char **optstr)
+{
+	if (!is_report_browser(browser->hbt))
+		return 0;
+
+	if (asprintf(optstr, "Switch to another data file in PWD") < 0)
+		return 0;
+
+	act->fn = do_switch_data;
+	return 1;
+}
+
+static int
+do_exit_browser(struct hist_browser *browser __maybe_unused,
+		struct popup_action *act __maybe_unused)
+{
+	return 0;
+}
+
+static int
+add_exit_opt(struct hist_browser *browser __maybe_unused,
+	     struct popup_action *act, char **optstr)
+{
+	if (asprintf(optstr, "Exit") < 0)
+		return 0;
+
+	act->fn = do_exit_browser;
+	return 1;
+}
+
 static void hist_browser__update_nr_entries(struct hist_browser *hb)
 {
 	u64 nr_entries = 0;
@@ -1546,6 +1683,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	struct branch_info *bi;
 #define MAX_OPTIONS  16
 	char *options[MAX_OPTIONS];
+	struct popup_action actions[MAX_OPTIONS];
 	int nr_options = 0;
 	int key = -1;
 	char buf[64];
@@ -1600,6 +1738,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	ui_helpline__push(helpline);
 
 	memset(options, 0, sizeof(options));
+	memset(actions, 0, sizeof(actions));
 
 	perf_hpp__for_each_format(fmt)
 		perf_hpp__reset_width(fmt, hists);
@@ -1610,12 +1749,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	while (1) {
 		struct thread *thread = NULL;
 		struct dso *dso = NULL;
-		struct map_symbol ms;
-		int choice = 0,
-		    annotate = -2, zoom_dso = -2, zoom_thread = -2,
-		    annotate_f = -2, annotate_t = -2, browse_map = -2;
-		int scripts_comm = -2, scripts_symbol = -2,
-		    scripts_all = -2, switch_data = -2;
+		int choice = 0;
 
 		nr_options = 0;
 
@@ -1648,22 +1782,23 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			    browser->selection->map->dso->annotate_warned)
 				continue;
 
-			ms.map = browser->selection->map;
-			ms.sym = browser->selection->sym;
-
-			do_annotate(browser, &ms);
+			actions->ms.map = browser->selection->map;
+			actions->ms.sym = browser->selection->sym;
+			do_annotate(browser, actions);
 			continue;
 		case 'P':
 			hist_browser__dump(browser);
 			continue;
 		case 'd':
-			do_zoom_dso(browser, dso);
+			actions->dso = dso;
+			do_zoom_dso(browser, actions);
 			continue;
 		case 'V':
 			browser->show_dso = !browser->show_dso;
 			continue;
 		case 't':
-			do_zoom_thread(browser, thread);
+			actions->thread = thread;
+			do_zoom_thread(browser, actions);
 			continue;
 		case '/':
 			if (ui_browser__input_window("Symbol to show",
@@ -1676,12 +1811,15 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			}
 			continue;
 		case 'r':
-			if (is_report_browser(hbt))
-				do_run_script(browser, NULL, NULL);
+			if (is_report_browser(hbt)) {
+				actions->thread = NULL;
+				actions->ms.sym = NULL;
+				do_run_script(browser, actions);
+			}
 			continue;
 		case 's':
 			if (is_report_browser(hbt)) {
-				key = do_switch_data(browser, key);
+				key = do_switch_data(browser, actions);
 				if (key == K_SWITCH_INPUT_DATA)
 					goto out_free_stack;
 			}
@@ -1762,129 +1900,65 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			if (bi == NULL)
 				goto skip_annotation;
 
-			if (bi->from.sym != NULL &&
-			    !bi->from.map->dso->annotate_warned &&
-			    asprintf(&options[nr_options], "Annotate %s", bi->from.sym->name) > 0) {
-				annotate_f = nr_options++;
-			}
-
-			if (bi->to.sym != NULL &&
-			    !bi->to.map->dso->annotate_warned &&
-			    (bi->to.sym != bi->from.sym ||
-			     bi->to.map->dso != bi->from.map->dso) &&
-			    asprintf(&options[nr_options], "Annotate %s", bi->to.sym->name) > 0) {
-				annotate_t = nr_options++;
-			}
+			nr_options += add_annotate_opt(browser,
+						       &actions[nr_options],
+						       &options[nr_options],
+						       bi->from.map,
+						       bi->from.sym);
+			if (bi->to.sym != bi->from.sym)
+				nr_options += add_annotate_opt(browser,
+							&actions[nr_options],
+							&options[nr_options],
+							bi->to.map,
+							bi->to.sym);
 		} else {
-			if (browser->selection->sym != NULL &&
-			    !browser->selection->map->dso->annotate_warned) {
-				struct annotation *notes;
-
-				notes = symbol__annotation(browser->selection->sym);
-
-				if (notes->src &&
-				    asprintf(&options[nr_options], "Annotate %s",
-						 browser->selection->sym->name) > 0) {
-					annotate = nr_options++;
-				}
-			}
+			nr_options += add_annotate_opt(browser,
+						       &actions[nr_options],
+						       &options[nr_options],
+						       browser->selection->map,
+						       browser->selection->sym);
 		}
 skip_annotation:
-		if (thread != NULL &&
-		    asprintf(&options[nr_options], "Zoom %s %s(%d) thread",
-			     (browser->hists->thread_filter ? "out of" : "into"),
-			     (thread->comm_set ? thread__comm_str(thread) : ""),
-			     thread->tid) > 0)
-			zoom_thread = nr_options++;
-
-		if (dso != NULL &&
-		    asprintf(&options[nr_options], "Zoom %s %s DSO",
-			     (browser->hists->dso_filter ? "out of" : "into"),
-			     (dso->kernel ? "the Kernel" : dso->short_name)) > 0)
-			zoom_dso = nr_options++;
-
-		if (browser->selection != NULL &&
-		    browser->selection->map != NULL &&
-		    asprintf(&options[nr_options], "Browse map details") > 0)
-			browse_map = nr_options++;
+		nr_options += add_thread_opt(browser, &actions[nr_options],
+					     &options[nr_options], thread);
+		nr_options += add_dso_opt(browser, &actions[nr_options],
+					  &options[nr_options], dso);
+		nr_options += add_map_opt(browser, &actions[nr_options],
+					  &options[nr_options],
+					  browser->selection->map);
 
 		/* perf script support */
 		if (browser->he_selection) {
-			struct symbol *sym;
-
-			if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]",
-				     thread__comm_str(browser->he_selection->thread)) > 0)
-				scripts_comm = nr_options++;
-
-			sym = browser->he_selection->ms.sym;
-			if (sym && sym->namelen &&
-				asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]",
-						sym->name) > 0)
-				scripts_symbol = nr_options++;
+			nr_options += add_script_opt(browser,
+						     &actions[nr_options],
+						     &options[nr_options],
+						     thread, NULL);
+			nr_options += add_script_opt(browser,
+						     &actions[nr_options],
+						     &options[nr_options],
+						     NULL, browser->selection->sym);
 		}
-
-		if (asprintf(&options[nr_options], "Run scripts for all samples") > 0)
-			scripts_all = nr_options++;
-
-		if (is_report_browser(hbt) && asprintf(&options[nr_options],
-				"Switch to another data file in PWD") > 0)
-			switch_data = nr_options++;
+		nr_options += add_script_opt(browser, &actions[nr_options],
+					     &options[nr_options], NULL, NULL);
+		nr_options += add_switch_opt(browser, &actions[nr_options],
+					     &options[nr_options]);
 add_exit_option:
-		if (asprintf(&options[nr_options], "Exit") > 0)
-			nr_options++;
-retry_popup_menu:
-		choice = ui__popup_menu(nr_options, options);
+		nr_options += add_exit_opt(browser, &actions[nr_options],
+					   &options[nr_options]);
 
-		if (choice == nr_options - 1)
-			break;
+		do {
+			struct popup_action *act;
 
-		if (choice == -1) {
-			free_popup_options(options, nr_options - 1);
-			continue;
-		}
-
-		if (choice == annotate || choice == annotate_t || choice == annotate_f) {
-			struct hist_entry *he;
-
-			he = hist_browser__selected_entry(browser);
-			if (he == NULL)
-				continue;
-
-			if (choice == annotate_f) {
-				ms.map = he->branch_info->from.map;
-				ms.sym = he->branch_info->from.sym;
-			} else if (choice == annotate_t) {
-				ms.map = he->branch_info->to.map;
-				ms.sym = he->branch_info->to.sym;
-			} else {
-				ms = *browser->selection;
-			}
-
-			if (do_annotate(browser, &ms) == 1)
-				goto retry_popup_menu;
-		} else if (choice == browse_map) {
-			do_browse_map(browser, browser->selection->map);
-		} else if (choice == zoom_dso) {
-			do_zoom_dso(browser, dso);
-		} else if (choice == zoom_thread) {
-			do_zoom_thread(browser, thread);
-		}
-		/* perf scripts support */
-		else if (choice == scripts_all || choice == scripts_comm ||
-				choice == scripts_symbol) {
-			if (choice == scripts_comm)
-				do_run_script(browser, browser->he_selection->thread, NULL);
-			if (choice == scripts_symbol)
-				do_run_script(browser, NULL, browser->he_selection->ms.sym);
-			if (choice == scripts_all)
-				do_run_script(browser, NULL, NULL);
-		}
-		/* Switch to another data file */
-		else if (choice == switch_data) {
-			key = do_switch_data(browser, key);
-			if (key == K_SWITCH_INPUT_DATA)
+			choice = ui__popup_menu(nr_options, options);
+			if (choice == -1 || choice >= nr_options)
 				break;
-		}
+
+			act = &actions[choice];
+			key = act->fn(browser, act);
+		} while (key == 1);
+
+		if (key == K_SWITCH_INPUT_DATA)
+			break;
 	}
 out_free_stack:
 	pstack__delete(browser->pstack);

From c8539e3fc630067020814657636b45095edfb5bb Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 24 Apr 2015 10:15:32 +0900
Subject: [PATCH 098/305] perf tools: Introduce pstack_peek()

The pstack_peek() is to get the topmost entry without removing it.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429838133-14001-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pstack.c | 7 +++++++
 tools/perf/util/pstack.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c
index a126e6cc6e73..b234a6e3d0d4 100644
--- a/tools/perf/util/pstack.c
+++ b/tools/perf/util/pstack.c
@@ -74,3 +74,10 @@ void *pstack__pop(struct pstack *pstack)
 	pstack->entries[pstack->top] = NULL;
 	return ret;
 }
+
+void *pstack__peek(struct pstack *pstack)
+{
+	if (pstack->top == 0)
+		return NULL;
+	return pstack->entries[pstack->top - 1];
+}
diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h
index c3cb6584d527..ded7f2e36624 100644
--- a/tools/perf/util/pstack.h
+++ b/tools/perf/util/pstack.h
@@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack);
 void pstack__remove(struct pstack *pstack, void *key);
 void pstack__push(struct pstack *pstack, void *key);
 void *pstack__pop(struct pstack *pstack);
+void *pstack__peek(struct pstack *pstack);
 
 #endif /* _PERF_PSTACK_ */

From 6422184b087ff4355951d72e0bb320f52e107185 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 24 Apr 2015 10:15:33 +0900
Subject: [PATCH 099/305] perf hists browser: Simplify zooming code using
 pstack_peek()

Now LEFT key press action can just use do_zoom_dso/thread() code to get
out of the current filter.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429838133-14001-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 9bd7b38de64c..8733d577db78 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1860,19 +1860,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 					goto out_free_stack;
 				continue;
 			}
-			top = pstack__pop(browser->pstack);
+			top = pstack__peek(browser->pstack);
 			if (top == &browser->hists->dso_filter) {
-				perf_hpp__set_elide(HISTC_DSO, false);
-				browser->hists->dso_filter = NULL;
-				hists__filter_by_dso(browser->hists);
+				/*
+				 * No need to set actions->dso here since
+				 * it's just to remove the current filter.
+				 * Ditto for thread below.
+				 */
+				do_zoom_dso(browser, actions);
 			}
-			if (top == &browser->hists->thread_filter) {
-				perf_hpp__set_elide(HISTC_THREAD, false);
-				thread__zput(browser->hists->thread_filter);
-				hists__filter_by_thread(browser->hists);
-			}
-			ui_helpline__pop();
-			hist_browser__reset(browser);
+			if (top == &browser->hists->thread_filter)
+				do_zoom_thread(browser, actions);
 			continue;
 		}
 		case K_ESC:

From 3698dab1c849c7e1cd440df4fca24baa1973d53b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 5 May 2015 23:55:46 +0900
Subject: [PATCH 100/305] perf tools: Move TUI-specific fields out of
 map_symbol

The has_children and unfolded fields don't belong to the struct
map_symbol since they're used by the TUI only.  Move those fields out of
map_symbol since the struct is also used by other places.

This will also help to compact the sizeof struct hist_entry.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429687101-4360-11-git-send-email-namhyung@kernel.org
Link: http://lkml.kernel.org/r/1430837746-5439-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 79 ++++++++++++++++++++--------------
 tools/perf/util/callchain.h    |  4 ++
 tools/perf/util/hist.c         |  2 +-
 tools/perf/util/sort.h         |  2 +
 tools/perf/util/symbol.h       |  2 -
 5 files changed, 54 insertions(+), 35 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 8733d577db78..f981cb8f0158 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -63,7 +63,7 @@ static int hist_browser__get_folding(struct hist_browser *browser)
 		struct hist_entry *he =
 			rb_entry(nd, struct hist_entry, rb_node);
 
-		if (he->ms.unfolded)
+		if (he->unfolded)
 			unfolded_rows += he->nr_rows;
 	}
 	return unfolded_rows;
@@ -139,24 +139,19 @@ static char tree__folded_sign(bool unfolded)
 	return unfolded ? '-' : '+';
 }
 
-static char map_symbol__folded(const struct map_symbol *ms)
-{
-	return ms->has_children ? tree__folded_sign(ms->unfolded) : ' ';
-}
-
 static char hist_entry__folded(const struct hist_entry *he)
 {
-	return map_symbol__folded(&he->ms);
+	return he->has_children ? tree__folded_sign(he->unfolded) : ' ';
 }
 
 static char callchain_list__folded(const struct callchain_list *cl)
 {
-	return map_symbol__folded(&cl->ms);
+	return cl->has_children ? tree__folded_sign(cl->unfolded) : ' ';
 }
 
-static void map_symbol__set_folding(struct map_symbol *ms, bool unfold)
+static void callchain_list__set_folding(struct callchain_list *cl, bool unfold)
 {
-	ms->unfolded = unfold ? ms->has_children : false;
+	cl->unfolded = unfold ? cl->has_children : false;
 }
 
 static int callchain_node__count_rows_rb_tree(struct callchain_node *node)
@@ -192,7 +187,7 @@ static int callchain_node__count_rows(struct callchain_node *node)
 
 	list_for_each_entry(chain, &node->val, list) {
 		++n;
-		unfolded = chain->ms.unfolded;
+		unfolded = chain->unfolded;
 	}
 
 	if (unfolded)
@@ -214,15 +209,27 @@ static int callchain__count_rows(struct rb_root *chain)
 	return n;
 }
 
-static bool map_symbol__toggle_fold(struct map_symbol *ms)
+static bool hist_entry__toggle_fold(struct hist_entry *he)
 {
-	if (!ms)
+	if (!he)
 		return false;
 
-	if (!ms->has_children)
+	if (!he->has_children)
 		return false;
 
-	ms->unfolded = !ms->unfolded;
+	he->unfolded = !he->unfolded;
+	return true;
+}
+
+static bool callchain_list__toggle_fold(struct callchain_list *cl)
+{
+	if (!cl)
+		return false;
+
+	if (!cl->has_children)
+		return false;
+
+	cl->unfolded = !cl->unfolded;
 	return true;
 }
 
@@ -238,10 +245,10 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no
 		list_for_each_entry(chain, &child->val, list) {
 			if (first) {
 				first = false;
-				chain->ms.has_children = chain->list.next != &child->val ||
+				chain->has_children = chain->list.next != &child->val ||
 							 !RB_EMPTY_ROOT(&child->rb_root);
 			} else
-				chain->ms.has_children = chain->list.next == &child->val &&
+				chain->has_children = chain->list.next == &child->val &&
 							 !RB_EMPTY_ROOT(&child->rb_root);
 		}
 
@@ -255,11 +262,11 @@ static void callchain_node__init_have_children(struct callchain_node *node,
 	struct callchain_list *chain;
 
 	chain = list_entry(node->val.next, struct callchain_list, list);
-	chain->ms.has_children = has_sibling;
+	chain->has_children = has_sibling;
 
 	if (!list_empty(&node->val)) {
 		chain = list_entry(node->val.prev, struct callchain_list, list);
-		chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root);
+		chain->has_children = !RB_EMPTY_ROOT(&node->rb_root);
 	}
 
 	callchain_node__init_have_children_rb_tree(node);
@@ -279,7 +286,7 @@ static void callchain__init_have_children(struct rb_root *root)
 static void hist_entry__init_have_children(struct hist_entry *he)
 {
 	if (!he->init_have_children) {
-		he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
+		he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain);
 		callchain__init_have_children(&he->sorted_chain);
 		he->init_have_children = true;
 	}
@@ -287,14 +294,22 @@ static void hist_entry__init_have_children(struct hist_entry *he)
 
 static bool hist_browser__toggle_fold(struct hist_browser *browser)
 {
-	if (map_symbol__toggle_fold(browser->selection)) {
-		struct hist_entry *he = browser->he_selection;
+	struct hist_entry *he = browser->he_selection;
+	struct map_symbol *ms = browser->selection;
+	struct callchain_list *cl = container_of(ms, struct callchain_list, ms);
+	bool has_children;
 
+	if (ms == &he->ms)
+		has_children = hist_entry__toggle_fold(he);
+	else
+		has_children = callchain_list__toggle_fold(cl);
+
+	if (has_children) {
 		hist_entry__init_have_children(he);
 		browser->b.nr_entries -= he->nr_rows;
 		browser->nr_callchain_rows -= he->nr_rows;
 
-		if (he->ms.unfolded)
+		if (he->unfolded)
 			he->nr_rows = callchain__count_rows(&he->sorted_chain);
 		else
 			he->nr_rows = 0;
@@ -321,8 +336,8 @@ static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool
 
 		list_for_each_entry(chain, &child->val, list) {
 			++n;
-			map_symbol__set_folding(&chain->ms, unfold);
-			has_children = chain->ms.has_children;
+			callchain_list__set_folding(chain, unfold);
+			has_children = chain->has_children;
 		}
 
 		if (has_children)
@@ -340,8 +355,8 @@ static int callchain_node__set_folding(struct callchain_node *node, bool unfold)
 
 	list_for_each_entry(chain, &node->val, list) {
 		++n;
-		map_symbol__set_folding(&chain->ms, unfold);
-		has_children = chain->ms.has_children;
+		callchain_list__set_folding(chain, unfold);
+		has_children = chain->has_children;
 	}
 
 	if (has_children)
@@ -366,9 +381,9 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold)
 static void hist_entry__set_folding(struct hist_entry *he, bool unfold)
 {
 	hist_entry__init_have_children(he);
-	map_symbol__set_folding(&he->ms, unfold);
+	he->unfolded = unfold ? he->has_children : false;
 
-	if (he->ms.has_children) {
+	if (he->has_children) {
 		int n = callchain__set_folding(&he->sorted_chain, unfold);
 		he->nr_rows = unfold ? n : 0;
 	} else
@@ -1019,7 +1034,7 @@ do_offset:
 	if (offset > 0) {
 		do {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->ms.unfolded) {
+			if (h->unfolded) {
 				u16 remaining = h->nr_rows - h->row_offset;
 				if (offset > remaining) {
 					offset -= remaining;
@@ -1040,7 +1055,7 @@ do_offset:
 	} else if (offset < 0) {
 		while (1) {
 			h = rb_entry(nd, struct hist_entry, rb_node);
-			if (h->ms.unfolded) {
+			if (h->unfolded) {
 				if (first) {
 					if (-offset > h->row_offset) {
 						offset += h->row_offset;
@@ -1077,7 +1092,7 @@ do_offset:
 				 * row_offset at its last entry.
 				 */
 				h = rb_entry(nd, struct hist_entry, rb_node);
-				if (h->ms.unfolded)
+				if (h->unfolded)
 					h->row_offset = h->nr_rows;
 				break;
 			}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 6033a0a212ca..679c2c6d8ade 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -72,6 +72,10 @@ extern struct callchain_param callchain_param;
 struct callchain_list {
 	u64			ip;
 	struct map_symbol	ms;
+	struct /* for TUI */ {
+		bool		unfolded;
+		bool		has_children;
+	};
 	char		       *srcline;
 	struct list_head	list;
 };
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index cc22b9158b93..338770679863 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -1163,7 +1163,7 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h
 		return;
 
 	/* force fold unfiltered entry for simplicity */
-	h->ms.unfolded = false;
+	h->unfolded = false;
 	h->row_offset = 0;
 	h->nr_rows = 0;
 
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 4d923e6e0069..e97cd476d336 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -109,6 +109,8 @@ struct hist_entry {
 			u16	row_offset;
 			u16	nr_rows;
 			bool	init_have_children;
+			bool	unfolded;
+			bool	has_children;
 		};
 	};
 	char			*srcline;
diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h
index 90965296c129..bef47ead1d9b 100644
--- a/tools/perf/util/symbol.h
+++ b/tools/perf/util/symbol.h
@@ -158,8 +158,6 @@ struct ref_reloc_sym {
 struct map_symbol {
 	struct map    *map;
 	struct symbol *sym;
-	bool	      unfolded;
-	bool	      has_children;
 };
 
 struct addr_map_symbol {

From ff303e66c240ba6269e31817a386995440a18c99 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 17 Apr 2015 20:05:30 +0200
Subject: [PATCH 101/305] perf: Fix software migrate events

Stephane asked about PERF_COUNT_SW_CPU_MIGRATIONS and I realized it
was borken:

 > The problem is that the task isn't actually scheduled while its being
 > migrated (obviously), and if its not scheduled, the counters aren't
 > scheduled either, so there's no observing of the fact.
 >
 > A further problem with migrations is that many migrations happen from
 > softirq context, which is nested inside the 'random' task context of
 > whoemever happens to run at that time, similarly for the wakeup
 > migrations triggered from (soft)irq context. All those end up being
 > accounted in the task that's currently running, eg. your 'ls'.

The below cures this by marking a task as migrated and accounting it
on the subsequent sched_in().

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/linux/perf_event.h | 24 ++++++++++++++++++++++++
 include/linux/sched.h      |  7 ++++---
 kernel/sched/core.c        |  2 +-
 3 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 61992cf2e977..e86f85abeda7 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -798,11 +798,33 @@ perf_sw_event_sched(u32 event_id, u64 nr, u64 addr)
 
 extern struct static_key_deferred perf_sched_events;
 
+static __always_inline bool
+perf_sw_migrate_enabled(void)
+{
+	if (static_key_false(&perf_swevent_enabled[PERF_COUNT_SW_CPU_MIGRATIONS]))
+		return true;
+	return false;
+}
+
+static inline void perf_event_task_migrate(struct task_struct *task)
+{
+	if (perf_sw_migrate_enabled())
+		task->sched_migrated = 1;
+}
+
 static inline void perf_event_task_sched_in(struct task_struct *prev,
 					    struct task_struct *task)
 {
 	if (static_key_false(&perf_sched_events.key))
 		__perf_event_task_sched_in(prev, task);
+
+	if (perf_sw_migrate_enabled() && task->sched_migrated) {
+		struct pt_regs *regs = this_cpu_ptr(&__perf_regs[0]);
+
+		perf_fetch_caller_regs(regs);
+		___perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS, 1, regs, 0);
+		task->sched_migrated = 0;
+	}
 }
 
 static inline void perf_event_task_sched_out(struct task_struct *prev,
@@ -925,6 +947,8 @@ perf_aux_output_skip(struct perf_output_handle *handle,
 static inline void *
 perf_get_aux(struct perf_output_handle *handle)				{ return NULL; }
 static inline void
+perf_event_task_migrate(struct task_struct *task)			{ }
+static inline void
 perf_event_task_sched_in(struct task_struct *prev,
 			 struct task_struct *task)			{ }
 static inline void
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 26a2e6122734..2c5e6c3db654 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1356,9 +1356,6 @@ struct task_struct {
 #endif
 
 	struct mm_struct *mm, *active_mm;
-#ifdef CONFIG_COMPAT_BRK
-	unsigned brk_randomized:1;
-#endif
 	/* per-thread vma caching */
 	u32 vmacache_seqnum;
 	struct vm_area_struct *vmacache[VMACACHE_SIZE];
@@ -1381,10 +1378,14 @@ struct task_struct {
 	/* Revert to default priority/policy when forking */
 	unsigned sched_reset_on_fork:1;
 	unsigned sched_contributes_to_load:1;
+	unsigned sched_migrated:1;
 
 #ifdef CONFIG_MEMCG_KMEM
 	unsigned memcg_kmem_skip_account:1;
 #endif
+#ifdef CONFIG_COMPAT_BRK
+	unsigned brk_randomized:1;
+#endif
 
 	unsigned long atomic_flags; /* Flags needing atomic access. */
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fe22f7510bce..8652fd540780 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1049,7 +1049,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		if (p->sched_class->migrate_task_rq)
 			p->sched_class->migrate_task_rq(p, new_cpu);
 		p->se.nr_migrations++;
-		perf_sw_event_sched(PERF_COUNT_SW_CPU_MIGRATIONS, 1, 0);
+		perf_event_task_migrate(p);
 	}
 
 	__set_task_cpu(p, new_cpu);

From cb307113746b4d184155d2c412e8069aeaa60d42 Mon Sep 17 00:00:00 2001
From: Michael Ellerman <mpe@ellerman.id.au>
Date: Mon, 4 May 2015 16:26:39 +1000
Subject: [PATCH 102/305] perf_event: Don't allow vmalloc() backed perf on
 powerpc

On powerpc the perf event interrupt is not masked when interrupts are
disabled, allowing it to function as an NMI.

This causes problems if perf is using vmalloc. If we take a page fault
on the vmalloc region the fault handler will fail the page fault because
it detects we are coming in from an NMI (see do_hash_page()).

We don't actually need or want vmalloc backed perf so just disable it on
powerpc.

Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: <linuxppc-dev@ozlabs.org>
Cc: Andrew Morton <akpm@osdl.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@ghostprotocols.net
Cc: sukadev@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/1430720799-18426-1-git-send-email-mpe@ellerman.id.au
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 init/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/init/Kconfig b/init/Kconfig
index dc24dec60232..81050e4d9941 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1637,7 +1637,7 @@ config PERF_EVENTS
 config DEBUG_PERF_USE_VMALLOC
 	default n
 	bool "Debug: use vmalloc to back perf mmap() buffers"
-	depends on PERF_EVENTS && DEBUG_KERNEL
+	depends on PERF_EVENTS && DEBUG_KERNEL && !PPC
 	select PERF_USE_VMALLOC
 	help
 	 Use vmalloc memory to back perf mmap() buffers.

From ae2cb1ac60758e99cec15e9edd68e0d22bfd310e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:40 +0900
Subject: [PATCH 103/305] perf probe: Fix to close probe_events file in error

Fix perf-probe to close probe_events file if it failed to get existing
probe's name. This also fix the return error code to -ENOMEM.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: hemant@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20150506124640.4961.26062.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index abf5845a2acc..230353f1bea2 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2384,7 +2384,8 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 	namelist = get_probe_trace_event_names(fd, false);
 	if (!namelist) {
 		pr_debug("Failed to get current event list.\n");
-		return -EIO;
+		ret = -ENOMEM;
+		goto close_out;
 	}
 	/* Get kprobe blacklist if exists */
 	if (!pev->uprobes) {
@@ -2467,6 +2468,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 
 	kprobe_blacklist__delete(&blacklist);
 	strlist__delete(namelist);
+close_out:
 	close(fd);
 	return ret;
 }

From b8dc3984c1fce87a36d3247c9f722229692bec72 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:42 +0900
Subject: [PATCH 104/305] perf probe: Fix a typo for the flags of open

Fix to pass O_APPEND by using bit-or with other flags, instead of
passing it as mode.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: hemant@linux.vnet.ibm.com
Link: http://lkml.kernel.org/r/20150506124642.4961.97878.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 230353f1bea2..63cb7c589105 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -1969,7 +1969,7 @@ static int open_probe_events(const char *trace_file, bool readwrite)
 	if (ret >= 0) {
 		pr_debug("Opening %s write=%d\n", buf, readwrite);
 		if (readwrite && !probe_event_dry_run)
-			ret = open(buf, O_RDWR, O_APPEND);
+			ret = open(buf, O_RDWR | O_APPEND, 0);
 		else
 			ret = open(buf, O_RDONLY, 0);
 

From 9bc9f3b6800e8de16f40a2da1d6ded3a391ea01a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:45 +0900
Subject: [PATCH 105/305] perf probe: Fix to return 0 when positive value
 returned

Fix to return 0 when positive value returned from probe command.

At least --vars can returns a positive value if it found a point.
  ----
  # perf probe --vars vfs_read && echo succeeded! || echo failed!
  Available variables at vfs_read
          @<vfs_read+0>
                  char*   buf
                  loff_t* pos
                  size_t  count
                  struct file*    file
  failed!
  ----

This fixes above problem.
  ----
  # perf probe --vars vfs_read && echo succeeded! || echo failed!
  Available variables at vfs_read
          @<vfs_read+0>
                  char*   buf
                  loff_t* pos
                  size_t  count
                  struct file*    file
  succeeded!
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150506124645.4961.56973.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 53d475b1422e..9c4cf5e5aad4 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -523,5 +523,5 @@ int cmd_probe(int argc, const char **argv, const char *prefix)
 		cleanup_params();
 	}
 
-	return ret;
+	return ret < 0 ? ret : 0;
 }

From 573709fdfd668423ba4202c4f1016e3cd7bdd134 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:47 +0900
Subject: [PATCH 106/305] perf probe: Make --line checks validate C-style
 function name

Fix --line to check valid C-style function name and returns
a semantic error if it is not.

For example, previously, --line doesn't support lazy pattern
but it doesn't recognized as a semantic error.

  ----
  # perf probe -L 'func;return*:0-10'
  Specified source line is not found.
    Error: Failed to show lines.
  ----

With this patch, it is correctly handled as a semantic error.
  ----
  # perf probe -L 'func;return*:0-10'
  Semantic error :'func;return*' is not a valid function name.
  ...
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150506124647.4961.99473.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 35 ++++++++++++++++++++---------------
 1 file changed, 20 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 63cb7c589105..4265f2e3bd14 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -980,6 +980,18 @@ static int parse_line_num(char **ptr, int *val, const char *what)
 	return 0;
 }
 
+/* Check the name is good for event, group or function */
+static bool is_c_func_name(const char *name)
+{
+	if (!isalpha(*name) && *name != '_')
+		return false;
+	while (*++name != '\0') {
+		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
+			return false;
+	}
+	return true;
+}
+
 /*
  * Stuff 'lr' according to the line range described by 'arg'.
  * The line range syntax is described by:
@@ -1048,10 +1060,15 @@ int parse_line_range_desc(const char *arg, struct line_range *lr)
 			goto err;
 		}
 		lr->function = name;
-	} else if (strchr(name, '.'))
+	} else if (strchr(name, '/') || strchr(name, '.'))
 		lr->file = name;
-	else
+	else if (is_c_func_name(name))/* We reuse it for checking funcname */
 		lr->function = name;
+	else {	/* Invalid name */
+		semantic_error("'%s' is not a valid function name.\n", name);
+		err = -EINVAL;
+		goto err;
+	}
 
 	return 0;
 err:
@@ -1059,18 +1076,6 @@ err:
 	return err;
 }
 
-/* Check the name is good for event/group */
-static bool check_event_name(const char *name)
-{
-	if (!isalpha(*name) && *name != '_')
-		return false;
-	while (*++name != '\0') {
-		if (!isalpha(*name) && !isdigit(*name) && *name != '_')
-			return false;
-	}
-	return true;
-}
-
 /* Parse probepoint definition. */
 static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 {
@@ -1094,7 +1099,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev)
 			semantic_error("Group name is not supported yet.\n");
 			return -ENOTSUP;
 		}
-		if (!check_event_name(arg)) {
+		if (!is_c_func_name(arg)) {
 			semantic_error("%s is bad for event name -it must "
 				       "follow C symbol-naming rule.\n", arg);
 			return -EINVAL;

From 5a51fcd1f30c0f93bb54cec7201a3690032470cb Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:49 +0900
Subject: [PATCH 107/305] perf probe: Skip kernel symbols which is out of .text

Skip the kernel symbols which is out of .text, e.g. the functions
in .inittext. Those are found in debuginfo/kallsyms, but already
freed from memory.

e.g.
  ----
  # perf probe vfs_caches_init
  vfs_caches_init+0 is out of .text, skip it.
  Probe point 'vfs_caches_init' not found.
    Error: Failed to add events.
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150506124649.4961.56249.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 31 ++++++++++++++++++++++++-------
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 4265f2e3bd14..37a3a8b04a69 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -557,8 +557,9 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 					   bool uprobe)
 {
 	struct ref_reloc_sym *reloc_sym;
+	u64 etext_addr;
 	char *tmp;
-	int i;
+	int i, skipped = 0;
 
 	if (uprobe)
 		return add_exec_to_probe_trace_events(tevs, ntevs, module);
@@ -572,19 +573,29 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 		pr_warning("Relocated base symbol is not found!\n");
 		return -EINVAL;
 	}
+	/* Get the address of _etext for checking non-probable text symbol */
+	etext_addr = kernel_get_symbol_address_by_name("_etext", false);
 
 	for (i = 0; i < ntevs; i++) {
 		if (tevs[i].point.address && !tevs[i].point.retprobe) {
-			tmp = strdup(reloc_sym->name);
-			if (!tmp)
-				return -ENOMEM;
+			/* If we found a wrong one, mark it by NULL symbol */
+			if (etext_addr < tevs[i].point.address) {
+				pr_warning("%s+%lu is out of .text, skip it.\n",
+				   tevs[i].point.symbol, tevs[i].point.offset);
+				tmp = NULL;
+				skipped++;
+			} else {
+				tmp = strdup(reloc_sym->name);
+				if (!tmp)
+					return -ENOMEM;
+			}
 			free(tevs[i].point.symbol);
 			tevs[i].point.symbol = tmp;
 			tevs[i].point.offset = tevs[i].point.address -
 					       reloc_sym->unrelocated_addr;
 		}
 	}
-	return 0;
+	return skipped;
 }
 
 /* Try to find perf_probe_event with debuginfo */
@@ -630,11 +641,14 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
 		ret = post_process_probe_trace_events(*tevs, ntevs,
 							target, pev->uprobes);
-		if (ret < 0) {
+		if (ret < 0 || ret == ntevs) {
 			clear_probe_trace_events(*tevs, ntevs);
 			zfree(tevs);
 		}
-		return ret < 0 ? ret : ntevs;
+		if (ret != ntevs)
+			return ret < 0 ? ret : ntevs;
+		ntevs = 0;
+		/* Fall through */
 	}
 
 	if (ntevs == 0)	{	/* No error but failed to find probe point. */
@@ -2403,6 +2417,9 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 	pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
 	for (i = 0; i < ntevs; i++) {
 		tev = &tevs[i];
+		/* Skip if the symbol is out of .text (marked previously) */
+		if (!tev->point.symbol)
+			continue;
 		/* Ensure that the address is NOT blacklisted */
 		node = kprobe_blacklist__find_by_address(&blacklist,
 							 tev->point.address);

From f8bffbf1222a64336a81974fc25fe846656ac53e Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 6 May 2015 21:46:53 +0900
Subject: [PATCH 108/305] perf probe: Support $params special probe argument

$params is similar to $vars but matches only function parameters not
local variables.

Thus, this is useful for tracing function parameter changing or tracing
function call with parameters.

Testing it:

 # perf probe tcp_sendmsg '$params'
 Added new event:
  probe:tcp_sendmsg    (on tcp_sendmsg with $params)

 You can now use it in all perf tools, such as:

	perf record -e probe:tcp_sendmsg -aR sleep 1

 # perf probe -l
  probe:tcp_sendmsg    (on tcp_sendmsg@acme/git/linux/net/ipv4/tcp.c with iocb sk msg size)
 # perf record -a -e probe:*
 press some random letters to generate TCP (sshd) traffic...

 ^C[ perf record: Woken up 1 times to write data ]
 [ perf record: Captured and wrote 0.223 MB perf.data (6 samples) ]

 # perf script
   sshd 6385 [2] 3.907529: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
   sshd 6385 [2] 4.138973: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
   sshd 6385 [2] 4.378966: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
   sshd 6385 [2] 4.603681: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
   sshd 6385 [2] 4.818455: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
   sshd 6385 [2] 5.043603: probe:tcp_sendmsg: iocb=0xffff8800ac4cfe70 sk=0xffff88042196c140 msg=0xffff8800ac4cfda8 size=0x24
 # cat /sys/kernel/debug/tracing/events/probe/tcp_sendmsg/format
 name: tcp_sendmsg
 ID: 1927
 format:
   field:unsigned short common_type;	offset:0;	size:2;	signed:0;
   field:unsigned char common_flags;	offset:2;	size:1;	signed:0;
   field:unsigned char common_preempt_count;	offset:3;	size:1;	signed:0;
   field:int common_pid;	offset:4;	size:4;	signed:1;

   field:unsigned long __probe_ip;	offset:8;	size:8;	signed:0;
   field:u64 iocb;	offset:16;	size:8;	signed:0;
   field:u64 sk;	offset:24;	size:8;	signed:0;
   field:u64 msg;	offset:32;	size:8;	signed:0;
   field:u64 size;	offset:40;	size:8;	signed:0;

 print fmt: "(%lx) iocb=0x%Lx sk=0x%Lx msg=0x%Lx size=0x%Lx", REC->__probe_ip, REC->iocb, REC->sk, REC->msg, REC->size
 #

 Do some system wide tracing of this probe + write syscalls:

 # perf trace -e write --ev probe:* --filter-pids 6385
  462.612 (0.010 ms): bash/19153 write(fd: 1</dev/pts/1>, buf: 0x7f7556c78000, count: 29               ) = 29
  462.701 (0.027 ms): sshd/19152 write(fd: 3<socket:[63117]>, buf: 0x7f78dd12e160, count: 68           ) ...
  462.701 (        ): probe:tcp_sendmsg:(ffffffff8163db30) iocb=0xffff8803ebec7e70 sk=0xffff88042196ab80 msg=0xffff8803ebec7da8 size=0x44)
  462.710 (0.035 ms): sshd/19152  ... [continued]: write()) = 68
  462.787 (0.009 ms): bash/19153 write(fd: 2</dev/pts/1>, buf: 0x7f7556c77000, count: 22               ) = 22
  462.865 (0.002 ms): sshd/19152 write(fd: 3<socket:[63117]>, buf: 0x7f78dd12e160, count: 68           ) ...
  462.865 (        ): probe:tcp_sendmsg:(ffffffff8163db30) iocb=0xffff8803ebec7e70 sk=0xffff88042196ab80 msg=0xffff8803ebec7da8 size=0x44)
  462.873 (0.010 ms): sshd/19152  ... [continued]: write()) = 68

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150506124653.4961.59806.stgit@localhost.localdomain
[ Add some examples to the changelog message showing how to use it ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-probe.txt |  2 +-
 tools/perf/util/probe-finder.c          | 29 ++++++++++++++-----------
 tools/perf/util/probe-finder.h          |  3 +++
 3 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index a272f2e9a1cf..ad3e355adc1f 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -151,7 +151,7 @@ Each probe argument follows below syntax.
  [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE]
 
 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.)
-'$vars' special argument is also available for NAME, it is expanded to the local variables which can access at given probe point.
+'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters.
 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type.
 
 On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid.
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index b5bf9d5efeaf..63d33893d853 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1087,6 +1087,7 @@ found:
 struct local_vars_finder {
 	struct probe_finder *pf;
 	struct perf_probe_arg *args;
+	bool vars;
 	int max_args;
 	int nargs;
 	int ret;
@@ -1101,7 +1102,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data)
 
 	tag = dwarf_tag(die_mem);
 	if (tag == DW_TAG_formal_parameter ||
-	    tag == DW_TAG_variable) {
+	    (tag == DW_TAG_variable && vf->vars)) {
 		if (convert_variable_location(die_mem, vf->pf->addr,
 					      vf->pf->fb_ops, &pf->sp_die,
 					      NULL) == 0) {
@@ -1127,26 +1128,28 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf,
 	Dwarf_Die die_mem;
 	int i;
 	int n = 0;
-	struct local_vars_finder vf = {.pf = pf, .args = args,
+	struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false,
 				.max_args = MAX_PROBE_ARGS, .ret = 0};
 
 	for (i = 0; i < pf->pev->nargs; i++) {
 		/* var never be NULL */
-		if (strcmp(pf->pev->args[i].var, "$vars") == 0) {
-			pr_debug("Expanding $vars into:");
-			vf.nargs = n;
-			/* Special local variables */
-			die_find_child(sc_die, copy_variables_cb, (void *)&vf,
-				       &die_mem);
-			pr_debug(" (%d)\n", vf.nargs - n);
-			if (vf.ret < 0)
-				return vf.ret;
-			n = vf.nargs;
-		} else {
+		if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0)
+			vf.vars = true;
+		else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) {
 			/* Copy normal argument */
 			args[n] = pf->pev->args[i];
 			n++;
+			continue;
 		}
+		pr_debug("Expanding %s into:", pf->pev->args[i].var);
+		vf.nargs = n;
+		/* Special local variables */
+		die_find_child(sc_die, copy_variables_cb, (void *)&vf,
+			       &die_mem);
+		pr_debug(" (%d)\n", vf.nargs - n);
+		if (vf.ret < 0)
+			return vf.ret;
+		n = vf.nargs;
 	}
 	return n;
 }
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index ebf8c8c81453..f53553d38845 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -10,6 +10,9 @@
 #define MAX_PROBES		 128
 #define MAX_PROBE_ARGS		 128
 
+#define PROBE_ARG_VARS		"$vars"
+#define PROBE_ARG_PARAMS	"$params"
+
 static inline int is_c_varname(const char *name)
 {
 	/* TODO */

From 361c564eeff4b78f1303b86e8e8f07fc547bd2c9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 30 Apr 2015 12:33:22 -0300
Subject: [PATCH 109/305] perf tools: Move x86 barrier.h stuff to
 tools/arch/x86/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Other aches will follow, each in a cset.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-vy6bqmsvm6puibpay2cy4wid@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/barrier.h | 28 ++++++++++++++++++++++++++++
 tools/include/asm/barrier.h          |  3 +++
 tools/perf/MANIFEST                  |  2 ++
 tools/perf/perf-sys.h                |  7 +------
 4 files changed, 34 insertions(+), 6 deletions(-)
 create mode 100644 tools/arch/x86/include/asm/barrier.h
 create mode 100644 tools/include/asm/barrier.h

diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h
new file mode 100644
index 000000000000..f366d8e550e4
--- /dev/null
+++ b/tools/arch/x86/include/asm/barrier.h
@@ -0,0 +1,28 @@
+#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H
+#define _TOOLS_LINUX_ASM_X86_BARRIER_H
+
+/*
+ * Copied from the Linux kernel sources, and also moving code
+ * out from tools/perf/perf-sys.h so as to make it be located
+ * in a place similar as in the kernel sources.
+ *
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#if defined(__i386__)
+/*
+ * Some non-Intel clones support out of order store. wmb() ceases to be a
+ * nop for these.
+ */
+#define mb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define rmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#define wmb()	asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
+#elif defined(__x86_64__)
+#define mb() 	asm volatile("mfence":::"memory")
+#define rmb()	asm volatile("lfence":::"memory")
+#define wmb()	asm volatile("sfence" ::: "memory")
+#endif
+
+#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
new file mode 100644
index 000000000000..9a55c12d6f41
--- /dev/null
+++ b/tools/include/asm/barrier.h
@@ -0,0 +1,3 @@
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/barrier.h"
+#endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 11ccbb22ea2b..594737a17b1b 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,5 @@
 tools/perf
+tools/arch/x86/include/asm/barrier.h
 tools/scripts
 tools/build
 tools/lib/traceevent
@@ -6,6 +7,7 @@ tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
+tools/include/asm/barrier.h
 tools/include/asm/bug.h
 tools/include/asm-generic/bitops/arch_hweight.h
 tools/include/asm-generic/bitops/atomic.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 6ef68165c9db..781d441cffd7 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -6,11 +6,9 @@
 #include <sys/syscall.h>
 #include <linux/types.h>
 #include <linux/perf_event.h>
+#include <asm/barrier.h>
 
 #if defined(__i386__)
-#define mb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define wmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
-#define rmb()		asm volatile("lock; addl $0,0(%%esp)" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC	{"model name"}
 #ifndef __NR_perf_event_open
@@ -25,9 +23,6 @@
 #endif
 
 #if defined(__x86_64__)
-#define mb()		asm volatile("mfence" ::: "memory")
-#define wmb()		asm volatile("sfence" ::: "memory")
-#define rmb()		asm volatile("lfence" ::: "memory")
 #define cpu_relax()	asm volatile("rep; nop" ::: "memory");
 #define CPUINFO_PROC	{"model name"}
 #ifndef __NR_perf_event_open

From e43a19c9c2c30cf88ffafb8390a4c53400b2467e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 6 May 2015 18:35:20 -0300
Subject: [PATCH 110/305] perf tools: Move powerpc barrier.h stuff to
 tools/arch/powerpc/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-pk6f5x9vh8k2ebzhh9uj5wo2@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/powerpc/include/asm/barrier.h | 29 ++++++++++++++++++++++++
 tools/include/asm/barrier.h              |  2 ++
 tools/perf/MANIFEST                      |  1 +
 tools/perf/perf-sys.h                    |  3 ---
 4 files changed, 32 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/powerpc/include/asm/barrier.h

diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h
new file mode 100644
index 000000000000..b23aee8e6d90
--- /dev/null
+++ b/tools/arch/powerpc/include/asm/barrier.h
@@ -0,0 +1,29 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu>
+ */
+#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H
+
+/*
+ * Memory barrier.
+ * The sync instruction guarantees that all memory accesses initiated
+ * by this processor have been performed (with respect to all other
+ * mechanisms that access memory).  The eieio instruction is a barrier
+ * providing an ordering (separately) for (a) cacheable stores and (b)
+ * loads and stores to non-cacheable memory (e.g. I/O devices).
+ *
+ * mb() prevents loads and stores being reordered across this point.
+ * rmb() prevents loads being reordered across this point.
+ * wmb() prevents stores being reordered across this point.
+ *
+ * *mb() variants without smp_ prefix must order all types of memory
+ * operations with one another. sync is the only instruction sufficient
+ * to do this.
+ */
+#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+
+#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 9a55c12d6f41..249f67238668 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -1,3 +1,5 @@
 #if defined(__i386__) || defined(__x86_64__)
 #include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__powerpc__)
+#include "../../arch/powerpc/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 594737a17b1b..4b3346e81d85 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,5 @@
 tools/perf
+tools/arch/powerpc/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
 tools/scripts
 tools/build
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 781d441cffd7..01c06954bf16 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -38,9 +38,6 @@
 
 #ifdef __powerpc__
 #include "../../arch/powerpc/include/uapi/asm/unistd.h"
-#define mb()		asm volatile ("sync" ::: "memory")
-#define wmb()		asm volatile ("sync" ::: "memory")
-#define rmb()		asm volatile ("sync" ::: "memory")
 #define CPUINFO_PROC	{"cpu"}
 #endif
 

From 07d207ac0c55a413c358153bd3012c5fa51a0554 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 6 May 2015 18:44:53 -0300
Subject: [PATCH 111/305] perf tools: Move s390 barrier.h stuff to
 tools/arch/s390/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-zv4x77074resrkl4ayzf5e7d@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/s390/include/asm/barrier.h | 30 +++++++++++++++++++++++++++
 tools/include/asm/barrier.h           |  2 ++
 tools/perf/MANIFEST                   |  1 +
 tools/perf/perf-sys.h                 |  3 ---
 4 files changed, 33 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/s390/include/asm/barrier.h

diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h
new file mode 100644
index 000000000000..f85141266b92
--- /dev/null
+++ b/tools/arch/s390/include/asm/barrier.h
@@ -0,0 +1,30 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright IBM Corp. 1999, 2009
+ *
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#ifndef __TOOLS_LINUX_ASM_BARRIER_H
+#define __TOOLS_LINUX_ASM_BARRIER_H
+
+/*
+ * Force strict CPU ordering.
+ * And yes, this is required on UP too when we're talking
+ * to devices.
+ */
+
+#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES
+/* Fast-BCR without checkpoint synchronization */
+#define __ASM_BARRIER "bcr 14,0\n"
+#else
+#define __ASM_BARRIER "bcr 15,0\n"
+#endif
+
+#define mb() do {  asm volatile(__ASM_BARRIER : : : "memory"); } while (0)
+
+#define rmb()				mb()
+#define wmb()				mb()
+
+#endif /* __TOOLS_LIB_ASM_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 249f67238668..7eb88946e072 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -2,4 +2,6 @@
 #include "../../arch/x86/include/asm/barrier.h"
 #elif defined(__powerpc__)
 #include "../../arch/powerpc/include/asm/barrier.h"
+#elif defined(__s390__)
+#include "../../arch/s390/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 4b3346e81d85..00754afb4866 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,5 +1,6 @@
 tools/perf
 tools/arch/powerpc/include/asm/barrier.h
+tools/arch/s390/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
 tools/scripts
 tools/build
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 01c06954bf16..bf349ea3984f 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -42,9 +42,6 @@
 #endif
 
 #ifdef __s390__
-#define mb()		asm volatile("bcr 15,0" ::: "memory")
-#define wmb()		asm volatile("bcr 15,0" ::: "memory")
-#define rmb()		asm volatile("bcr 15,0" ::: "memory")
 #define CPUINFO_PROC	{"vendor_id"}
 #endif
 

From 5ac69737dc5e25fb1c26d18059c8e17ce2671d5d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 13:38:16 -0300
Subject: [PATCH 112/305] perf tools: Move barrier() definition to
 tools/include/linux/compiler.h

To make it generally accessible by other tools/ projects, also will be
used in the tools/arch/*/include/asm/barrier.h files that are being
introduced now.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-qnjdqwu3vcnt14vqmr6wu788@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/compiler.h | 4 ++++
 tools/perf/perf-sys.h          | 2 --
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h
index 88461f09cc86..f0e72674c52d 100644
--- a/tools/include/linux/compiler.h
+++ b/tools/include/linux/compiler.h
@@ -1,6 +1,10 @@
 #ifndef _TOOLS_LINUX_COMPILER_H_
 #define _TOOLS_LINUX_COMPILER_H_
 
+/* Optimization barrier */
+/* The "volatile" is due to gcc bugs */
+#define barrier() __asm__ __volatile__("": : :"memory")
+
 #ifndef __always_inline
 # define __always_inline	inline __attribute__((always_inline))
 #endif
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index bf349ea3984f..f44e53efd9a4 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -153,8 +153,6 @@
 #define CPUINFO_PROC    {"model name"}
 #endif
 
-#define barrier() asm volatile ("" ::: "memory")
-
 #ifndef cpu_relax
 #define cpu_relax() barrier()
 #endif

From c6e39db1d0284ff573f171b6754837d39b8a358d Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 17:04:06 -0300
Subject: [PATCH 113/305] tools: Adopt asm-generic/barrier.h

From the kernel's include/asm-generic/barrier.h, will be used by the
sh barrier.h implementation.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-emjznw0rjsmfyx2wfixss1gv@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/asm-generic/barrier.h | 44 +++++++++++++++++++++++++++++
 tools/perf/MANIFEST                 |  1 +
 2 files changed, 45 insertions(+)
 create mode 100644 tools/include/asm-generic/barrier.h

diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h
new file mode 100644
index 000000000000..47b933903eaf
--- /dev/null
+++ b/tools/include/asm-generic/barrier.h
@@ -0,0 +1,44 @@
+/*
+ * Copied from the kernel sources to tools/perf/:
+ *
+ * Generic barrier definitions, originally based on MN10300 definitions.
+ *
+ * It should be possible to use these on really simple architectures,
+ * but it serves more as a starting point for new ports.
+ *
+ * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/compiler.h>
+
+/*
+ * Force strict CPU ordering. And yes, this is required on UP too when we're
+ * talking to devices.
+ *
+ * Fall back to compiler barriers if nothing better is provided.
+ */
+
+#ifndef mb
+#define mb()	barrier()
+#endif
+
+#ifndef rmb
+#define rmb()	mb()
+#endif
+
+#ifndef wmb
+#define wmb()	mb()
+#endif
+
+#endif /* !__ASSEMBLY__ */
+#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 00754afb4866..ac62cafa62ec 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -11,6 +11,7 @@ tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
 tools/include/asm/barrier.h
 tools/include/asm/bug.h
+tools/include/asm-generic/barrier.h
 tools/include/asm-generic/bitops/arch_hweight.h
 tools/include/asm-generic/bitops/atomic.h
 tools/include/asm-generic/bitops/const_hweight.h

From 827634added7f38b7d724cab1dccdb2b004c13c3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 17:09:48 -0300
Subject: [PATCH 114/305] perf tools: Move sh barrier.h stuff to
 tools/arch/sh/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-6xqb97k782wqp1r3v6jqayki@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/sh/include/asm/barrier.h | 32 +++++++++++++++++++++++++++++
 tools/include/asm/barrier.h         |  2 ++
 tools/perf/MANIFEST                 |  1 +
 tools/perf/perf-sys.h               |  9 --------
 4 files changed, 35 insertions(+), 9 deletions(-)
 create mode 100644 tools/arch/sh/include/asm/barrier.h

diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h
new file mode 100644
index 000000000000..c18fd7599b97
--- /dev/null
+++ b/tools/arch/sh/include/asm/barrier.h
@@ -0,0 +1,32 @@
+/*
+ * Copied from the kernel sources:
+ *
+ * Copyright (C) 1999, 2000  Niibe Yutaka  &  Kaz Kojima
+ * Copyright (C) 2002 Paul Mundt
+ */
+#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H
+#define __TOOLS_LINUX_ASM_SH_BARRIER_H
+
+/*
+ * A brief note on ctrl_barrier(), the control register write barrier.
+ *
+ * Legacy SH cores typically require a sequence of 8 nops after
+ * modification of a control register in order for the changes to take
+ * effect. On newer cores (like the sh4a and sh5) this is accomplished
+ * with icbi.
+ *
+ * Also note that on sh4a in the icbi case we can forego a synco for the
+ * write barrier, as it's not necessary for control registers.
+ *
+ * Historically we have only done this type of barrier for the MMUCR, but
+ * it's also necessary for the CCR, so we make it generic here instead.
+ */
+#if defined(__SH4A__) || defined(__SH5__)
+#define mb()		__asm__ __volatile__ ("synco": : :"memory")
+#define rmb()		mb()
+#define wmb()		mb()
+#endif
+
+#include <asm-generic/barrier.h>
+
+#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 7eb88946e072..1338c8a404ef 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -4,4 +4,6 @@
 #include "../../arch/powerpc/include/asm/barrier.h"
 #elif defined(__s390__)
 #include "../../arch/s390/include/asm/barrier.h"
+#elif defined(__sh__)
+#include "../../arch/sh/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index ac62cafa62ec..2ceb1b262729 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,6 +1,7 @@
 tools/perf
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
+tools/arch/sh/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
 tools/scripts
 tools/build
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index f44e53efd9a4..7ef5e821d418 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -46,15 +46,6 @@
 #endif
 
 #ifdef __sh__
-#if defined(__SH4A__) || defined(__SH5__)
-# define mb()		asm volatile("synco" ::: "memory")
-# define wmb()		asm volatile("synco" ::: "memory")
-# define rmb()		asm volatile("synco" ::: "memory")
-#else
-# define mb()		asm volatile("" ::: "memory")
-# define wmb()		asm volatile("" ::: "memory")
-# define rmb()		asm volatile("" ::: "memory")
-#endif
 #define CPUINFO_PROC	{"cpu type"}
 #endif
 

From 94cdda6b98b3708f04f426204542aa69d4780fa9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 17:27:23 -0300
Subject: [PATCH 115/305] perf tools: Move sparc barrier.h stuff to
 tools/arch/sparc/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-f0d04b9x63grt30nahpw9ei0@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/sparc/include/asm/barrier.h    |  8 +++++
 tools/arch/sparc/include/asm/barrier_32.h |  6 ++++
 tools/arch/sparc/include/asm/barrier_64.h | 42 +++++++++++++++++++++++
 tools/include/asm/barrier.h               |  2 ++
 tools/perf/MANIFEST                       |  3 ++
 tools/perf/perf-sys.h                     |  9 -----
 6 files changed, 61 insertions(+), 9 deletions(-)
 create mode 100644 tools/arch/sparc/include/asm/barrier.h
 create mode 100644 tools/arch/sparc/include/asm/barrier_32.h
 create mode 100644 tools/arch/sparc/include/asm/barrier_64.h

diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h
new file mode 100644
index 000000000000..8c017b3b1391
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H
+#if defined(__sparc__) && defined(__arch64__)
+#include "barrier_64.h"
+#else
+#include "barrier_32.h"
+#endif
+#endif
diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h
new file mode 100644
index 000000000000..c5eadd0a7233
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_32.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_PERF_SPARC_BARRIER_H
+#define __TOOLS_PERF_SPARC_BARRIER_H
+
+#include <asm-generic/barrier.h>
+
+#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */
diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h
new file mode 100644
index 000000000000..9a7d7322c3f7
--- /dev/null
+++ b/tools/arch/sparc/include/asm/barrier_64.h
@@ -0,0 +1,42 @@
+#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H
+#define __TOOLS_LINUX_SPARC64_BARRIER_H
+
+/* Copied from the kernel sources to tools/:
+ *
+ * These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *	call	sym_ccb_from_dsa, 0
+ *	 movge	%icc, 0, %l0
+ *	brz,pn	%o0, .LL1303
+ *	 mov	%o0, %l2
+ *	membar	#LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) \
+do {	__asm__ __volatile__("ba,pt	%%xcc, 1f\n\t" \
+			     " membar	" type "\n" \
+			     "1:\n" \
+			     : : : "memory"); \
+} while (0)
+
+/* The kernel always executes in TSO memory model these days,
+ * and furthermore most sparc64 chips implement more stringent
+ * memory ordering than required by the specifications.
+ */
+#define mb()	membar_safe("#StoreLoad")
+#define rmb()	__asm__ __volatile__("":::"memory")
+#define wmb()	__asm__ __volatile__("":::"memory")
+
+#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 1338c8a404ef..87f6309ef97d 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -6,4 +6,6 @@
 #include "../../arch/s390/include/asm/barrier.h"
 #elif defined(__sh__)
 #include "../../arch/sh/include/asm/barrier.h"
+#elif defined(__sparc__)
+#include "../../arch/sparc/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 2ceb1b262729..ee692408f8dd 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -2,6 +2,9 @@ tools/perf
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
 tools/arch/sh/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier.h
+tools/arch/sparc/include/asm/barrier_32.h
+tools/arch/sparc/include/asm/barrier_64.h
 tools/arch/x86/include/asm/barrier.h
 tools/scripts
 tools/build
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 7ef5e821d418..b3e911afa1de 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -57,15 +57,6 @@
 #endif
 
 #ifdef __sparc__
-#ifdef __LP64__
-#define mb()		asm volatile("ba,pt %%xcc, 1f\n"	\
-				     "membar #StoreLoad\n"	\
-				     "1:\n":::"memory")
-#else
-#define mb()		asm volatile("":::"memory")
-#endif
-#define wmb()		asm volatile("":::"memory")
-#define rmb()		asm volatile("":::"memory")
 #define CPUINFO_PROC	{"cpu"}
 #endif
 

From 0da85d1e38b4c186d2248f0239b72d465f5b406c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 17:52:19 -0300
Subject: [PATCH 116/305] perf tools: Move alpha barrier.h stuff to
 tools/arch/alpha/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-vs2plxuph0ne3zcupijgjy9z@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/alpha/include/asm/barrier.h | 8 ++++++++
 tools/include/asm/barrier.h            | 2 ++
 tools/perf/MANIFEST                    | 1 +
 tools/perf/perf-sys.h                  | 3 ---
 4 files changed, 11 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/alpha/include/asm/barrier.h

diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h
new file mode 100644
index 000000000000..95df19c95482
--- /dev/null
+++ b/tools/arch/alpha/include/asm/barrier.h
@@ -0,0 +1,8 @@
+#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H
+
+#define mb()	__asm__ __volatile__("mb": : :"memory")
+#define rmb()	__asm__ __volatile__("mb": : :"memory")
+#define wmb()	__asm__ __volatile__("wmb": : :"memory")
+
+#endif		/* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 87f6309ef97d..a579a2e85711 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -8,4 +8,6 @@
 #include "../../arch/sh/include/asm/barrier.h"
 #elif defined(__sparc__)
 #include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__alpha__)
+#include "../../arch/alpha/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index ee692408f8dd..9919ee3eae52 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,4 +1,5 @@
 tools/perf
+tools/arch/alpha/include/asm/barrier.h
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
 tools/arch/sh/include/asm/barrier.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index b3e911afa1de..4710f057150d 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -61,9 +61,6 @@
 #endif
 
 #ifdef __alpha__
-#define mb()		asm volatile("mb" ::: "memory")
-#define wmb()		asm volatile("wmb" ::: "memory")
-#define rmb()		asm volatile("mb" ::: "memory")
 #define CPUINFO_PROC	{"cpu model"}
 #endif
 

From 163e589d0519b6d6c1e5500f4d14b1fc10b736fe Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 18:03:14 -0300
Subject: [PATCH 117/305] perf tools: Move ia64 barrier.h stuff to
 tools/arch/ia64/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-4op0qdukegrdumyefz4icxk0@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/ia64/include/asm/barrier.h | 48 +++++++++++++++++++++++++++
 tools/include/asm/barrier.h           |  2 ++
 tools/perf/MANIFEST                   |  1 +
 tools/perf/perf-sys.h                 |  3 --
 4 files changed, 51 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/ia64/include/asm/barrier.h

diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h
new file mode 100644
index 000000000000..e4422b4b634e
--- /dev/null
+++ b/tools/arch/ia64/include/asm/barrier.h
@@ -0,0 +1,48 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * Memory barrier definitions.  This is based on information published
+ * in the Processor Abstraction Layer and the System Abstraction Layer
+ * manual.
+ *
+ * Copyright (C) 1998-2003 Hewlett-Packard Co
+ *	David Mosberger-Tang <davidm@hpl.hp.com>
+ * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com>
+ * Copyright (C) 1999 Don Dugger <don.dugger@intel.com>
+ */
+#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H
+#define _TOOLS_LINUX_ASM_IA64_BARRIER_H
+
+#include <linux/compiler.h>
+
+/*
+ * Macros to force memory ordering.  In these descriptions, "previous"
+ * and "subsequent" refer to program order; "visible" means that all
+ * architecturally visible effects of a memory access have occurred
+ * (at a minimum, this means the memory has been read or written).
+ *
+ *   wmb():	Guarantees that all preceding stores to memory-
+ *		like regions are visible before any subsequent
+ *		stores and that all following stores will be
+ *		visible only after all previous stores.
+ *   rmb():	Like wmb(), but for reads.
+ *   mb():	wmb()/rmb() combo, i.e., all previous memory
+ *		accesses are visible before all subsequent
+ *		accesses and vice versa.  This is also known as
+ *		a "fence."
+ *
+ * Note: "mb()" and its variants cannot be used as a fence to order
+ * accesses to memory mapped I/O registers.  For that, mf.a needs to
+ * be used.  However, we don't want to always use mf.a because (a)
+ * it's (presumably) much slower than mf and (b) mf.a is supported for
+ * sequential memory pages only.
+ */
+
+/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */
+#define ia64_mf()       asm volatile ("mf" ::: "memory")
+
+#define mb()		ia64_mf()
+#define rmb()		mb()
+#define wmb()		mb()
+
+#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index a579a2e85711..659aa604ee8a 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -10,4 +10,6 @@
 #include "../../arch/sparc/include/asm/barrier.h"
 #elif defined(__alpha__)
 #include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__ia64__)
+#include "../../arch/ia64/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 9919ee3eae52..74981a63fbd6 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,5 +1,6 @@
 tools/perf
 tools/arch/alpha/include/asm/barrier.h
+tools/arch/ia64/include/asm/barrier.h
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
 tools/arch/sh/include/asm/barrier.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 4710f057150d..79052fd1a695 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -65,9 +65,6 @@
 #endif
 
 #ifdef __ia64__
-#define mb()		asm volatile ("mf" ::: "memory")
-#define wmb()		asm volatile ("mf" ::: "memory")
-#define rmb()		asm volatile ("mf" ::: "memory")
 #define cpu_relax()	asm volatile ("hint @pause" ::: "memory")
 #define CPUINFO_PROC	{"model name"}
 #endif

From 4f3db0746c91453a88e302d1aac30be08ba9e52f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 18:14:25 -0300
Subject: [PATCH 118/305] perf tools: Move arm(64) barrier.h stuff to
 tools/arch/arm*/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-cgfhreaejd7ohitdjccu9k2o@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/arm/include/asm/barrier.h   | 12 ++++++++++++
 tools/arch/arm64/include/asm/barrier.h | 16 ++++++++++++++++
 tools/include/asm/barrier.h            |  4 ++++
 tools/perf/MANIFEST                    |  1 +
 tools/perf/perf-sys.h                  | 10 ----------
 5 files changed, 33 insertions(+), 10 deletions(-)
 create mode 100644 tools/arch/arm/include/asm/barrier.h
 create mode 100644 tools/arch/arm64/include/asm/barrier.h

diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h
new file mode 100644
index 000000000000..005c618a0ab0
--- /dev/null
+++ b/tools/arch/arm/include/asm/barrier.h
@@ -0,0 +1,12 @@
+#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H
+#define _TOOLS_LINUX_ASM_ARM_BARRIER_H
+
+/*
+ * Use the __kuser_memory_barrier helper in the CPU helper page. See
+ * arch/arm/kernel/entry-armv.S in the kernel source for details.
+ */
+#define mb()		((void(*)(void))0xffff0fa0)()
+#define wmb()		((void(*)(void))0xffff0fa0)()
+#define rmb()		((void(*)(void))0xffff0fa0)()
+
+#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */
diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h
new file mode 100644
index 000000000000..a0483c8e0142
--- /dev/null
+++ b/tools/arch/arm64/include/asm/barrier.h
@@ -0,0 +1,16 @@
+#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H
+
+/*
+ * From tools/perf/perf-sys.h, last modified in:
+ * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers
+ *
+ * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this
+ * a case like for arm32 where we do things differently in userspace?
+ */
+
+#define mb()		asm volatile("dmb ish" ::: "memory")
+#define wmb()		asm volatile("dmb ishst" ::: "memory")
+#define rmb()		asm volatile("dmb ishld" ::: "memory")
+
+#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index 659aa604ee8a..d2ddca50ec72 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -1,5 +1,9 @@
 #if defined(__i386__) || defined(__x86_64__)
 #include "../../arch/x86/include/asm/barrier.h"
+#elif defined(__arm__)
+#include "../../arch/arm/include/asm/barrier.h"
+#elif defined(__aarch64__)
+#include "../../arch/arm64/include/asm/barrier.h"
 #elif defined(__powerpc__)
 #include "../../arch/powerpc/include/asm/barrier.h"
 #elif defined(__s390__)
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 74981a63fbd6..1b330dd8a137 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -1,5 +1,6 @@
 tools/perf
 tools/arch/alpha/include/asm/barrier.h
+tools/arch/arm/include/asm/barrier.h
 tools/arch/ia64/include/asm/barrier.h
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 79052fd1a695..56edbe16f583 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -70,20 +70,10 @@
 #endif
 
 #ifdef __arm__
-/*
- * Use the __kuser_memory_barrier helper in the CPU helper page. See
- * arch/arm/kernel/entry-armv.S in the kernel source for details.
- */
-#define mb()		((void(*)(void))0xffff0fa0)()
-#define wmb()		((void(*)(void))0xffff0fa0)()
-#define rmb()		((void(*)(void))0xffff0fa0)()
 #define CPUINFO_PROC	{"model name", "Processor"}
 #endif
 
 #ifdef __aarch64__
-#define mb()		asm volatile("dmb ish" ::: "memory")
-#define wmb()		asm volatile("dmb ishst" ::: "memory")
-#define rmb()		asm volatile("dmb ishld" ::: "memory")
 #define cpu_relax()	asm volatile("yield" ::: "memory")
 #endif
 

From 3d3337de8900a199a7ab304f742a6ad2a973b9f4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 8 May 2015 08:53:26 -0300
Subject: [PATCH 119/305] perf tools: Move xtensa barrier.h stuff to
 tools/arch/xtensa/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-lp68dspbtjcwbpzd7x5c6zp5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/xtensa/include/asm/barrier.h | 18 ++++++++++++++++++
 tools/include/asm/barrier.h             |  2 ++
 tools/perf/MANIFEST                     |  1 +
 tools/perf/perf-sys.h                   |  3 ---
 4 files changed, 21 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/xtensa/include/asm/barrier.h

diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h
new file mode 100644
index 000000000000..583800bd7259
--- /dev/null
+++ b/tools/arch/xtensa/include/asm/barrier.h
@@ -0,0 +1,18 @@
+/*
+ * Copied from the kernel sources to tools/:
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 2001 - 2012 Tensilica Inc.
+ */
+
+#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H
+#define _TOOLS_LINUX_XTENSA_SYSTEM_H
+
+#define mb()  ({ __asm__ __volatile__("memw" : : : "memory"); })
+#define rmb() barrier()
+#define wmb() mb()
+
+#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index d2ddca50ec72..ccb2244903a4 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -16,4 +16,6 @@
 #include "../../arch/alpha/include/asm/barrier.h"
 #elif defined(__ia64__)
 #include "../../arch/ia64/include/asm/barrier.h"
+#elif defined(__xtensa__)
+#include "../../arch/xtensa/include/asm/barrier.h"
 #endif
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 1b330dd8a137..6d3c398c1021 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@ tools/arch/sparc/include/asm/barrier.h
 tools/arch/sparc/include/asm/barrier_32.h
 tools/arch/sparc/include/asm/barrier_64.h
 tools/arch/x86/include/asm/barrier.h
+tools/arch/xtensa/include/asm/barrier.h
 tools/scripts
 tools/build
 tools/lib/traceevent
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 56edbe16f583..ebe228f1bde4 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -105,9 +105,6 @@
 #endif
 
 #ifdef __xtensa__
-#define mb()		asm volatile("memw" ::: "memory")
-#define wmb()		asm volatile("memw" ::: "memory")
-#define rmb()		asm volatile("" ::: "memory")
 #define CPUINFO_PROC	{"core ID"}
 #endif
 

From e2164f0406db75cb1ae0bf49ae7915f0ab10e201 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 8 May 2015 09:40:36 -0300
Subject: [PATCH 120/305] perf tools: Move mips barrier.h stuff to
 tools/arch/mips/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Deng-Cheng Zhu <dengcheng.zhu@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Ralf Baechle <ralf@linux-mips.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-c5a8m8lbjuy0agep6giykxbz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/mips/include/asm/barrier.h | 20 ++++++++++++++++++++
 tools/include/asm/barrier.h           |  2 ++
 tools/perf/MANIFEST                   |  1 +
 tools/perf/perf-sys.h                 |  9 ---------
 4 files changed, 23 insertions(+), 9 deletions(-)
 create mode 100644 tools/arch/mips/include/asm/barrier.h

diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h
new file mode 100644
index 000000000000..80f96f7556e3
--- /dev/null
+++ b/tools/arch/mips/include/asm/barrier.h
@@ -0,0 +1,20 @@
+#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it
+ * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+#define mb()		asm volatile(					\
+				".set	mips2\n\t"			\
+				"sync\n\t"				\
+				".set	mips0"				\
+				: /* no output */			\
+				: /* no input */			\
+				: "memory")
+#define wmb()	mb()
+#define rmb()	mb()
+
+#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index ccb2244903a4..a00b9480adcf 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -14,6 +14,8 @@
 #include "../../arch/sparc/include/asm/barrier.h"
 #elif defined(__alpha__)
 #include "../../arch/alpha/include/asm/barrier.h"
+#elif defined(__mips__)
+#include "../../arch/mips/include/asm/barrier.h"
 #elif defined(__ia64__)
 #include "../../arch/ia64/include/asm/barrier.h"
 #elif defined(__xtensa__)
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 6d3c398c1021..e4a722050f4f 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -2,6 +2,7 @@ tools/perf
 tools/arch/alpha/include/asm/barrier.h
 tools/arch/arm/include/asm/barrier.h
 tools/arch/ia64/include/asm/barrier.h
+tools/arch/mips/include/asm/barrier.h
 tools/arch/powerpc/include/asm/barrier.h
 tools/arch/s390/include/asm/barrier.h
 tools/arch/sh/include/asm/barrier.h
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index ebe228f1bde4..876001e7e3c9 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -78,15 +78,6 @@
 #endif
 
 #ifdef __mips__
-#define mb()		asm volatile(					\
-				".set	mips2\n\t"			\
-				"sync\n\t"				\
-				".set	mips0"				\
-				: /* no output */			\
-				: /* no input */			\
-				: "memory")
-#define wmb()	mb()
-#define rmb()	mb()
 #define CPUINFO_PROC	{"cpu model"}
 #endif
 

From d3bd708114a37bcb472715818ba21d61ef71e86a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 8 May 2015 10:02:00 -0300
Subject: [PATCH 121/305] perf tools: Move tile barrier.h stuff to
 tools/arch/tile/include/asm/barrier.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-jwcs4r1lo0ld8a4ricbe0zug@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/tile/include/asm/barrier.h | 15 +++++++++++++++
 tools/include/asm/barrier.h           |  2 ++
 tools/perf/MANIFEST                   |  1 +
 tools/perf/perf-sys.h                 |  3 ---
 4 files changed, 18 insertions(+), 3 deletions(-)
 create mode 100644 tools/arch/tile/include/asm/barrier.h

diff --git a/tools/arch/tile/include/asm/barrier.h b/tools/arch/tile/include/asm/barrier.h
new file mode 100644
index 000000000000..7d3692c3d4ac
--- /dev/null
+++ b/tools/arch/tile/include/asm/barrier.h
@@ -0,0 +1,15 @@
+#ifndef _TOOLS_LINUX_ASM_TILE_BARRIER_H
+#define _TOOLS_LINUX_ASM_TILE_BARRIER_H
+/*
+ * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced
+ * in 620830b6954913647b7c7f68920cf48eddf6ad92, more work needed to make it
+ * more closely follow the Linux kernel arch/tile/include/asm/barrier.h file.
+ * Probably when we continue work on tools/ Kconfig support to have all the
+ * CONFIG_ needed for properly doing that.
+ */
+
+#define mb()		asm volatile ("mf" ::: "memory")
+#define wmb()		mb()
+#define rmb()		mb()
+
+#endif /* _TOOLS_LINUX_ASM_TILE_BARRIER_H */
diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index a00b9480adcf..c23fff3becd4 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -12,6 +12,8 @@
 #include "../../arch/sh/include/asm/barrier.h"
 #elif defined(__sparc__)
 #include "../../arch/sparc/include/asm/barrier.h"
+#elif defined(__tile__)
+#include "../../arch/tile/include/asm/barrier.h"
 #elif defined(__alpha__)
 #include "../../arch/alpha/include/asm/barrier.h"
 #elif defined(__mips__)
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index e4a722050f4f..7ef14d4434a4 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -9,6 +9,7 @@ tools/arch/sh/include/asm/barrier.h
 tools/arch/sparc/include/asm/barrier.h
 tools/arch/sparc/include/asm/barrier_32.h
 tools/arch/sparc/include/asm/barrier_64.h
+tools/arch/tile/include/asm/barrier.h
 tools/arch/x86/include/asm/barrier.h
 tools/arch/xtensa/include/asm/barrier.h
 tools/scripts
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index 876001e7e3c9..e5be325a439e 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -100,9 +100,6 @@
 #endif
 
 #ifdef __tile__
-#define mb()		asm volatile ("mf" ::: "memory")
-#define wmb()		asm volatile ("mf" ::: "memory")
-#define rmb()		asm volatile ("mf" ::: "memory")
 #define cpu_relax()	asm volatile ("mfspr zero, PASS" ::: "memory")
 #define CPUINFO_PROC    {"model name"}
 #endif

From 42b09d7b0e3d57a92b938fde5fcb532e9a88e1ea Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 7 May 2015 17:17:17 -0300
Subject: [PATCH 122/305] perf tools: Move generic barriers out of perf-sys.h

We will need it for atomic.h, so move it from the ad-hoc tools/perf/
place to a tools/ subset of the kernel arch/ hierarchy.

The parisc stuff was just using the asm-generic/barrier.h, no need to
introduce a tools/arch/parisc/ tree just yet.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-tfas9bs1gje0hfsvhqgrosd6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/asm/barrier.h | 2 ++
 tools/perf/perf-sys.h       | 9 ---------
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h
index c23fff3becd4..ac66ac594685 100644
--- a/tools/include/asm/barrier.h
+++ b/tools/include/asm/barrier.h
@@ -22,4 +22,6 @@
 #include "../../arch/ia64/include/asm/barrier.h"
 #elif defined(__xtensa__)
 #include "../../arch/xtensa/include/asm/barrier.h"
+#else
+#include <asm-generic/barrier.h>
 #endif
diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h
index e5be325a439e..83a25cef82fd 100644
--- a/tools/perf/perf-sys.h
+++ b/tools/perf/perf-sys.h
@@ -50,9 +50,6 @@
 #endif
 
 #ifdef __hppa__
-#define mb()		asm volatile("" ::: "memory")
-#define wmb()		asm volatile("" ::: "memory")
-#define rmb()		asm volatile("" ::: "memory")
 #define CPUINFO_PROC	{"cpu"}
 #endif
 
@@ -82,16 +79,10 @@
 #endif
 
 #ifdef __arc__
-#define mb()		asm volatile("" ::: "memory")
-#define wmb()		asm volatile("" ::: "memory")
-#define rmb()		asm volatile("" ::: "memory")
 #define CPUINFO_PROC	{"Processor"}
 #endif
 
 #ifdef __metag__
-#define mb()		asm volatile("" ::: "memory")
-#define wmb()		asm volatile("" ::: "memory")
-#define rmb()		asm volatile("" ::: "memory")
 #define CPUINFO_PROC	{"CPU"}
 #endif
 

From da6d8567512df11e0473b710c07de87efde5709c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 7 Apr 2015 11:53:41 -0300
Subject: [PATCH 123/305] tools include: Add basic atomic.h implementation from
 the kernel sources

Uses the arch/x86/ kernel code for x86_64/i386, fallbacking to a gcc
intrinsics implementation that has been tested in at least sparc64.

Will be used for reference counting in tools/perf.

Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-knfpjowhgyh6x4z0kfuk389j@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/arch/x86/include/asm/atomic.h    | 65 ++++++++++++++++++++++++++
 tools/arch/x86/include/asm/rmwcc.h     | 41 ++++++++++++++++
 tools/include/asm-generic/atomic-gcc.h | 63 +++++++++++++++++++++++++
 tools/include/asm/atomic.h             | 10 ++++
 tools/include/linux/atomic.h           |  6 +++
 tools/include/linux/types.h            |  4 ++
 tools/perf/MANIFEST                    |  4 ++
 7 files changed, 193 insertions(+)
 create mode 100644 tools/arch/x86/include/asm/atomic.h
 create mode 100644 tools/arch/x86/include/asm/rmwcc.h
 create mode 100644 tools/include/asm-generic/atomic-gcc.h
 create mode 100644 tools/include/asm/atomic.h
 create mode 100644 tools/include/linux/atomic.h

diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h
new file mode 100644
index 000000000000..059e33e94260
--- /dev/null
+++ b/tools/arch/x86/include/asm/atomic.h
@@ -0,0 +1,65 @@
+#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H
+#define _TOOLS_LINUX_ASM_X86_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include "rmwcc.h"
+
+#define LOCK_PREFIX "\n\tlock; "
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+	v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+	asm volatile(LOCK_PREFIX "incl %0"
+		     : "+m" (v->counter));
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+	GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e");
+}
+
+#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */
diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h
new file mode 100644
index 000000000000..a6669bc06939
--- /dev/null
+++ b/tools/arch/x86/include/asm/rmwcc.h
@@ -0,0 +1,41 @@
+#ifndef _TOOLS_LINUX_ASM_X86_RMWcc
+#define _TOOLS_LINUX_ASM_X86_RMWcc
+
+#ifdef CC_HAVE_ASM_GOTO
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	asm_volatile_goto (fullop "; j" cc " %l[cc_label]"		\
+			: : "m" (var), ## __VA_ARGS__ 			\
+			: "memory" : cc_label);				\
+	return 0;							\
+cc_label:								\
+	return 1;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc) 				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
+	__GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val))
+
+#else /* !CC_HAVE_ASM_GOTO */
+
+#define __GEN_RMWcc(fullop, var, cc, ...)				\
+do {									\
+	char c;								\
+	asm volatile (fullop "; set" cc " %1"				\
+			: "+m" (var), "=qm" (c)				\
+			: __VA_ARGS__ : "memory");			\
+	return c != 0;							\
+} while (0)
+
+#define GEN_UNARY_RMWcc(op, var, arg0, cc)				\
+	__GEN_RMWcc(op " " arg0, var, cc)
+
+#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc)			\
+	__GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val))
+
+#endif /* CC_HAVE_ASM_GOTO */
+
+#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */
diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h
new file mode 100644
index 000000000000..2ba78c9f5701
--- /dev/null
+++ b/tools/include/asm-generic/atomic-gcc.h
@@ -0,0 +1,63 @@
+#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H
+#define __TOOLS_ASM_GENERIC_ATOMIC_H
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+
+/*
+ * Atomic operations that C can't guarantee us.  Useful for
+ * resource counting etc..
+ *
+ * Excerpts obtained from the Linux kernel sources.
+ */
+
+#define ATOMIC_INIT(i)	{ (i) }
+
+/**
+ * atomic_read - read atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically reads the value of @v.
+ */
+static inline int atomic_read(const atomic_t *v)
+{
+	return ACCESS_ONCE((v)->counter);
+}
+
+/**
+ * atomic_set - set atomic variable
+ * @v: pointer of type atomic_t
+ * @i: required value
+ *
+ * Atomically sets the value of @v to @i.
+ */
+static inline void atomic_set(atomic_t *v, int i)
+{
+        v->counter = i;
+}
+
+/**
+ * atomic_inc - increment atomic variable
+ * @v: pointer of type atomic_t
+ *
+ * Atomically increments @v by 1.
+ */
+static inline void atomic_inc(atomic_t *v)
+{
+	__sync_add_and_fetch(&v->counter, 1);
+}
+
+/**
+ * atomic_dec_and_test - decrement and test
+ * @v: pointer of type atomic_t
+ *
+ * Atomically decrements @v by 1 and
+ * returns true if the result is 0, or false for all other
+ * cases.
+ */
+static inline int atomic_dec_and_test(atomic_t *v)
+{
+	return __sync_sub_and_fetch(&v->counter, 1) == 0;
+}
+
+#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */
diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h
new file mode 100644
index 000000000000..70794f538a86
--- /dev/null
+++ b/tools/include/asm/atomic.h
@@ -0,0 +1,10 @@
+#ifndef __TOOLS_LINUX_ASM_ATOMIC_H
+#define __TOOLS_LINUX_ASM_ATOMIC_H
+
+#if defined(__i386__) || defined(__x86_64__)
+#include "../../arch/x86/include/asm/atomic.h"
+#else
+#include <asm-generic/atomic-gcc.h>
+#endif
+
+#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */
diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h
new file mode 100644
index 000000000000..4e3d3d18ebab
--- /dev/null
+++ b/tools/include/linux/atomic.h
@@ -0,0 +1,6 @@
+#ifndef __TOOLS_LINUX_ATOMIC_H
+#define __TOOLS_LINUX_ATOMIC_H
+
+#include <asm/atomic.h>
+
+#endif /* __TOOLS_LINUX_ATOMIC_H */
diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index b5cf25e05df2..0bdeda66aae5 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -60,6 +60,10 @@ typedef __u32 __bitwise __be32;
 typedef __u64 __bitwise __le64;
 typedef __u64 __bitwise __be64;
 
+typedef struct {
+	int counter;
+} atomic_t;
+
 struct list_head {
 	struct list_head *next, *prev;
 };
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index 7ef14d4434a4..a83cf75164e1 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -14,11 +14,14 @@ tools/arch/x86/include/asm/barrier.h
 tools/arch/xtensa/include/asm/barrier.h
 tools/scripts
 tools/build
+tools/arch/x86/include/asm/atomic.h
+tools/arch/x86/include/asm/rmwcc.h
 tools/lib/traceevent
 tools/lib/api
 tools/lib/symbol/kallsyms.c
 tools/lib/symbol/kallsyms.h
 tools/lib/util/find_next_bit.c
+tools/include/asm/atomic.h
 tools/include/asm/barrier.h
 tools/include/asm/bug.h
 tools/include/asm-generic/barrier.h
@@ -32,6 +35,7 @@ tools/include/asm-generic/bitops/fls64.h
 tools/include/asm-generic/bitops/fls.h
 tools/include/asm-generic/bitops/hweight.h
 tools/include/asm-generic/bitops.h
+tools/include/linux/atomic.h
 tools/include/linux/bitops.h
 tools/include/linux/compiler.h
 tools/include/linux/export.h

From e1ed3a5b87ed6759e16ec93f16aae83d2cc77ca2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 7 Apr 2015 11:59:50 -0300
Subject: [PATCH 124/305] perf tools: Use atomic_t to implement
 thread__{get,put} refcnt

Fixing bugs in 'perf top' where the used thread unsafe 'struct thread'
refcount implementation was falling apart because we really use two
threads.

Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-hil2hol294u5ntcuof4jhmn6@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/thread.c | 6 +++---
 tools/perf/util/thread.h | 3 ++-
 2 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 1c8fbc9588c5..1b265521836c 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -53,7 +53,7 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 			goto err_thread;
 
 		list_add(&comm->list, &thread->comm_list);
-
+		atomic_set(&thread->refcnt, 0);
 	}
 
 	return thread;
@@ -84,13 +84,13 @@ void thread__delete(struct thread *thread)
 
 struct thread *thread__get(struct thread *thread)
 {
-	++thread->refcnt;
+	atomic_inc(&thread->refcnt);
 	return thread;
 }
 
 void thread__put(struct thread *thread)
 {
-	if (thread && --thread->refcnt == 0) {
+	if (thread && atomic_dec_and_test(&thread->refcnt)) {
 		list_del_init(&thread->node);
 		thread__delete(thread);
 	}
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 9b8a54dc34a8..f33c48cfdaa0 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_THREAD_H
 #define __PERF_THREAD_H
 
+#include <linux/atomic.h>
 #include <linux/rbtree.h>
 #include <linux/list.h>
 #include <unistd.h>
@@ -21,7 +22,7 @@ struct thread {
 	pid_t			tid;
 	pid_t			ppid;
 	int			cpu;
-	int			refcnt;
+	atomic_t		refcnt;
 	char			shortname[3];
 	bool			comm_set;
 	bool			dead; /* if set thread has exited */

From b91fc39f4ad7503419dd617df78401fa36266cb3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 6 Apr 2015 20:43:22 -0300
Subject: [PATCH 125/305] perf machine: Protect the machine->threads with a
 rwlock

In addition to using refcounts for the struct thread lifetime
management, we need to protect access to machine->threads from
concurrent access.

That happens in 'perf top', where a thread processes events, inserting
and deleting entries from that rb_tree while another thread decays
hist_entries, that end up dropping references and ultimately deleting
threads from the rb_tree and releasing its resources when no further
hist_entry (or other data structures, like in 'perf sched') references
it.

So the rule is the same for refcounts + protected trees in the kernel,
get the tree lock, find object, bump the refcount, drop the tree lock,
return, use object, drop the refcount if no more use of it is needed,
keep it if storing it in some other data structure, drop when releasing
that data structure.

I.e. pair "t = machine__find(new)_thread()" with a "thread__put(t)", and
"perf_event__preprocess_sample(&al)" with "addr_location__put(&al)".

The addr_location__put() one is because as we return references to
several data structures, we may end up adding more reference counting
for the other data structures and then we'll drop it at
addr_location__put() time.

Acked-by: David Ahern <dsahern@gmail.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-bs9rt4n0jw3hi9f3zxyy3xln@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c         | 10 +--
 tools/perf/builtin-diff.c             |  9 ++-
 tools/perf/builtin-inject.c           |  1 +
 tools/perf/builtin-kmem.c             |  7 ++-
 tools/perf/builtin-kvm.c              |  6 +-
 tools/perf/builtin-lock.c             |  8 ++-
 tools/perf/builtin-mem.c              |  5 +-
 tools/perf/builtin-report.c           |  9 +--
 tools/perf/builtin-sched.c            | 82 ++++++++++++++++--------
 tools/perf/builtin-script.c           | 20 +++---
 tools/perf/builtin-timechart.c        |  5 +-
 tools/perf/builtin-top.c              |  2 +-
 tools/perf/builtin-trace.c            | 36 +++++++----
 tools/perf/tests/code-reading.c       | 22 ++++---
 tools/perf/tests/dwarf-unwind.c       |  1 +
 tools/perf/tests/hists_common.c       |  1 +
 tools/perf/tests/hists_cumulate.c     |  4 +-
 tools/perf/tests/hists_filter.c       |  4 +-
 tools/perf/tests/hists_link.c         |  8 ++-
 tools/perf/tests/hists_output.c       |  4 +-
 tools/perf/tests/mmap-thread-lookup.c |  2 +
 tools/perf/tests/thread-mg-share.c    | 12 ++--
 tools/perf/util/build-id.c            |  5 +-
 tools/perf/util/db-export.c           | 14 +++--
 tools/perf/util/event.c               | 15 +++++
 tools/perf/util/event.h               |  2 +
 tools/perf/util/machine.c             | 89 ++++++++++++++++++++++-----
 tools/perf/util/machine.h             |  5 +-
 tools/perf/util/thread.c              | 10 ++-
 29 files changed, 284 insertions(+), 114 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 71bf7451c0ca..b57a027fb200 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -84,6 +84,7 @@ static int process_sample_event(struct perf_tool *tool,
 {
 	struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool);
 	struct addr_location al;
+	int ret = 0;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
@@ -92,15 +93,16 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 
 	if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap))
-		return 0;
+		goto out_put;
 
 	if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) {
 		pr_warning("problem incrementing symbol count, "
 			   "skipping event\n");
-		return -1;
+		ret = -1;
 	}
-
-	return 0;
+out_put:
+	addr_location__put(&al);
+	return ret;
 }
 
 static int hist_entry__tty_annotate(struct hist_entry *he,
diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c
index df6307b4050a..daaa7dca9c3b 100644
--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@@ -328,6 +328,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 {
 	struct addr_location al;
 	struct hists *hists = evsel__hists(evsel);
+	int ret = -1;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_warning("problem processing %d event, skipping it.\n",
@@ -338,7 +339,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 	if (hists__add_entry(hists, &al, sample->period,
 			     sample->weight, sample->transaction)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
-		return -1;
+		goto out_put;
 	}
 
 	/*
@@ -350,8 +351,10 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 	hists->stats.total_period += sample->period;
 	if (!al.filtered)
 		hists->stats.total_non_filtered_period += sample->period;
-
-	return 0;
+	ret = 0;
+out_put:
+	addr_location__put(&al);
+	return ret;
 }
 
 static struct perf_tool tool = {
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index d6a47e854b2b..52ec66b23607 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -365,6 +365,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool,
 		}
 	}
 
+	thread__put(thread);
 repipe:
 	perf_event__repipe(tool, event, sample, machine);
 	return 0;
diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index e628bf1a0c24..fe3fcb75f20a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -906,6 +906,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
+	int err = 0;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -919,10 +920,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 
 	if (evsel->handler != NULL) {
 		tracepoint_handler f = evsel->handler;
-		return f(evsel, sample);
+		err = f(evsel, sample);
 	}
 
-	return 0;
+	thread__put(thread);
+
+	return err;
 }
 
 static struct perf_tool perf_kmem = {
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 1f9338f6109c..15fecd3dc5d8 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -651,6 +651,7 @@ static int process_sample_event(struct perf_tool *tool,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
+	int err = 0;
 	struct thread *thread;
 	struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat,
 						 tool);
@@ -666,9 +667,10 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 
 	if (!handle_kvm_event(kvm, thread, evsel, sample))
-		return -1;
+		err = -1;
 
-	return 0;
+	thread__put(thread);
+	return err;
 }
 
 static int cpu_isa_config(struct perf_kvm_stat *kvm)
diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c
index d49c2ab85fc2..de16aaed516e 100644
--- a/tools/perf/builtin-lock.c
+++ b/tools/perf/builtin-lock.c
@@ -769,6 +769,7 @@ static void dump_threads(void)
 		t = perf_session__findnew(session, st->tid);
 		pr_info("%10d: %s\n", st->tid, thread__comm_str(t));
 		node = rb_next(node);
+		thread__put(t);
 	};
 }
 
@@ -810,6 +811,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 				struct perf_evsel *evsel,
 				struct machine *machine)
 {
+	int err = 0;
 	struct thread *thread = machine__findnew_thread(machine, sample->pid,
 							sample->tid);
 
@@ -821,10 +823,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 
 	if (evsel->handler != NULL) {
 		tracepoint_handler f = evsel->handler;
-		return f(evsel, sample);
+		err = f(evsel, sample);
 	}
 
-	return 0;
+	thread__put(thread);
+
+	return err;
 }
 
 static void sort_result(void)
diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
index 675216e08bfc..da2ec06f0742 100644
--- a/tools/perf/builtin-mem.c
+++ b/tools/perf/builtin-mem.c
@@ -74,7 +74,7 @@ dump_raw_samples(struct perf_tool *tool,
 	}
 
 	if (al.filtered || (mem->hide_unresolved && al.sym == NULL))
-		return 0;
+		goto out_put;
 
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
@@ -103,7 +103,8 @@ dump_raw_samples(struct perf_tool *tool,
 		symbol_conf.field_sep,
 		al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???",
 		al.sym ? al.sym->name : "???");
-
+out_put:
+	addr_location__put(&al);
 	return 0;
 }
 
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 18cb0ff39b4e..8d5118f5cedd 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -142,7 +142,7 @@ static int process_sample_event(struct perf_tool *tool,
 		.hide_unresolved = rep->hide_unresolved,
 		.add_entry_cb = hist_iter__report_callback,
 	};
-	int ret;
+	int ret = 0;
 
 	if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) {
 		pr_debug("problem processing %d event, skipping it.\n",
@@ -151,10 +151,10 @@ static int process_sample_event(struct perf_tool *tool,
 	}
 
 	if (rep->hide_unresolved && al.sym == NULL)
-		return 0;
+		goto out_put;
 
 	if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap))
-		return 0;
+		goto out_put;
 
 	if (sort__mode == SORT_MODE__BRANCH)
 		iter.ops = &hist_iter_branch;
@@ -172,7 +172,8 @@ static int process_sample_event(struct perf_tool *tool,
 				   rep);
 	if (ret < 0)
 		pr_debug("problem adding hist entry, skipping event\n");
-
+out_put:
+	addr_location__put(&al);
 	return ret;
 }
 
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 5275bab70313..79273ecf92eb 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -770,7 +770,7 @@ static int replay_fork_event(struct perf_sched *sched,
 	if (child == NULL || parent == NULL) {
 		pr_debug("thread does not exist on fork event: child %p, parent %p\n",
 				 child, parent);
-		return 0;
+		goto out_put;
 	}
 
 	if (verbose) {
@@ -781,6 +781,9 @@ static int replay_fork_event(struct perf_sched *sched,
 
 	register_pid(sched, parent->tid, thread__comm_str(parent));
 	register_pid(sched, child->tid, thread__comm_str(child));
+out_put:
+	thread__put(child);
+	thread__put(parent);
 	return 0;
 }
 
@@ -957,7 +960,7 @@ static int latency_switch_event(struct perf_sched *sched,
 	struct work_atoms *out_events, *in_events;
 	struct thread *sched_out, *sched_in;
 	u64 timestamp0, timestamp = sample->time;
-	int cpu = sample->cpu;
+	int cpu = sample->cpu, err = -1;
 	s64 delta;
 
 	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
@@ -976,15 +979,17 @@ static int latency_switch_event(struct perf_sched *sched,
 
 	sched_out = machine__findnew_thread(machine, -1, prev_pid);
 	sched_in = machine__findnew_thread(machine, -1, next_pid);
+	if (sched_out == NULL || sched_in == NULL)
+		goto out_put;
 
 	out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
 	if (!out_events) {
 		if (thread_atoms_insert(sched, sched_out))
-			return -1;
+			goto out_put;
 		out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid);
 		if (!out_events) {
 			pr_err("out-event: Internal tree error");
-			return -1;
+			goto out_put;
 		}
 	}
 	if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp))
@@ -993,22 +998,25 @@ static int latency_switch_event(struct perf_sched *sched,
 	in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
 	if (!in_events) {
 		if (thread_atoms_insert(sched, sched_in))
-			return -1;
+			goto out_put;
 		in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid);
 		if (!in_events) {
 			pr_err("in-event: Internal tree error");
-			return -1;
+			goto out_put;
 		}
 		/*
 		 * Take came in we have not heard about yet,
 		 * add in an initial atom in runnable state:
 		 */
 		if (add_sched_out_event(in_events, 'R', timestamp))
-			return -1;
+			goto out_put;
 	}
 	add_sched_in_event(in_events, timestamp);
-
-	return 0;
+	err = 0;
+out_put:
+	thread__put(sched_out);
+	thread__put(sched_in);
+	return err;
 }
 
 static int latency_runtime_event(struct perf_sched *sched,
@@ -1021,23 +1029,29 @@ static int latency_runtime_event(struct perf_sched *sched,
 	struct thread *thread = machine__findnew_thread(machine, -1, pid);
 	struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
 	u64 timestamp = sample->time;
-	int cpu = sample->cpu;
+	int cpu = sample->cpu, err = -1;
+
+	if (thread == NULL)
+		return -1;
 
 	BUG_ON(cpu >= MAX_CPUS || cpu < 0);
 	if (!atoms) {
 		if (thread_atoms_insert(sched, thread))
-			return -1;
+			goto out_put;
 		atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("in-event: Internal tree error");
-			return -1;
+			goto out_put;
 		}
 		if (add_sched_out_event(atoms, 'R', timestamp))
-			return -1;
+			goto out_put;
 	}
 
 	add_runtime_event(atoms, runtime, timestamp);
-	return 0;
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
 }
 
 static int latency_wakeup_event(struct perf_sched *sched,
@@ -1050,19 +1064,22 @@ static int latency_wakeup_event(struct perf_sched *sched,
 	struct work_atom *atom;
 	struct thread *wakee;
 	u64 timestamp = sample->time;
+	int err = -1;
 
 	wakee = machine__findnew_thread(machine, -1, pid);
+	if (wakee == NULL)
+		return -1;
 	atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
 	if (!atoms) {
 		if (thread_atoms_insert(sched, wakee))
-			return -1;
+			goto out_put;
 		atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("wakeup-event: Internal tree error");
-			return -1;
+			goto out_put;
 		}
 		if (add_sched_out_event(atoms, 'S', timestamp))
-			return -1;
+			goto out_put;
 	}
 
 	BUG_ON(list_empty(&atoms->work_list));
@@ -1081,17 +1098,21 @@ static int latency_wakeup_event(struct perf_sched *sched,
 	 * skip in this case.
 	 */
 	if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING)
-		return 0;
+		goto out_ok;
 
 	sched->nr_timestamps++;
 	if (atom->sched_out_time > timestamp) {
 		sched->nr_unordered_timestamps++;
-		return 0;
+		goto out_ok;
 	}
 
 	atom->state = THREAD_WAIT_CPU;
 	atom->wake_up_time = timestamp;
-	return 0;
+out_ok:
+	err = 0;
+out_put:
+	thread__put(wakee);
+	return err;
 }
 
 static int latency_migrate_task_event(struct perf_sched *sched,
@@ -1104,6 +1125,7 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 	struct work_atoms *atoms;
 	struct work_atom *atom;
 	struct thread *migrant;
+	int err = -1;
 
 	/*
 	 * Only need to worry about migration when profiling one CPU.
@@ -1112,18 +1134,20 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 		return 0;
 
 	migrant = machine__findnew_thread(machine, -1, pid);
+	if (migrant == NULL)
+		return -1;
 	atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 	if (!atoms) {
 		if (thread_atoms_insert(sched, migrant))
-			return -1;
+			goto out_put;
 		register_pid(sched, migrant->tid, thread__comm_str(migrant));
 		atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid);
 		if (!atoms) {
 			pr_err("migration-event: Internal tree error");
-			return -1;
+			goto out_put;
 		}
 		if (add_sched_out_event(atoms, 'R', timestamp))
-			return -1;
+			goto out_put;
 	}
 
 	BUG_ON(list_empty(&atoms->work_list));
@@ -1135,8 +1159,10 @@ static int latency_migrate_task_event(struct perf_sched *sched,
 
 	if (atom->sched_out_time > timestamp)
 		sched->nr_unordered_timestamps++;
-
-	return 0;
+	err = 0;
+out_put:
+	thread__put(migrant);
+	return err;
 }
 
 static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list)
@@ -1330,8 +1356,10 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 	}
 
 	sched_in = machine__findnew_thread(machine, -1, next_pid);
+	if (sched_in == NULL)
+		return -1;
 
-	sched->curr_thread[this_cpu] = sched_in;
+	sched->curr_thread[this_cpu] = thread__get(sched_in);
 
 	printf("  ");
 
@@ -1381,6 +1409,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel,
 		printf("\n");
 	}
 
+	thread__put(sched_in);
+
 	return 0;
 }
 
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 6805098e3751..24809787369f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -607,13 +607,14 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused,
 	}
 
 	if (al.filtered)
-		return 0;
+		goto out_put;
 
 	if (cpu_list && !test_bit(sample->cpu, cpu_bitmap))
-		return 0;
+		goto out_put;
 
 	scripting_ops->process_event(event, sample, evsel, &al);
-
+out_put:
+	addr_location__put(&al);
 	return 0;
 }
 
@@ -681,8 +682,8 @@ static int process_comm_event(struct perf_tool *tool,
 	print_sample_start(sample, thread, evsel);
 	perf_event__fprintf(event, stdout);
 	ret = 0;
-
 out:
+	thread__put(thread);
 	return ret;
 }
 
@@ -713,6 +714,7 @@ static int process_fork_event(struct perf_tool *tool,
 	}
 	print_sample_start(sample, thread, evsel);
 	perf_event__fprintf(event, stdout);
+	thread__put(thread);
 
 	return 0;
 }
@@ -721,6 +723,7 @@ static int process_exit_event(struct perf_tool *tool,
 			      struct perf_sample *sample,
 			      struct machine *machine)
 {
+	int err = 0;
 	struct thread *thread;
 	struct perf_script *script = container_of(tool, struct perf_script, tool);
 	struct perf_session *session = script->session;
@@ -742,9 +745,10 @@ static int process_exit_event(struct perf_tool *tool,
 	perf_event__fprintf(event, stdout);
 
 	if (perf_event__process_exit(tool, event, sample, machine) < 0)
-		return -1;
+		err = -1;
 
-	return 0;
+	thread__put(thread);
+	return err;
 }
 
 static int process_mmap_event(struct perf_tool *tool,
@@ -774,7 +778,7 @@ static int process_mmap_event(struct perf_tool *tool,
 	}
 	print_sample_start(sample, thread, evsel);
 	perf_event__fprintf(event, stdout);
-
+	thread__put(thread);
 	return 0;
 }
 
@@ -805,7 +809,7 @@ static int process_mmap2_event(struct perf_tool *tool,
 	}
 	print_sample_start(sample, thread, evsel);
 	perf_event__fprintf(event, stdout);
-
+	thread__put(thread);
 	return 0;
 }
 
diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index e50fe1187b0b..3b884e37ab8b 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -523,7 +523,7 @@ static const char *cat_backtrace(union perf_event *event,
 				 * Discard all.
 				 */
 				zfree(&p);
-				goto exit;
+				goto exit_put;
 			}
 			continue;
 		}
@@ -538,7 +538,8 @@ static const char *cat_backtrace(union perf_event *event,
 		else
 			fprintf(f, "..... %016" PRIx64 "\n", ip);
 	}
-
+exit_put:
+	addr_location__put(&al);
 exit:
 	fclose(f);
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 1cb3436276d1..232658337f18 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -793,7 +793,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 		pthread_mutex_unlock(&hists->lock);
 	}
 
-	return;
+	addr_location__put(&al);
 }
 
 static void perf_top__mmap_read_idx(struct perf_top *top, int idx)
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index d1139b6bd534..bb05e44f8f0c 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1712,7 +1712,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	void *args;
 	size_t printed = 0;
 	struct thread *thread;
-	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
@@ -1725,14 +1725,14 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
-		return -1;
+		goto out_put;
 
 	args = perf_evsel__sc_tp_ptr(evsel, args, sample);
 
 	if (ttrace->entry_str == NULL) {
 		ttrace->entry_str = malloc(1024);
 		if (!ttrace->entry_str)
-			return -1;
+			goto out_put;
 	}
 
 	if (!trace->summary_only)
@@ -1757,8 +1757,10 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel,
 		thread__put(trace->current);
 		trace->current = thread__get(thread);
 	}
-
-	return 0;
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
 }
 
 static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
@@ -1768,7 +1770,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	long ret;
 	u64 duration = 0;
 	struct thread *thread;
-	int id = perf_evsel__sc_tp_uint(evsel, id, sample);
+	int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1;
 	struct syscall *sc = trace__syscall_info(trace, evsel, id);
 	struct thread_trace *ttrace;
 
@@ -1781,7 +1783,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel,
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
-		return -1;
+		goto out_put;
 
 	if (trace->summary)
 		thread__update_stats(ttrace, id, sample);
@@ -1835,8 +1837,10 @@ signed_print:
 	fputc('\n', trace->output);
 out:
 	ttrace->entry_pending = false;
-
-	return 0;
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
 }
 
 static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel,
@@ -1863,6 +1867,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs
 
 	ttrace->runtime_ms += runtime_ms;
 	trace->runtime_ms += runtime_ms;
+	thread__put(thread);
 	return 0;
 
 out_dump:
@@ -1872,6 +1877,7 @@ out_dump:
 	       (pid_t)perf_evsel__intval(evsel, sample, "pid"),
 	       runtime,
 	       perf_evsel__intval(evsel, sample, "vruntime"));
+	thread__put(thread);
 	return 0;
 }
 
@@ -1924,11 +1930,12 @@ static int trace__pgfault(struct trace *trace,
 	struct addr_location al;
 	char map_type = 'd';
 	struct thread_trace *ttrace;
+	int err = -1;
 
 	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
 	ttrace = thread__trace(thread, trace->output);
 	if (ttrace == NULL)
-		return -1;
+		goto out_put;
 
 	if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ)
 		ttrace->pfmaj++;
@@ -1936,7 +1943,7 @@ static int trace__pgfault(struct trace *trace,
 		ttrace->pfmin++;
 
 	if (trace->summary_only)
-		return 0;
+		goto out;
 
 	thread__find_addr_location(thread, cpumode, MAP__FUNCTION,
 			      sample->ip, &al);
@@ -1967,8 +1974,11 @@ static int trace__pgfault(struct trace *trace,
 	print_location(trace->output, sample, &al, true, false);
 
 	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
-
-	return 0;
+out:
+	err = 0;
+out_put:
+	thread__put(thread);
+	return err;
 }
 
 static bool skip_sample(struct trace *trace, struct perf_sample *sample)
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index ca0e480e741b..e2a432b67d52 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -248,6 +248,7 @@ static int process_sample_event(struct machine *machine,
 	struct perf_sample sample;
 	struct thread *thread;
 	u8 cpumode;
+	int ret;
 
 	if (perf_evlist__parse_sample(evlist, event, &sample)) {
 		pr_debug("perf_evlist__parse_sample failed\n");
@@ -262,7 +263,9 @@ static int process_sample_event(struct machine *machine,
 
 	cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	return read_object_code(sample.ip, READLEN, cpumode, thread, state);
+	ret = read_object_code(sample.ip, READLEN, cpumode, thread, state);
+	thread__put(thread);
+	return ret;
 }
 
 static int process_event(struct machine *machine, struct perf_evlist *evlist,
@@ -457,13 +460,13 @@ static int do_test_code_reading(bool try_kcore)
 	thread = machine__findnew_thread(machine, pid, pid);
 	if (!thread) {
 		pr_debug("machine__findnew_thread failed\n");
-		goto out_err;
+		goto out_put;
 	}
 
 	cpus = cpu_map__new(NULL);
 	if (!cpus) {
 		pr_debug("cpu_map__new failed\n");
-		goto out_err;
+		goto out_put;
 	}
 
 	while (1) {
@@ -472,7 +475,7 @@ static int do_test_code_reading(bool try_kcore)
 		evlist = perf_evlist__new();
 		if (!evlist) {
 			pr_debug("perf_evlist__new failed\n");
-			goto out_err;
+			goto out_put;
 		}
 
 		perf_evlist__set_maps(evlist, cpus, threads);
@@ -485,7 +488,7 @@ static int do_test_code_reading(bool try_kcore)
 		ret = parse_events(evlist, str, NULL);
 		if (ret < 0) {
 			pr_debug("parse_events failed\n");
-			goto out_err;
+			goto out_put;
 		}
 
 		perf_evlist__config(evlist, &opts);
@@ -506,7 +509,7 @@ static int do_test_code_reading(bool try_kcore)
 				continue;
 			}
 			pr_debug("perf_evlist__open failed\n");
-			goto out_err;
+			goto out_put;
 		}
 		break;
 	}
@@ -514,7 +517,7 @@ static int do_test_code_reading(bool try_kcore)
 	ret = perf_evlist__mmap(evlist, UINT_MAX, false);
 	if (ret < 0) {
 		pr_debug("perf_evlist__mmap failed\n");
-		goto out_err;
+		goto out_put;
 	}
 
 	perf_evlist__enable(evlist);
@@ -525,7 +528,7 @@ static int do_test_code_reading(bool try_kcore)
 
 	ret = process_events(machine, evlist, &state);
 	if (ret < 0)
-		goto out_err;
+		goto out_put;
 
 	if (!have_vmlinux && !have_kcore && !try_kcore)
 		err = TEST_CODE_READING_NO_KERNEL_OBJ;
@@ -535,7 +538,10 @@ static int do_test_code_reading(bool try_kcore)
 		err = TEST_CODE_READING_NO_ACCESS;
 	else
 		err = TEST_CODE_READING_OK;
+out_put:
+	thread__put(thread);
 out_err:
+
 	if (evlist) {
 		perf_evlist__delete(evlist);
 	} else {
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 0bf06bec68c7..9b748e1ad46e 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -170,6 +170,7 @@ int test__dwarf_unwind(void)
 	}
 
 	err = krava_1(thread);
+	thread__put(thread);
 
  out:
 	machine__delete_threads(machine);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index a62c09134516..456f884eb27b 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -96,6 +96,7 @@ struct machine *setup_fake_machine(struct machines *machines)
 			goto out;
 
 		thread__set_comm(thread, fake_threads[i].comm, 0);
+		thread__put(thread);
 	}
 
 	for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) {
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index b08a95a5ca1a..620f626e5b35 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -105,8 +105,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-					 PERF_MAX_STACK_DEPTH, NULL) < 0)
+					 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+			addr_location__put(&al);
 			goto out;
+		}
 
 		fake_samples[i].thread = al.thread;
 		fake_samples[i].map = al.map;
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 108488cd71fa..82e1ee52e024 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -82,8 +82,10 @@ static int add_hist_entries(struct perf_evlist *evlist,
 				goto out;
 
 			if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-						 PERF_MAX_STACK_DEPTH, NULL) < 0)
+						 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+				addr_location__put(&al);
 				goto out;
+			}
 
 			fake_samples[i].thread = al.thread;
 			fake_samples[i].map = al.map;
diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c
index 34c61e4d3352..8c102b011424 100644
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@@ -91,8 +91,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 
 			he = __hists__add_entry(hists, &al, NULL,
 						NULL, NULL, 1, 1, 0, true);
-			if (he == NULL)
+			if (he == NULL) {
+				addr_location__put(&al);
 				goto out;
+			}
 
 			fake_common_samples[k].thread = al.thread;
 			fake_common_samples[k].map = al.map;
@@ -115,8 +117,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 
 			he = __hists__add_entry(hists, &al, NULL,
 						NULL, NULL, 1, 1, 0, true);
-			if (he == NULL)
+			if (he == NULL) {
+				addr_location__put(&al);
 				goto out;
+			}
 
 			fake_samples[i][k].thread = al.thread;
 			fake_samples[i][k].map = al.map;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index d8a23db80094..fd7ec4f9aeb4 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -71,8 +71,10 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 			goto out;
 
 		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-					 PERF_MAX_STACK_DEPTH, NULL) < 0)
+					 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+			addr_location__put(&al);
 			goto out;
+		}
 
 		fake_samples[i].thread = al.thread;
 		fake_samples[i].map = al.map;
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 2113f1c8611f..264e215c0d36 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -191,6 +191,8 @@ static int mmap_events(synth_cb synth)
 				      PERF_RECORD_MISC_USER, MAP__FUNCTION,
 				      (unsigned long) (td->map + 1), &al);
 
+		thread__put(thread);
+
 		if (!al.map) {
 			pr_debug("failed, couldn't find map\n");
 			err = -1;
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index b028499dd3cf..dc05bd62b4a3 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -64,22 +64,22 @@ int test__thread_mg_share(void)
 	TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
 
 	/* release thread group */
-	thread__delete(leader);
+	thread__put(leader);
 	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3);
 
-	thread__delete(t1);
+	thread__put(t1);
 	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2);
 
-	thread__delete(t2);
+	thread__put(t2);
 	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1);
 
-	thread__delete(t3);
+	thread__put(t3);
 
 	/* release other group  */
-	thread__delete(other_leader);
+	thread__put(other_leader);
 	TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1);
 
-	thread__delete(other);
+	thread__put(other);
 
 	/*
 	 * Cannot call machine__delete_threads(machine) now,
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index 61867dff5d5a..ad8cfcbaa25d 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -43,6 +43,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused,
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
+	thread__put(thread);
 	return 0;
 }
 
@@ -59,8 +60,10 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused,
 	dump_printf("(%d:%d):(%d:%d)\n", event->fork.pid, event->fork.tid,
 		    event->fork.ppid, event->fork.ptid);
 
-	if (thread)
+	if (thread) {
 		machine__remove_thread(machine, thread);
+		thread__put(thread);
+	}
 
 	return 0;
 }
diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index bb39a3ffc70b..eb7a2acb973b 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -122,6 +122,7 @@ int db_export__machine(struct db_export *dbe, struct machine *machine)
 int db_export__thread(struct db_export *dbe, struct thread *thread,
 		      struct machine *machine, struct comm *comm)
 {
+	struct thread *main_thread;
 	u64 main_thread_db_id = 0;
 	int err;
 
@@ -131,8 +132,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
 	thread->db_id = ++dbe->thread_last_db_id;
 
 	if (thread->pid_ != -1) {
-		struct thread *main_thread;
-
 		if (thread->pid_ == thread->tid) {
 			main_thread = thread;
 		} else {
@@ -144,14 +143,16 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
 			err = db_export__thread(dbe, main_thread, machine,
 						comm);
 			if (err)
-				return err;
+				goto out_put;
 			if (comm) {
 				err = db_export__comm_thread(dbe, comm, thread);
 				if (err)
-					return err;
+					goto out_put;
 			}
 		}
 		main_thread_db_id = main_thread->db_id;
+		if (main_thread != thread)
+			thread__put(main_thread);
 	}
 
 	if (dbe->export_thread)
@@ -159,6 +160,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread,
 					  machine);
 
 	return 0;
+
+out_put:
+	thread__put(main_thread);
+	return err;
 }
 
 int db_export__comm(struct db_export *dbe, struct comm *comm,
@@ -303,6 +308,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 	if (err)
 		return err;
 
+	/* FIXME: check refcounting for get_main_thread, that calls machine__find_thread... */
 	main_thread = get_main_thread(al->machine, thread);
 	if (main_thread)
 		comm = machine__thread_exec_comm(al->machine, main_thread);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index db526091f580..a513a51f7330 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -919,6 +919,10 @@ void thread__find_addr_location(struct thread *thread,
 		al->sym = NULL;
 }
 
+/*
+ * Callers need to drop the reference to al->thread, obtained in
+ * machine__findnew_thread()
+ */
 int perf_event__preprocess_sample(const union perf_event *event,
 				  struct machine *machine,
 				  struct addr_location *al,
@@ -979,6 +983,17 @@ int perf_event__preprocess_sample(const union perf_event *event,
 	return 0;
 }
 
+/*
+ * The preprocess_sample method will return with reference counts for the
+ * in it, when done using (and perhaps getting ref counts if needing to
+ * keep a pointer to one of those entries) it must be paired with
+ * addr_location__put(), so that the refcounts can be decremented.
+ */
+void addr_location__put(struct addr_location *al)
+{
+	thread__zput(al->thread);
+}
+
 bool is_bts_event(struct perf_event_attr *attr)
 {
 	return attr->type == PERF_TYPE_HARDWARE &&
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 7eecd5e23d77..97179abc80a1 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -426,6 +426,8 @@ int perf_event__preprocess_sample(const union perf_event *event,
 				  struct addr_location *al,
 				  struct perf_sample *sample);
 
+void addr_location__put(struct addr_location *al);
+
 struct thread;
 
 bool is_bts_event(struct perf_event_attr *attr);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2f471105efb1..8b0b307d91f4 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -14,6 +14,8 @@
 #include "unwind.h"
 #include "linux/hash.h"
 
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock);
+
 static void dsos__init(struct dsos *dsos)
 {
 	INIT_LIST_HEAD(&dsos->head);
@@ -28,6 +30,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 	dsos__init(&machine->kernel_dsos);
 
 	machine->threads = RB_ROOT;
+	pthread_rwlock_init(&machine->threads_lock, NULL);
 	INIT_LIST_HEAD(&machine->dead_threads);
 	machine->last_match = NULL;
 
@@ -54,6 +57,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 
 		snprintf(comm, sizeof(comm), "[guest/%d]", pid);
 		thread__set_comm(thread, comm, 0);
+		thread__put(thread);
 	}
 
 	machine->current_tid = NULL;
@@ -91,14 +95,17 @@ static void dsos__delete(struct dsos *dsos)
 
 void machine__delete_threads(struct machine *machine)
 {
-	struct rb_node *nd = rb_first(&machine->threads);
+	struct rb_node *nd;
 
+	pthread_rwlock_wrlock(&machine->threads_lock);
+	nd = rb_first(&machine->threads);
 	while (nd) {
 		struct thread *t = rb_entry(nd, struct thread, rb_node);
 
 		nd = rb_next(nd);
-		machine__remove_thread(machine, t);
+		__machine__remove_thread(machine, t, false);
 	}
+	pthread_rwlock_unlock(&machine->threads_lock);
 }
 
 void machine__exit(struct machine *machine)
@@ -109,6 +116,7 @@ void machine__exit(struct machine *machine)
 	vdso__exit(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
+	pthread_rwlock_destroy(&machine->threads_lock);
 }
 
 void machine__delete(struct machine *machine)
@@ -303,7 +311,7 @@ static void machine__update_thread_pid(struct machine *machine,
 	if (th->pid_ == th->tid)
 		return;
 
-	leader = machine__findnew_thread(machine, th->pid_, th->pid_);
+	leader = __machine__findnew_thread(machine, th->pid_, th->pid_);
 	if (!leader)
 		goto out_err;
 
@@ -336,9 +344,9 @@ out_err:
 	pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid);
 }
 
-static struct thread *__machine__findnew_thread(struct machine *machine,
-						pid_t pid, pid_t tid,
-						bool create)
+static struct thread *____machine__findnew_thread(struct machine *machine,
+						  pid_t pid, pid_t tid,
+						  bool create)
 {
 	struct rb_node **p = &machine->threads.rb_node;
 	struct rb_node *parent = NULL;
@@ -393,6 +401,7 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 		 */
 		if (thread__init_map_groups(th, machine)) {
 			rb_erase(&th->rb_node, &machine->threads);
+			RB_CLEAR_NODE(&th->rb_node);
 			thread__delete(th);
 			return NULL;
 		}
@@ -406,16 +415,30 @@ static struct thread *__machine__findnew_thread(struct machine *machine,
 	return th;
 }
 
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid)
+{
+	return ____machine__findnew_thread(machine, pid, tid, true);
+}
+
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
 				       pid_t tid)
 {
-	return __machine__findnew_thread(machine, pid, tid, true);
+	struct thread *th;
+
+	pthread_rwlock_wrlock(&machine->threads_lock);
+	th = thread__get(__machine__findnew_thread(machine, pid, tid));
+	pthread_rwlock_unlock(&machine->threads_lock);
+	return th;
 }
 
 struct thread *machine__find_thread(struct machine *machine, pid_t pid,
 				    pid_t tid)
 {
-	return __machine__findnew_thread(machine, pid, tid, false);
+	struct thread *th;
+	pthread_rwlock_rdlock(&machine->threads_lock);
+	th =  thread__get(____machine__findnew_thread(machine, pid, tid, false));
+	pthread_rwlock_unlock(&machine->threads_lock);
+	return th;
 }
 
 struct comm *machine__thread_exec_comm(struct machine *machine,
@@ -434,6 +457,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 							event->comm.pid,
 							event->comm.tid);
 	bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC;
+	int err = 0;
 
 	if (exec)
 		machine->comm_exec = true;
@@ -444,10 +468,12 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event
 	if (thread == NULL ||
 	    __thread__set_comm(thread, event->comm.comm, sample->time, exec)) {
 		dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n");
-		return -1;
+		err = -1;
 	}
 
-	return 0;
+	thread__put(thread);
+
+	return err;
 }
 
 int machine__process_lost_event(struct machine *machine __maybe_unused,
@@ -591,12 +617,16 @@ size_t machine__fprintf(struct machine *machine, FILE *fp)
 	size_t ret = 0;
 	struct rb_node *nd;
 
+	pthread_rwlock_rdlock(&machine->threads_lock);
+
 	for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) {
 		struct thread *pos = rb_entry(nd, struct thread, rb_node);
 
 		ret += thread__fprintf(pos, fp);
 	}
 
+	pthread_rwlock_unlock(&machine->threads_lock);
+
 	return ret;
 }
 
@@ -1213,11 +1243,14 @@ int machine__process_mmap2_event(struct machine *machine,
 			event->mmap2.filename, type, thread);
 
 	if (map == NULL)
-		goto out_problem;
+		goto out_problem_map;
 
 	thread__insert_map(thread, map);
+	thread__put(thread);
 	return 0;
 
+out_problem_map:
+	thread__put(thread);
 out_problem:
 	dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n");
 	return 0;
@@ -1260,31 +1293,45 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 			type, thread);
 
 	if (map == NULL)
-		goto out_problem;
+		goto out_problem_map;
 
 	thread__insert_map(thread, map);
+	thread__put(thread);
 	return 0;
 
+out_problem_map:
+	thread__put(thread);
 out_problem:
 	dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n");
 	return 0;
 }
 
-void machine__remove_thread(struct machine *machine, struct thread *th)
+static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
 	if (machine->last_match == th)
 		thread__zput(machine->last_match);
 
+	BUG_ON(th->refcnt.counter == 0);
+	if (lock)
+		pthread_rwlock_wrlock(&machine->threads_lock);
 	rb_erase(&th->rb_node, &machine->threads);
+	RB_CLEAR_NODE(&th->rb_node);
 	/*
 	 * Move it first to the dead_threads list, then drop the reference,
 	 * if this is the last reference, then the thread__delete destructor
 	 * will be called and we will remove it from the dead_threads list.
 	 */
 	list_add_tail(&th->node, &machine->dead_threads);
+	if (lock)
+		pthread_rwlock_unlock(&machine->threads_lock);
 	thread__put(th);
 }
 
+void machine__remove_thread(struct machine *machine, struct thread *th)
+{
+	return __machine__remove_thread(machine, th, true);
+}
+
 int machine__process_fork_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample)
 {
@@ -1294,10 +1341,13 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	struct thread *parent = machine__findnew_thread(machine,
 							event->fork.ppid,
 							event->fork.ptid);
+	int err = 0;
 
 	/* if a thread currently exists for the thread id remove it */
-	if (thread != NULL)
+	if (thread != NULL) {
 		machine__remove_thread(machine, thread);
+		thread__put(thread);
+	}
 
 	thread = machine__findnew_thread(machine, event->fork.pid,
 					 event->fork.tid);
@@ -1307,10 +1357,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 	if (thread == NULL || parent == NULL ||
 	    thread__fork(thread, parent, sample->time) < 0) {
 		dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
-		return -1;
+		err = -1;
 	}
+	thread__put(thread);
+	thread__put(parent);
 
-	return 0;
+	return err;
 }
 
 int machine__process_exit_event(struct machine *machine, union perf_event *event,
@@ -1323,8 +1375,10 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event
 	if (dump_trace)
 		perf_event__fprintf_task(event, stdout);
 
-	if (thread != NULL)
+	if (thread != NULL) {
 		thread__exited(thread);
+		thread__put(thread);
+	}
 
 	return 0;
 }
@@ -1841,6 +1895,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid,
 		return -ENOMEM;
 
 	thread->cpu = cpu;
+	thread__put(thread);
 
 	return 0;
 }
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 1d992961d5d1..c7963c63c474 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -30,6 +30,7 @@ struct machine {
 	bool		  comm_exec;
 	char		  *root_dir;
 	struct rb_root	  threads;
+	pthread_rwlock_t  threads_lock;
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
@@ -151,8 +152,8 @@ static inline bool machine__is_host(struct machine *machine)
 	return machine ? machine->pid == HOST_KERNEL_ID : false;
 }
 
-struct thread *machine__findnew_thread(struct machine *machine, pid_t pid,
-				       pid_t tid);
+struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
+struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 1b265521836c..16c28a37a9e4 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -18,7 +18,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine)
 	if (pid == thread->tid || pid == -1) {
 		thread->mg = map_groups__new(machine);
 	} else {
-		leader = machine__findnew_thread(machine, pid, pid);
+		leader = __machine__findnew_thread(machine, pid, pid);
 		if (leader)
 			thread->mg = map_groups__get(leader->mg);
 	}
@@ -54,6 +54,8 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 
 		list_add(&comm->list, &thread->comm_list);
 		atomic_set(&thread->refcnt, 0);
+		INIT_LIST_HEAD(&thread->node);
+		RB_CLEAR_NODE(&thread->rb_node);
 	}
 
 	return thread;
@@ -67,6 +69,9 @@ void thread__delete(struct thread *thread)
 {
 	struct comm *comm, *tmp;
 
+	BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
+	BUG_ON(!list_empty(&thread->node));
+
 	thread_stack__free(thread);
 
 	if (thread->mg) {
@@ -84,7 +89,8 @@ void thread__delete(struct thread *thread)
 
 struct thread *thread__get(struct thread *thread)
 {
-	atomic_inc(&thread->refcnt);
+	if (thread)
+		atomic_inc(&thread->refcnt);
 	return thread;
 }
 

From d65817b4e707068c2dd3e002e87c2a0294aabc2c Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 8 May 2015 11:37:59 -0700
Subject: [PATCH 126/305] perf bench futex: Support parallel waker threads

The futex-wake benchmark only measures wakeups done within a single
process. While this has value in its own, it does not really generate
any hb->lock contention.

A new benchmark 'wake-parallel' is added, by extending the futex-wake
code such that we can measure parallel waker threads. The program output
shows the avg per-thread latency in order to complete its share of
wakeups:

Run summary [PID 13474]: blocking on 512 threads (at [private] futex 0xa88668), 8 threads waking up 64 at a time.

[Run 1]: Avg per-thread latency (waking 64/512 threads) in 0.6230 ms (+-15.31%)
[Run 2]: Avg per-thread latency (waking 64/512 threads) in 0.5175 ms (+-29.95%)
[Run 3]: Avg per-thread latency (waking 64/512 threads) in 0.7578 ms (+-18.03%)
[Run 4]: Avg per-thread latency (waking 64/512 threads) in 0.8944 ms (+-12.54%)
[Run 5]: Avg per-thread latency (waking 64/512 threads) in 1.1204 ms (+-23.85%)
Avg per-thread latency (waking 64/512 threads) in 0.7826 ms (+-9.91%)

Naturally, different combinations of numbers of blocking and waker
threads will exhibit different information.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Davidlohr Bueso <dbueso@suse.de>
Link: http://lkml.kernel.org/r/1431110280-20231-1-git-send-email-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-bench.txt |   3 +
 tools/perf/bench/Build                  |   1 +
 tools/perf/bench/bench.h                |   2 +
 tools/perf/bench/futex-wake-parallel.c  | 294 ++++++++++++++++++++++++
 tools/perf/builtin-bench.c              |   1 +
 5 files changed, 301 insertions(+)
 create mode 100644 tools/perf/bench/futex-wake-parallel.c

diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt
index f6480cbf309b..bf3d0644bf10 100644
--- a/tools/perf/Documentation/perf-bench.txt
+++ b/tools/perf/Documentation/perf-bench.txt
@@ -210,6 +210,9 @@ Suite for evaluating hash tables.
 *wake*::
 Suite for evaluating wake calls.
 
+*wake-parallel*::
+Suite for evaluating parallel wake calls.
+
 *requeue*::
 Suite for evaluating requeue calls.
 
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index 5ce98023d518..c3ab760e06b4 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -3,6 +3,7 @@ perf-y += sched-pipe.o
 perf-y += mem-memcpy.o
 perf-y += futex-hash.o
 perf-y += futex-wake.o
+perf-y += futex-wake-parallel.o
 perf-y += futex-requeue.o
 
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h
index 3c4dd44d45cb..70b2f718cc21 100644
--- a/tools/perf/bench/bench.h
+++ b/tools/perf/bench/bench.h
@@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv,
 extern int bench_mem_memset(int argc, const char **argv, const char *prefix);
 extern int bench_futex_hash(int argc, const char **argv, const char *prefix);
 extern int bench_futex_wake(int argc, const char **argv, const char *prefix);
+extern int bench_futex_wake_parallel(int argc, const char **argv,
+				     const char *prefix);
 extern int bench_futex_requeue(int argc, const char **argv, const char *prefix);
 
 #define BENCH_FORMAT_DEFAULT_STR	"default"
diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c
new file mode 100644
index 000000000000..6d8c9fa2a16c
--- /dev/null
+++ b/tools/perf/bench/futex-wake-parallel.c
@@ -0,0 +1,294 @@
+/*
+ * Copyright (C) 2015 Davidlohr Bueso.
+ *
+ * Block a bunch of threads and let parallel waker threads wakeup an
+ * equal amount of them. The program output reflects the avg latency
+ * for each individual thread to service its share of work. Ultimately
+ * it can be used to measure futex_wake() changes.
+ */
+
+#include "../perf.h"
+#include "../util/util.h"
+#include "../util/stat.h"
+#include "../util/parse-options.h"
+#include "../util/header.h"
+#include "bench.h"
+#include "futex.h"
+
+#include <err.h>
+#include <stdlib.h>
+#include <sys/time.h>
+#include <pthread.h>
+
+struct thread_data {
+	pthread_t worker;
+	unsigned int nwoken;
+	struct timeval runtime;
+};
+
+static unsigned int nwakes = 1;
+
+/* all threads will block on the same futex -- hash bucket chaos ;) */
+static u_int32_t futex = 0;
+
+static pthread_t *blocked_worker;
+static bool done = false, silent = false, fshared = false;
+static unsigned int nblocked_threads = 0, nwaking_threads = 0;
+static pthread_mutex_t thread_lock;
+static pthread_cond_t thread_parent, thread_worker;
+static struct stats waketime_stats, wakeup_stats;
+static unsigned int ncpus, threads_starting;
+static int futex_flag = 0;
+
+static const struct option options[] = {
+	OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"),
+	OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"),
+	OPT_BOOLEAN( 's', "silent",  &silent,   "Silent mode: do not display data/details"),
+	OPT_BOOLEAN( 'S', "shared",  &fshared,  "Use shared futexes instead of private ones"),
+	OPT_END()
+};
+
+static const char * const bench_futex_wake_parallel_usage[] = {
+	"perf bench futex wake-parallel <options>",
+	NULL
+};
+
+static void *waking_workerfn(void *arg)
+{
+	struct thread_data *waker = (struct thread_data *) arg;
+	struct timeval start, end;
+
+	gettimeofday(&start, NULL);
+
+	waker->nwoken = futex_wake(&futex, nwakes, futex_flag);
+	if (waker->nwoken != nwakes)
+		warnx("couldn't wakeup all tasks (%d/%d)",
+		      waker->nwoken, nwakes);
+
+	gettimeofday(&end, NULL);
+	timersub(&end, &start, &waker->runtime);
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr)
+{
+	unsigned int i;
+
+	pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
+
+	/* create and block all threads */
+	for (i = 0; i < nwaking_threads; i++) {
+		/*
+		 * Thread creation order will impact per-thread latency
+		 * as it will affect the order to acquire the hb spinlock.
+		 * For now let the scheduler decide.
+		 */
+		if (pthread_create(&td[i].worker, &thread_attr,
+				   waking_workerfn, (void *)&td[i]))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+
+	for (i = 0; i < nwaking_threads; i++)
+		if (pthread_join(td[i].worker, NULL))
+			err(EXIT_FAILURE, "pthread_join");
+}
+
+static void *blocked_workerfn(void *arg __maybe_unused)
+{
+	pthread_mutex_lock(&thread_lock);
+	threads_starting--;
+	if (!threads_starting)
+		pthread_cond_signal(&thread_parent);
+	pthread_cond_wait(&thread_worker, &thread_lock);
+	pthread_mutex_unlock(&thread_lock);
+
+	while (1) { /* handle spurious wakeups */
+		if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
+	return NULL;
+}
+
+static void block_threads(pthread_t *w, pthread_attr_t thread_attr)
+{
+	cpu_set_t cpu;
+	unsigned int i;
+
+	threads_starting = nblocked_threads;
+
+	/* create and block all threads */
+	for (i = 0; i < nblocked_threads; i++) {
+		CPU_ZERO(&cpu);
+		CPU_SET(i % ncpus, &cpu);
+
+		if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu))
+			err(EXIT_FAILURE, "pthread_attr_setaffinity_np");
+
+		if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL))
+			err(EXIT_FAILURE, "pthread_create");
+	}
+}
+
+static void print_run(struct thread_data *waking_worker, unsigned int run_num)
+{
+	unsigned int i, wakeup_avg;
+	double waketime_avg, waketime_stddev;
+	struct stats __waketime_stats, __wakeup_stats;
+
+	init_stats(&__wakeup_stats);
+	init_stats(&__waketime_stats);
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&__wakeup_stats, waking_worker[i].nwoken);
+	}
+
+	waketime_avg = avg_stats(&__waketime_stats);
+	waketime_stddev = stddev_stats(&__waketime_stats);
+	wakeup_avg = avg_stats(&__wakeup_stats);
+
+	printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) "
+	       "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg,
+	       nblocked_threads, waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+static void print_summary(void)
+{
+	unsigned int wakeup_avg;
+	double waketime_avg, waketime_stddev;
+
+	waketime_avg = avg_stats(&waketime_stats);
+	waketime_stddev = stddev_stats(&waketime_stats);
+	wakeup_avg = avg_stats(&wakeup_stats);
+
+	printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n",
+	       wakeup_avg,
+	       nblocked_threads,
+	       waketime_avg/1e3,
+	       rel_stddev_stats(waketime_stddev, waketime_avg));
+}
+
+
+static void do_run_stats(struct thread_data *waking_worker)
+{
+	unsigned int i;
+
+	for (i = 0; i < nwaking_threads; i++) {
+		update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec);
+		update_stats(&wakeup_stats, waking_worker[i].nwoken);
+	}
+
+}
+
+static void toggle_done(int sig __maybe_unused,
+			siginfo_t *info __maybe_unused,
+			void *uc __maybe_unused)
+{
+	done = true;
+}
+
+int bench_futex_wake_parallel(int argc, const char **argv,
+			      const char *prefix __maybe_unused)
+{
+	int ret = 0;
+	unsigned int i, j;
+	struct sigaction act;
+	pthread_attr_t thread_attr;
+	struct thread_data *waking_worker;
+
+	argc = parse_options(argc, argv, options,
+			     bench_futex_wake_parallel_usage, 0);
+	if (argc) {
+		usage_with_options(bench_futex_wake_parallel_usage, options);
+		exit(EXIT_FAILURE);
+	}
+
+	sigfillset(&act.sa_mask);
+	act.sa_sigaction = toggle_done;
+	sigaction(SIGINT, &act, NULL);
+
+	ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	if (!nblocked_threads)
+		nblocked_threads = ncpus;
+
+	/* some sanity checks */
+	if (nwaking_threads > nblocked_threads || !nwaking_threads)
+		nwaking_threads = nblocked_threads;
+
+	if (nblocked_threads % nwaking_threads)
+		errx(EXIT_FAILURE, "Must be perfectly divisible");
+	/*
+	 * Each thread will wakeup nwakes tasks in
+	 * a single futex_wait call.
+	 */
+	nwakes = nblocked_threads/nwaking_threads;
+
+	blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker));
+	if (!blocked_worker)
+		err(EXIT_FAILURE, "calloc");
+
+	if (!fshared)
+		futex_flag = FUTEX_PRIVATE_FLAG;
+
+	printf("Run summary [PID %d]: blocking on %d threads (at [%s] "
+	       "futex %p), %d threads waking up %d at a time.\n\n",
+	       getpid(), nblocked_threads, fshared ? "shared":"private",
+	       &futex, nwaking_threads, nwakes);
+
+	init_stats(&wakeup_stats);
+	init_stats(&waketime_stats);
+
+	pthread_attr_init(&thread_attr);
+	pthread_mutex_init(&thread_lock, NULL);
+	pthread_cond_init(&thread_parent, NULL);
+	pthread_cond_init(&thread_worker, NULL);
+
+	for (j = 0; j < bench_repeat && !done; j++) {
+		waking_worker = calloc(nwaking_threads, sizeof(*waking_worker));
+		if (!waking_worker)
+			err(EXIT_FAILURE, "calloc");
+
+		/* create, launch & block all threads */
+		block_threads(blocked_worker, thread_attr);
+
+		/* make sure all threads are already blocked */
+		pthread_mutex_lock(&thread_lock);
+		while (threads_starting)
+			pthread_cond_wait(&thread_parent, &thread_lock);
+		pthread_cond_broadcast(&thread_worker);
+		pthread_mutex_unlock(&thread_lock);
+
+		usleep(100000);
+
+		/* Ok, all threads are patiently blocked, start waking folks up */
+		wakeup_threads(waking_worker, thread_attr);
+
+		for (i = 0; i < nblocked_threads; i++) {
+			ret = pthread_join(blocked_worker[i], NULL);
+			if (ret)
+				err(EXIT_FAILURE, "pthread_join");
+		}
+
+		do_run_stats(waking_worker);
+		if (!silent)
+			print_run(waking_worker, j);
+
+		free(waking_worker);
+	}
+
+	/* cleanup & report results */
+	pthread_cond_destroy(&thread_parent);
+	pthread_cond_destroy(&thread_worker);
+	pthread_mutex_destroy(&thread_lock);
+	pthread_attr_destroy(&thread_attr);
+
+	print_summary();
+
+	free(blocked_worker);
+	return ret;
+}
diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c
index b9a56fa83330..b5314e452ec7 100644
--- a/tools/perf/builtin-bench.c
+++ b/tools/perf/builtin-bench.c
@@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = {
 static struct bench futex_benchmarks[] = {
 	{ "hash",	"Benchmark for futex hash table",               bench_futex_hash	},
 	{ "wake",	"Benchmark for futex wake calls",               bench_futex_wake	},
+	{ "wake-parallel", "Benchmark for parallel futex wake calls",   bench_futex_wake_parallel },
 	{ "requeue",	"Benchmark for futex requeue calls",            bench_futex_requeue	},
 	{ "all",	"Test all futex benchmarks",			NULL			},
 	{ NULL,		NULL,						NULL			}

From 598adc5c9c1cfd3f154f6d9df72b38eda63e306e Mon Sep 17 00:00:00 2001
From: Davidlohr Bueso <dave@stgolabs.net>
Date: Fri, 8 May 2015 11:38:00 -0700
Subject: [PATCH 127/305] perf bench futex: Handle spurious wakeups

Wrap futex_wait around a loop and catch for EINTR.

Either a spurious wakeup occurred or a signal interrupted is, either way
we need to block again.

Signed-off-by: Davidlohr Bueso <dbueso@suse.de>
Cc: Davidlohr Bueso <dbueso@suse.de>
Link: http://lkml.kernel.org/r/1431110280-20231-2-git-send-email-dave@stgolabs.net
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/futex-wake.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c
index 929f762be47e..e5e41d3bdce7 100644
--- a/tools/perf/bench/futex-wake.c
+++ b/tools/perf/bench/futex-wake.c
@@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused)
 	pthread_cond_wait(&thread_worker, &thread_lock);
 	pthread_mutex_unlock(&thread_lock);
 
-	futex_wait(&futex1, 0, NULL, futex_flag);
+	while (1) {
+		if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR)
+			break;
+	}
+
+	pthread_exit(NULL);
 	return NULL;
 }
 

From 442255215cb9651668cb09350b0d51e111219f17 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 8 May 2015 10:03:28 +0900
Subject: [PATCH 128/305] perf probe: Use perf_probe_event.target instead of
 passing as an argument

Use perf_probe_event.target field for the target binary instead of
passing it as an argument.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150508010328.24812.67887.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c    |  1 -
 tools/perf/util/probe-event.c | 55 ++++++++++++++++-------------------
 tools/perf/util/probe-event.h |  4 +--
 3 files changed, 27 insertions(+), 33 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 9c4cf5e5aad4..233f9413ec8c 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -478,7 +478,6 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 
 		ret = show_available_vars(params.events, params.nevents,
 					  params.max_probe_points,
-					  params.target,
 					  params.filter,
 					  params.show_ext_vars);
 		if (ret < 0)
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 37a3a8b04a69..b8fd48d4eff8 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -344,15 +344,14 @@ out:
 
 static int get_alternative_probe_event(struct debuginfo *dinfo,
 				       struct perf_probe_event *pev,
-				       struct perf_probe_point *tmp,
-				       const char *target)
+				       struct perf_probe_point *tmp)
 {
 	int ret;
 
 	memcpy(tmp, &pev->point, sizeof(*tmp));
 	memset(&pev->point, 0, sizeof(pev->point));
 	ret = find_alternative_probe_point(dinfo, tmp, &pev->point,
-					   target, pev->uprobes);
+					   pev->target, pev->uprobes);
 	if (ret < 0)
 		memcpy(&pev->point, tmp, sizeof(*tmp));
 
@@ -601,15 +600,14 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs,
-					  int max_tevs, const char *target)
+					  int max_tevs)
 {
 	bool need_dwarf = perf_probe_event_need_dwarf(pev);
 	struct perf_probe_point tmp;
 	struct debuginfo *dinfo;
 	int ntevs, ret = 0;
 
-	dinfo = open_debuginfo(target, !need_dwarf);
-
+	dinfo = open_debuginfo(pev->target, !need_dwarf);
 	if (!dinfo) {
 		if (need_dwarf)
 			return -ENOENT;
@@ -622,7 +620,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
 
 	if (ntevs == 0)	{  /* Not found, retry with an alternative */
-		ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
 		if (!ret) {
 			ntevs = debuginfo__find_trace_events(dinfo, pev,
 							     tevs, max_tevs);
@@ -640,7 +638,7 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 	if (ntevs > 0) {	/* Succeeded to find trace events */
 		pr_debug("Found %d probe_trace_events.\n", ntevs);
 		ret = post_process_probe_trace_events(*tevs, ntevs,
-							target, pev->uprobes);
+						pev->target, pev->uprobes);
 		if (ret < 0 || ret == ntevs) {
 			clear_probe_trace_events(*tevs, ntevs);
 			zfree(tevs);
@@ -824,7 +822,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user)
 static int show_available_vars_at(struct debuginfo *dinfo,
 				  struct perf_probe_event *pev,
 				  int max_vls, struct strfilter *_filter,
-				  bool externs, const char *target)
+				  bool externs)
 {
 	char *buf;
 	int ret, i, nvars;
@@ -841,7 +839,7 @@ static int show_available_vars_at(struct debuginfo *dinfo,
 	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
 						max_vls, externs);
 	if (!ret) {  /* Not found, retry with an alternative */
-		ret = get_alternative_probe_event(dinfo, pev, &tmp, target);
+		ret = get_alternative_probe_event(dinfo, pev, &tmp);
 		if (!ret) {
 			ret = debuginfo__find_available_vars_at(dinfo, pev,
 						&vls, max_vls, externs);
@@ -891,8 +889,7 @@ end:
 
 /* Show available variables on given probe point */
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			int max_vls, const char *module,
-			struct strfilter *_filter, bool externs)
+			int max_vls, struct strfilter *_filter, bool externs)
 {
 	int i, ret = 0;
 	struct debuginfo *dinfo;
@@ -901,7 +898,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 	if (ret < 0)
 		return ret;
 
-	dinfo = open_debuginfo(module, false);
+	dinfo = open_debuginfo(pevs->target, false);
 	if (!dinfo) {
 		ret = -ENOENT;
 		goto out;
@@ -911,7 +908,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 
 	for (i = 0; i < npevs && ret >= 0; i++)
 		ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
-					     externs, module);
+					     externs);
 
 	debuginfo__delete(dinfo);
 out:
@@ -931,8 +928,7 @@ find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 				struct probe_trace_event **tevs __maybe_unused,
-				int max_tevs __maybe_unused,
-				const char *target __maybe_unused)
+				int max_tevs __maybe_unused)
 {
 	if (perf_probe_event_need_dwarf(pev)) {
 		pr_warning("Debuginfo-analysis is not supported.\n");
@@ -952,7 +948,6 @@ int show_line_range(struct line_range *lr __maybe_unused,
 
 int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
 			int npevs __maybe_unused, int max_vls __maybe_unused,
-			const char *module __maybe_unused,
 			struct strfilter *filter __maybe_unused,
 			bool externs __maybe_unused)
 {
@@ -2520,7 +2515,7 @@ void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
  */
 static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 					    struct probe_trace_event **tevs,
-					    int max_tevs, const char *target)
+					    int max_tevs)
 {
 	struct map *map = NULL;
 	struct ref_reloc_sym *reloc_sym = NULL;
@@ -2531,7 +2526,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	int num_matched_functions;
 	int ret, i;
 
-	map = get_target_map(target, pev->uprobes);
+	map = get_target_map(pev->target, pev->uprobes);
 	if (!map) {
 		ret = -EINVAL;
 		goto out;
@@ -2544,12 +2539,12 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	num_matched_functions = find_probe_functions(map, pp->function);
 	if (num_matched_functions == 0) {
 		pr_err("Failed to find symbol %s in %s\n", pp->function,
-			target ? : "kernel");
+			pev->target ? : "kernel");
 		ret = -ENOENT;
 		goto out;
 	} else if (num_matched_functions > max_tevs) {
 		pr_err("Too many functions matched in %s\n",
-			target ? : "kernel");
+			pev->target ? : "kernel");
 		ret = -E2BIG;
 		goto out;
 	}
@@ -2597,8 +2592,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 			tp->offset = pp->offset;
 		}
 		tp->retprobe = pp->retprobe;
-		if (target)
-			tev->point.module = strdup_or_goto(target, nomem_out);
+		if (pev->target)
+			tev->point.module = strdup_or_goto(pev->target,
+							   nomem_out);
 		tev->uprobes = pev->uprobes;
 		tev->nargs = pev->nargs;
 		if (tev->nargs) {
@@ -2639,13 +2635,13 @@ bool __weak arch__prefers_symtab(void) { return false; }
 
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 					  struct probe_trace_event **tevs,
-					  int max_tevs, const char *target)
+					  int max_tevs)
 {
 	int ret;
 
 	if (pev->uprobes && !pev->group) {
 		/* Replace group name if not given */
-		ret = convert_exec_to_group(target, &pev->group);
+		ret = convert_exec_to_group(pev->target, &pev->group);
 		if (ret != 0) {
 			pr_warning("Failed to make a group name.\n");
 			return ret;
@@ -2653,17 +2649,17 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 	}
 
 	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-		ret = find_probe_trace_events_from_map(pev, tevs, max_tevs, target);
+		ret = find_probe_trace_events_from_map(pev, tevs, max_tevs);
 		if (ret > 0)
 			return ret; /* Found in symbol table */
 	}
 
 	/* Convert perf_probe_event with debuginfo */
-	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target);
+	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
 	if (ret != 0)
 		return ret;	/* Found in debuginfo or got an error */
 
-	return find_probe_trace_events_from_map(pev, tevs, max_tevs, target);
+	return find_probe_trace_events_from_map(pev, tevs, max_tevs);
 }
 
 struct __event_package {
@@ -2696,8 +2692,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
 		/* Convert with or without debuginfo */
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
 						     &pkgs[i].tevs,
-						     max_tevs,
-						     pkgs[i].pev->target);
+						     max_tevs);
 		if (ret < 0)
 			goto end;
 		pkgs[i].ntevs = ret;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e10aedc34570..e3d9bb1400e8 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -131,8 +131,8 @@ extern int show_perf_probe_events(struct strfilter *filter);
 extern int show_line_range(struct line_range *lr, const char *module,
 			   bool user);
 extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			       int max_probe_points, const char *module,
-			       struct strfilter *filter, bool externs);
+			       int max_probe_points, struct strfilter *filter,
+			       bool externs);
 extern int show_available_funcs(const char *module, struct strfilter *filter,
 				bool user);
 bool arch__prefers_symtab(void);

From ddb2f58f9f8febaf817496a010130f108bb9a431 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 8 May 2015 10:03:31 +0900
Subject: [PATCH 129/305] perf probe: Introduce probe_conf global configs

Introduce probe_conf global configuration parameters for probe-event and
probe-finder, and removes related parameters from APIs.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150508010330.24812.21095.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c     | 21 +++++---------
 tools/perf/util/probe-event.c  | 53 ++++++++++++++--------------------
 tools/perf/util/probe-event.h  | 13 ++++++---
 tools/perf/util/probe-finder.c | 17 +++++------
 tools/perf/util/probe-finder.h |  7 ++---
 5 files changed, 48 insertions(+), 63 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 233f9413ec8c..dbc998f21281 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -50,8 +50,6 @@
 static struct {
 	int command;	/* Command short_name */
 	bool list_events;
-	bool force_add;
-	bool show_ext_vars;
 	bool uprobes;
 	bool quiet;
 	bool target_used;
@@ -59,7 +57,6 @@ static struct {
 	struct perf_probe_event events[MAX_PROBES];
 	struct line_range line_range;
 	char *target;
-	int max_probe_points;
 	struct strfilter *filter;
 } params;
 
@@ -364,7 +361,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		"\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n",
 #endif
 		opt_add_probe_event),
-	OPT_BOOLEAN('f', "force", &params.force_add, "forcibly add events"
+	OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events"
 		    " with existing name"),
 #ifdef HAVE_DWARF_SUPPORT
 	OPT_CALLBACK('L', "line", NULL,
@@ -373,7 +370,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK('V', "vars", NULL,
 		     "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT",
 		     "Show accessible variables on PROBEDEF", opt_show_vars),
-	OPT_BOOLEAN('\0', "externs", &params.show_ext_vars,
+	OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
 		    "Show external variables too (with --vars only)"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
@@ -384,7 +381,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		opt_set_target),
 #endif
 	OPT__DRY_RUN(&probe_event_dry_run),
-	OPT_INTEGER('\0', "max-probes", &params.max_probe_points,
+	OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
 		 "Set how many probe points can be found for a probe."),
 	OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]",
 			     "Show potential probe-able functions.",
@@ -440,8 +437,8 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		verbose = -1;
 	}
 
-	if (params.max_probe_points == 0)
-		params.max_probe_points = MAX_PROBES;
+	if (probe_conf.max_probes == 0)
+		probe_conf.max_probes = MAX_PROBES;
 
 	/*
 	 * Only consider the user's kernel image path if given.
@@ -477,9 +474,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 						       NULL);
 
 		ret = show_available_vars(params.events, params.nevents,
-					  params.max_probe_points,
-					  params.filter,
-					  params.show_ext_vars);
+					  params.filter);
 		if (ret < 0)
 			pr_err_with_code("  Error: Failed to show vars.", ret);
 		return ret;
@@ -498,9 +493,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 			usage_with_options(probe_usage, options);
 		}
 
-		ret = add_perf_probe_events(params.events, params.nevents,
-					    params.max_probe_points,
-					    params.force_add);
+		ret = add_perf_probe_events(params.events, params.nevents);
 		if (ret < 0) {
 			pr_err_with_code("  Error: Failed to add events.", ret);
 			return ret;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b8fd48d4eff8..a7deda450875 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -51,6 +51,7 @@
 #define PERFPROBE_GROUP "probe"
 
 bool probe_event_dry_run;	/* Dry run flag */
+struct probe_conf probe_conf;
 
 #define semantic_error(msg ...) pr_err("Semantic error :" msg)
 
@@ -599,8 +600,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 
 /* Try to find perf_probe_event with debuginfo */
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-					  struct probe_trace_event **tevs,
-					  int max_tevs)
+					  struct probe_trace_event **tevs)
 {
 	bool need_dwarf = perf_probe_event_need_dwarf(pev);
 	struct perf_probe_point tmp;
@@ -617,13 +617,12 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
 
 	pr_debug("Try to find probe point from debuginfo.\n");
 	/* Searching trace events corresponding to a probe event */
-	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs);
+	ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
 
 	if (ntevs == 0)	{  /* Not found, retry with an alternative */
 		ret = get_alternative_probe_event(dinfo, pev, &tmp);
 		if (!ret) {
-			ntevs = debuginfo__find_trace_events(dinfo, pev,
-							     tevs, max_tevs);
+			ntevs = debuginfo__find_trace_events(dinfo, pev, tevs);
 			/*
 			 * Write back to the original probe_event for
 			 * setting appropriate (user given) event name
@@ -821,8 +820,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user)
 
 static int show_available_vars_at(struct debuginfo *dinfo,
 				  struct perf_probe_event *pev,
-				  int max_vls, struct strfilter *_filter,
-				  bool externs)
+				  struct strfilter *_filter)
 {
 	char *buf;
 	int ret, i, nvars;
@@ -836,13 +834,12 @@ static int show_available_vars_at(struct debuginfo *dinfo,
 		return -EINVAL;
 	pr_debug("Searching variables at %s\n", buf);
 
-	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls,
-						max_vls, externs);
+	ret = debuginfo__find_available_vars_at(dinfo, pev, &vls);
 	if (!ret) {  /* Not found, retry with an alternative */
 		ret = get_alternative_probe_event(dinfo, pev, &tmp);
 		if (!ret) {
 			ret = debuginfo__find_available_vars_at(dinfo, pev,
-						&vls, max_vls, externs);
+								&vls);
 			/* Release the old probe_point */
 			clear_perf_probe_point(&tmp);
 		}
@@ -889,7 +886,7 @@ end:
 
 /* Show available variables on given probe point */
 int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			int max_vls, struct strfilter *_filter, bool externs)
+			struct strfilter *_filter)
 {
 	int i, ret = 0;
 	struct debuginfo *dinfo;
@@ -907,8 +904,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs,
 	setup_pager();
 
 	for (i = 0; i < npevs && ret >= 0; i++)
-		ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter,
-					     externs);
+		ret = show_available_vars_at(dinfo, &pevs[i], _filter);
 
 	debuginfo__delete(dinfo);
 out:
@@ -927,8 +923,7 @@ find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
 }
 
 static int try_to_find_probe_trace_events(struct perf_probe_event *pev,
-				struct probe_trace_event **tevs __maybe_unused,
-				int max_tevs __maybe_unused)
+				struct probe_trace_event **tevs __maybe_unused)
 {
 	if (perf_probe_event_need_dwarf(pev)) {
 		pr_warning("Debuginfo-analysis is not supported.\n");
@@ -947,9 +942,8 @@ int show_line_range(struct line_range *lr __maybe_unused,
 }
 
 int show_available_vars(struct perf_probe_event *pevs __maybe_unused,
-			int npevs __maybe_unused, int max_vls __maybe_unused,
-			struct strfilter *filter __maybe_unused,
-			bool externs __maybe_unused)
+			int npevs __maybe_unused,
+			struct strfilter *filter __maybe_unused)
 {
 	pr_warning("Debuginfo-analysis is not supported.\n");
 	return -ENOSYS;
@@ -2514,8 +2508,7 @@ void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused,
  * Return an error or the number of found probe_trace_event
  */
 static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
-					    struct probe_trace_event **tevs,
-					    int max_tevs)
+					    struct probe_trace_event **tevs)
 {
 	struct map *map = NULL;
 	struct ref_reloc_sym *reloc_sym = NULL;
@@ -2542,7 +2535,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 			pev->target ? : "kernel");
 		ret = -ENOENT;
 		goto out;
-	} else if (num_matched_functions > max_tevs) {
+	} else if (num_matched_functions > probe_conf.max_probes) {
 		pr_err("Too many functions matched in %s\n",
 			pev->target ? : "kernel");
 		ret = -E2BIG;
@@ -2634,8 +2627,7 @@ err_out:
 bool __weak arch__prefers_symtab(void) { return false; }
 
 static int convert_to_probe_trace_events(struct perf_probe_event *pev,
-					  struct probe_trace_event **tevs,
-					  int max_tevs)
+					 struct probe_trace_event **tevs)
 {
 	int ret;
 
@@ -2649,17 +2641,17 @@ static int convert_to_probe_trace_events(struct perf_probe_event *pev,
 	}
 
 	if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) {
-		ret = find_probe_trace_events_from_map(pev, tevs, max_tevs);
+		ret = find_probe_trace_events_from_map(pev, tevs);
 		if (ret > 0)
 			return ret; /* Found in symbol table */
 	}
 
 	/* Convert perf_probe_event with debuginfo */
-	ret = try_to_find_probe_trace_events(pev, tevs, max_tevs);
+	ret = try_to_find_probe_trace_events(pev, tevs);
 	if (ret != 0)
 		return ret;	/* Found in debuginfo or got an error */
 
-	return find_probe_trace_events_from_map(pev, tevs, max_tevs);
+	return find_probe_trace_events_from_map(pev, tevs);
 }
 
 struct __event_package {
@@ -2668,8 +2660,7 @@ struct __event_package {
 	int				ntevs;
 };
 
-int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-			  int max_tevs, bool force_add)
+int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
 {
 	int i, j, ret;
 	struct __event_package *pkgs;
@@ -2691,8 +2682,7 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
 		pkgs[i].pev = &pevs[i];
 		/* Convert with or without debuginfo */
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
-						     &pkgs[i].tevs,
-						     max_tevs);
+						     &pkgs[i].tevs);
 		if (ret < 0)
 			goto end;
 		pkgs[i].ntevs = ret;
@@ -2701,7 +2691,8 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
 	/* Loop 2: add all events */
 	for (i = 0; i < npevs; i++) {
 		ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs,
-						pkgs[i].ntevs, force_add);
+					       pkgs[i].ntevs,
+					       probe_conf.force_add);
 		if (ret < 0)
 			break;
 	}
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index e3d9bb1400e8..db9a9cb2d2af 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -6,6 +6,13 @@
 #include "strlist.h"
 #include "strfilter.h"
 
+/* Probe related configurations */
+struct probe_conf {
+	bool	show_ext_vars;
+	bool	force_add;
+	int	max_probes;
+};
+extern struct probe_conf probe_conf;
 extern bool probe_event_dry_run;
 
 /* kprobe-tracer and uprobe-tracer tracing point */
@@ -124,15 +131,13 @@ extern int line_range__init(struct line_range *lr);
 /* Internal use: Return kernel/module path */
 extern const char *kernel_get_module_path(const char *module);
 
-extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs,
-				 int max_probe_points, bool force_add);
+extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
 extern int del_perf_probe_events(struct strfilter *filter);
 extern int show_perf_probe_events(struct strfilter *filter);
 extern int show_line_range(struct line_range *lr, const char *module,
 			   bool user);
 extern int show_available_vars(struct perf_probe_event *pevs, int npevs,
-			       int max_probe_points, struct strfilter *filter,
-			       bool externs);
+			       struct strfilter *filter);
 extern int show_available_funcs(const char *module, struct strfilter *filter,
 				bool user);
 bool arch__prefers_symtab(void);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 63d33893d853..f4f5eed00c7d 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1214,15 +1214,15 @@ end:
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 int debuginfo__find_trace_events(struct debuginfo *dbg,
 				 struct perf_probe_event *pev,
-				 struct probe_trace_event **tevs, int max_tevs)
+				 struct probe_trace_event **tevs)
 {
 	struct trace_event_finder tf = {
 			.pf = {.pev = pev, .callback = add_probe_trace_event},
-			.mod = dbg->mod, .max_tevs = max_tevs};
+			.max_tevs = probe_conf.max_probes, .mod = dbg->mod};
 	int ret;
 
 	/* Allocate result tevs array */
-	*tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs);
+	*tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs);
 	if (*tevs == NULL)
 		return -ENOMEM;
 
@@ -1303,9 +1303,9 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf)
 	die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem);
 
 	/* Find external variables */
-	if (!af->externs)
+	if (!probe_conf.show_ext_vars)
 		goto out;
-	/* Don't need to search child DIE for externs. */
+	/* Don't need to search child DIE for external vars. */
 	af->child = false;
 	die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem);
 
@@ -1325,17 +1325,16 @@ out:
  */
 int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 				      struct perf_probe_event *pev,
-				      struct variable_list **vls,
-				      int max_vls, bool externs)
+				      struct variable_list **vls)
 {
 	struct available_var_finder af = {
 			.pf = {.pev = pev, .callback = add_available_vars},
 			.mod = dbg->mod,
-			.max_vls = max_vls, .externs = externs};
+			.max_vls = probe_conf.max_probes};
 	int ret;
 
 	/* Allocate result vls array */
-	*vls = zalloc(sizeof(struct variable_list) * max_vls);
+	*vls = zalloc(sizeof(struct variable_list) * af.max_vls);
 	if (*vls == NULL)
 		return -ENOMEM;
 
diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h
index f53553d38845..bed82716e1b4 100644
--- a/tools/perf/util/probe-finder.h
+++ b/tools/perf/util/probe-finder.h
@@ -40,8 +40,7 @@ extern void debuginfo__delete(struct debuginfo *dbg);
 /* Find probe_trace_events specified by perf_probe_event from debuginfo */
 extern int debuginfo__find_trace_events(struct debuginfo *dbg,
 					struct perf_probe_event *pev,
-					struct probe_trace_event **tevs,
-					int max_tevs);
+					struct probe_trace_event **tevs);
 
 /* Find a perf_probe_point from debuginfo */
 extern int debuginfo__find_probe_point(struct debuginfo *dbg,
@@ -55,8 +54,7 @@ extern int debuginfo__find_line_range(struct debuginfo *dbg,
 /* Find available variables */
 extern int debuginfo__find_available_vars_at(struct debuginfo *dbg,
 					     struct perf_probe_event *pev,
-					     struct variable_list **vls,
-					     int max_points, bool externs);
+					     struct variable_list **vls);
 
 /* Find a src file from a DWARF tag path */
 int get_real_path(const char *raw_path, const char *comp_dir,
@@ -99,7 +97,6 @@ struct available_var_finder {
 	struct variable_list	*vls;		/* Found variable lists */
 	int			nvls;		/* Number of variable lists */
 	int			max_vls;	/* Max no. of variable lists */
-	bool			externs;	/* Find external vars too */
 	bool			child;		/* Search child scopes */
 };
 

From 6cfd1f6805ca0b4a341794d67eb605089435f938 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 8 May 2015 10:03:33 +0900
Subject: [PATCH 130/305] perf probe: Add --no-inlines option to avoid
 searching inline functions

Add --no-inlines(--inlines) option to avoid searching inline functions.

Searching all functions which matches glob pattern can take a long time
and find a lot of inline functions.

With this option perf-probe searches target on the non-inlined
functions.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150508010333.24812.86568.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-probe.txt | 4 ++++
 tools/perf/builtin-probe.c              | 2 ++
 tools/perf/util/probe-event.h           | 1 +
 tools/perf/util/probe-finder.c          | 2 +-
 4 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt
index ad3e355adc1f..3a8a9ba2b041 100644
--- a/tools/perf/Documentation/perf-probe.txt
+++ b/tools/perf/Documentation/perf-probe.txt
@@ -83,6 +83,10 @@ OPTIONS
 	(Only for --vars) Show external defined variables in addition to local
 	variables.
 
+--no-inlines::
+	(Only for --add) Search only for non-inlined functions. The functions
+	which do not have instances are ignored.
+
 -F::
 --funcs[=FILTER]::
 	Show available functions in given module or kernel. With -x/--exec,
diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index dbc998f21281..7fa2c7a1086a 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -379,6 +379,8 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_CALLBACK('m', "module", NULL, "modname|path",
 		"target module name (for online) or path (for offline)",
 		opt_set_target),
+	OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines,
+		"Don't search inlined functions"),
 #endif
 	OPT__DRY_RUN(&probe_event_dry_run),
 	OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes,
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index db9a9cb2d2af..633aba77e0cb 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -10,6 +10,7 @@
 struct probe_conf {
 	bool	show_ext_vars;
 	bool	force_add;
+	bool	no_inlines;
 	int	max_probes;
 };
 extern struct probe_conf probe_conf;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index f4f5eed00c7d..1713421112f8 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -943,7 +943,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 			/* TODO: Check the address in this function */
 			param->retval = call_probe_finder(sp_die, pf);
 		}
-	} else
+	} else if (!probe_conf.no_inlines)
 		/* Inlined function: search instances */
 		param->retval = die_walk_instances(sp_die,
 					probe_point_inline_cb, (void *)pf);

From 4c859351226c920b227fec040a3b447f0d482af3 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 8 May 2015 10:03:35 +0900
Subject: [PATCH 131/305] perf probe: Support glob wildcards for function name

Support glob wildcards for function name when adding new probes. This
will allow us to build caches of function-entry level information with
$params.

e.g.
  ----
  # perf probe --no-inlines --add 'kmalloc* $params'
  Added new events:
    probe:kmalloc_slab   (on kmalloc* with $params)
    probe:kmalloc_large_node (on kmalloc* with $params)
    probe:kmalloc_order_trace (on kmalloc* with $params)

  You can now use it in all perf tools, such as:

        perf record -e probe:kmalloc_order_trace -aR sleep 1

  # perf probe --list
    probe:kmalloc_large_node (on kmalloc_large_node@mm/slub.c with size flags node)
    probe:kmalloc_order_trace (on kmalloc_order_trace@mm/slub.c with size flags order)
    probe:kmalloc_slab   (on kmalloc_slab@mm/slab_common.c with size flags)
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Hemant Kumar <hemant@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150508010335.24812.19972.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 16 ++++++++++++++++
 tools/perf/util/dwarf-aux.h    |  3 +++
 tools/perf/util/probe-event.c  | 19 ++++++++++++++-----
 tools/perf/util/probe-event.h  |  1 +
 tools/perf/util/probe-finder.c | 27 +++++++++++++++++++++------
 tools/perf/util/util.h         |  4 ++++
 6 files changed, 59 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index c34e024020c7..16d46e26edac 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -139,10 +139,26 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr,
 bool die_compare_name(Dwarf_Die *dw_die, const char *tname)
 {
 	const char *name;
+
 	name = dwarf_diename(dw_die);
 	return name ? (strcmp(tname, name) == 0) : false;
 }
 
+/**
+ * die_match_name - Match diename and glob
+ * @dw_die: a DIE
+ * @glob: a string of target glob pattern
+ *
+ * Glob matching the name of @dw_die and @glob. Return false if matching fail.
+ */
+bool die_match_name(Dwarf_Die *dw_die, const char *glob)
+{
+	const char *name;
+
+	name = dwarf_diename(dw_die);
+	return name ? strglobmatch(name, glob) : false;
+}
+
 /**
  * die_get_call_lineno - Get callsite line number of inline-function instance
  * @in_die: a DIE of an inlined function instance
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index af7dbcd5f929..50a3cdc55fd7 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -47,6 +47,9 @@ extern bool die_is_func_instance(Dwarf_Die *dw_die);
 /* Compare diename and tname */
 extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname);
 
+/* Matching diename with glob pattern */
+extern bool die_match_name(Dwarf_Die *dw_die, const char *glob);
+
 /* Get callsite line number of inline-function instance */
 extern int die_get_call_lineno(Dwarf_Die *in_die);
 
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index a7deda450875..a2d8cefc597c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -589,7 +589,11 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 				if (!tmp)
 					return -ENOMEM;
 			}
-			free(tevs[i].point.symbol);
+			/* If we have no realname, use symbol for it */
+			if (!tevs[i].point.realname)
+				tevs[i].point.realname = tevs[i].point.symbol;
+			else
+				free(tevs[i].point.symbol);
 			tevs[i].point.symbol = tmp;
 			tevs[i].point.offset = tevs[i].point.address -
 					       reloc_sym->unrelocated_addr;
@@ -1900,6 +1904,7 @@ static void clear_probe_trace_event(struct probe_trace_event *tev)
 	free(tev->event);
 	free(tev->group);
 	free(tev->point.symbol);
+	free(tev->point.realname);
 	free(tev->point.module);
 	for (i = 0; i < tev->nargs; i++) {
 		free(tev->args[i].name);
@@ -2377,6 +2382,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 	struct strlist *namelist;
 	LIST_HEAD(blacklist);
 	struct kprobe_blacklist_node *node;
+	bool safename;
 
 	if (pev->uprobes)
 		fd = open_uprobe_events(true);
@@ -2402,6 +2408,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 			pr_debug("No kprobe blacklist support, ignored\n");
 	}
 
+	safename = (pev->point.function && !strisglob(pev->point.function));
 	ret = 0;
 	pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
 	for (i = 0; i < ntevs; i++) {
@@ -2420,10 +2427,10 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 		if (pev->event)
 			event = pev->event;
 		else
-			if (pev->point.function)
+			if (safename)
 				event = pev->point.function;
 			else
-				event = tev->point.symbol;
+				event = tev->point.realname;
 		if (pev->group)
 			group = pev->group;
 		else
@@ -2488,9 +2495,11 @@ static int find_probe_functions(struct map *map, char *name)
 {
 	int found = 0;
 	struct symbol *sym;
+	struct rb_node *tmp;
 
-	map__for_each_symbol_by_name(map, name, sym) {
-		found++;
+	map__for_each_symbol(map, sym, tmp) {
+		if (strglobmatch(sym->name, name))
+			found++;
 	}
 
 	return found;
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 633aba77e0cb..1e2faa3559d2 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -18,6 +18,7 @@ extern bool probe_event_dry_run;
 
 /* kprobe-tracer and uprobe-tracer tracing point */
 struct probe_trace_point {
+	char		*realname;	/* function real name (if needed) */
 	char		*symbol;	/* Base symbol */
 	char		*module;	/* Module name */
 	unsigned long	offset;		/* Offset from symbol */
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 1713421112f8..d5f60c055554 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -717,7 +717,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data)
 	}
 	/* If the function name is given, that's what user expects */
 	if (fsp->function) {
-		if (die_compare_name(fn_die, fsp->function)) {
+		if (die_match_name(fn_die, fsp->function)) {
 			memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die));
 			fsp->found = true;
 			return 1;
@@ -920,13 +920,14 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 
 	/* Check tag and diename */
 	if (!die_is_func_def(sp_die) ||
-	    !die_compare_name(sp_die, pp->function))
+	    !die_match_name(sp_die, pp->function))
 		return DWARF_CB_OK;
 
 	/* Check declared file */
 	if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die)))
 		return DWARF_CB_OK;
 
+	pr_debug("Matched function: %s\n", dwarf_diename(sp_die));
 	pf->fname = dwarf_decl_file(sp_die);
 	if (pp->line) { /* Function relative line */
 		dwarf_decl_line(sp_die, &pf->lno);
@@ -943,10 +944,20 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data)
 			/* TODO: Check the address in this function */
 			param->retval = call_probe_finder(sp_die, pf);
 		}
-	} else if (!probe_conf.no_inlines)
+	} else if (!probe_conf.no_inlines) {
 		/* Inlined function: search instances */
 		param->retval = die_walk_instances(sp_die,
 					probe_point_inline_cb, (void *)pf);
+		/* This could be a non-existed inline definition */
+		if (param->retval == -ENOENT && strisglob(pp->function))
+			param->retval = 0;
+	}
+
+	/* We need to find other candidates */
+	if (strisglob(pp->function) && param->retval >= 0) {
+		param->retval = 0;	/* We have to clear the result */
+		return DWARF_CB_OK;
+	}
 
 	return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */
 }
@@ -975,7 +986,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data)
 		if (dwarf_tag(param->sp_die) != DW_TAG_subprogram)
 			return DWARF_CB_OK;
 
-		if (die_compare_name(param->sp_die, param->function)) {
+		if (die_match_name(param->sp_die, param->function)) {
 			if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die))
 				return DWARF_CB_OK;
 
@@ -1028,7 +1039,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg,
 		return -ENOMEM;
 
 	/* Fastpath: lookup by function name from .debug_pubnames section */
-	if (pp->function) {
+	if (pp->function && !strisglob(pp->function)) {
 		struct pubname_callback_param pubname_param = {
 			.function = pp->function,
 			.file	  = pp->file,
@@ -1177,6 +1188,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf)
 	if (ret < 0)
 		return ret;
 
+	tev->point.realname = strdup(dwarf_diename(sc_die));
+	if (!tev->point.realname)
+		return -ENOMEM;
+
 	pr_debug("Probe point found: %s+%lu\n", tev->point.symbol,
 		 tev->point.offset);
 
@@ -1535,7 +1550,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data)
 		return DWARF_CB_OK;
 
 	if (die_is_func_def(sp_die) &&
-	    die_compare_name(sp_die, lr->function)) {
+	    die_match_name(sp_die, lr->function)) {
 		lf->fname = dwarf_decl_file(sp_die);
 		dwarf_decl_line(sp_die, &lr->offset);
 		pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 1ff23e04ad27..3601ffd3d8b4 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -257,6 +257,10 @@ char **argv_split(const char *str, int *argcp);
 void argv_free(char **argv);
 bool strglobmatch(const char *str, const char *pat);
 bool strlazymatch(const char *str, const char *pat);
+static inline bool strisglob(const char *str)
+{
+	return strpbrk(str, "*?[") != NULL;
+}
 int strtailcmp(const char *s1, const char *s2);
 char *strxfrchar(char *s, char from, char to);
 unsigned long convert_unit(unsigned long value, char *unit);

From 76d408498b08447e0f61dfdd611aeb6e8e61ce80 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Wed, 29 Apr 2015 16:45:31 +0530
Subject: [PATCH 132/305] perf build: Disable libdw DWARF unwind when built
 with NO_DWARF

We get a linker error if we try to build with NO_DWARF since we build
util/unwind-libdw.c, but do not include -ldw

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1430306131-6780-1-git-send-email-naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/config/Makefile | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 435b6ca85b1f..1b957a1272d0 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -268,6 +268,10 @@ else
   endif # libelf support
 endif # NO_LIBELF
 
+ifdef NO_DWARF
+  NO_LIBDW_DWARF_UNWIND := 1
+endif
+
 ifndef NO_LIBELF
   CFLAGS += -DHAVE_LIBELF_SUPPORT
   EXTLIBS += -lelf

From a41f3c8cd4e28dcbebd8ec27a9602c86cfa5f009 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 23 Apr 2015 08:56:42 +0200
Subject: [PATCH 133/305] perf/x86/intel/uncore: Add Broadwell-U uncore IMC PMU
 support

This patch enables the uncore Memory Controller (IMC) PMU
support for Intel Broadwell-U (Model 61) mobile processors.
The IMC PMU enables measuring memory bandwidth.

To use with perf:
$ perf stat -a -I 1000 -e
uncore_imc/data_reads/,uncore_imc/data_writes/ sleep 10

Tested-by: Sonny Rao <sonnyrao@chromium.org>
Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: kan.liang@intel.com
Cc: peterz@infradead.org
Link: http://lkml.kernel.org/r/20150423065642.GA4890@thinkpad
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_uncore.c |  3 +++
 arch/x86/kernel/cpu/perf_event_intel_uncore.h |  1 +
 .../kernel/cpu/perf_event_intel_uncore_snb.c  | 20 +++++++++++++++++++
 3 files changed, 24 insertions(+)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.c b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
index c635b8b49e93..a03f96402dbe 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@@ -922,6 +922,9 @@ static int __init uncore_pci_init(void)
 	case 69: /* Haswell Celeron */
 		ret = hsw_uncore_pci_init();
 		break;
+	case 61: /* Broadwell */
+		ret = bdw_uncore_pci_init();
+		break;
 	default:
 		return 0;
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore.h b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
index 6c8c1e7e69d8..06b07930e48b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.h
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.h
@@ -326,6 +326,7 @@ extern struct event_constraint uncore_constraint_empty;
 int snb_uncore_pci_init(void);
 int ivb_uncore_pci_init(void);
 int hsw_uncore_pci_init(void);
+int bdw_uncore_pci_init(void);
 void snb_uncore_cpu_init(void);
 void nhm_uncore_cpu_init(void);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
index 4562e9e22c60..b005a78c7012 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore_snb.c
@@ -7,6 +7,7 @@
 #define PCI_DEVICE_ID_INTEL_IVB_E3_IMC	0x0150
 #define PCI_DEVICE_ID_INTEL_HSW_IMC	0x0c00
 #define PCI_DEVICE_ID_INTEL_HSW_U_IMC	0x0a04
+#define PCI_DEVICE_ID_INTEL_BDW_IMC	0x1604
 
 /* SNB event control */
 #define SNB_UNC_CTL_EV_SEL_MASK			0x000000ff
@@ -486,6 +487,14 @@ static const struct pci_device_id hsw_uncore_pci_ids[] = {
 	{ /* end: all zeroes */ },
 };
 
+static const struct pci_device_id bdw_uncore_pci_ids[] = {
+	{ /* IMC */
+		PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_BDW_IMC),
+		.driver_data = UNCORE_PCI_DEV_DATA(SNB_PCI_UNCORE_IMC, 0),
+	},
+	{ /* end: all zeroes */ },
+};
+
 static struct pci_driver snb_uncore_pci_driver = {
 	.name		= "snb_uncore",
 	.id_table	= snb_uncore_pci_ids,
@@ -501,6 +510,11 @@ static struct pci_driver hsw_uncore_pci_driver = {
 	.id_table	= hsw_uncore_pci_ids,
 };
 
+static struct pci_driver bdw_uncore_pci_driver = {
+	.name		= "bdw_uncore",
+	.id_table	= bdw_uncore_pci_ids,
+};
+
 struct imc_uncore_pci_dev {
 	__u32 pci_id;
 	struct pci_driver *driver;
@@ -514,6 +528,7 @@ static const struct imc_uncore_pci_dev desktop_imc_pci_ids[] = {
 	IMC_DEV(IVB_E3_IMC, &ivb_uncore_pci_driver), /* Xeon E3-1200 v2/3rd Gen Core processor */
 	IMC_DEV(HSW_IMC, &hsw_uncore_pci_driver),    /* 4th Gen Core Processor */
 	IMC_DEV(HSW_U_IMC, &hsw_uncore_pci_driver),  /* 4th Gen Core ULT Mobile Processor */
+	IMC_DEV(BDW_IMC, &bdw_uncore_pci_driver),    /* 5th Gen Core U */
 	{  /* end marker */ }
 };
 
@@ -561,6 +576,11 @@ int hsw_uncore_pci_init(void)
 	return imc_uncore_pci_init();
 }
 
+int bdw_uncore_pci_init(void)
+{
+	return imc_uncore_pci_init();
+}
+
 /* end of Sandy Bridge uncore support */
 
 /* Nehalem uncore support */

From 08a9b9857fc2d77600f09f3d342f6c64d25083b2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 May 2015 11:41:17 -0300
Subject: [PATCH 134/305] perf kmem: Fix compiler warning about may be
 accessing uninitialized variable

The last argument to strtok_r doesn't need to be initialized, its just a
placeholder to make this routine reentrant, but gcc doesn't know about
that and complains, breaking the build, fix it by setting it to NULL.

Fixes: 0e11115644b3 ("perf kmem: Print gfp flags in human readable string")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-iyyvkbnkrd9g19f6ta9zfkem@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index e0173c7f17b1..254614b10c4a 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -713,7 +713,7 @@ static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample,
 		.size = sample->raw_size,
 	};
 	struct trace_seq seq;
-	char *str, *pos;
+	char *str, *pos = NULL;
 
 	if (nr_gfps) {
 		struct gfp_flag key = {

From 1f91d5fd03d410365d9781ce981f9712b19a0751 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Sun, 10 May 2015 00:19:42 +0900
Subject: [PATCH 135/305] perf tools: Document relation of per-thread event
 count feature

The 'perf record -s' and 'perf report -T' should be used together to see
per-thread event counts.  Document the relation of these commands.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1431184784-30525-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 3 ++-
 tools/perf/Documentation/perf-report.txt | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 57dd57bcef95..280533ebf9df 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -147,7 +147,8 @@ OPTIONS
 
 -s::
 --stat::
-	Per thread counts.
+	Record per-thread event counts.  Use it with 'perf report -T' to see
+	the values.
 
 -d::
 --data::
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 27190ed06f9c..c33b69f3374f 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -34,7 +34,8 @@ OPTIONS
 
 -T::
 --threads::
-	Show per-thread event counters
+	Show per-thread event counters.  The input data file should be recorded
+	with -s option.
 -c::
 --comms=::
 	Only consider symbols in these comms. CSV that understands

From b138f42ed4920f808b5599efc2c4b150c27a4153 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Sun, 10 May 2015 00:19:43 +0900
Subject: [PATCH 136/305] perf report: Force tty output if -T/--thread option
 is given

The -T/--thread option is supported only on --stdio mode (at least for
now).  So enforce the tty output if the option was requested.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1431184784-30525-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 62b49ca0fc7b..4d642db642c5 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -815,8 +815,8 @@ repeat:
 		goto error;
 	}
 
-	/* Force tty output for header output. */
-	if (report.header || report.header_only)
+	/* Force tty output for header output and per-thread stat. */
+	if (report.header || report.header_only || report.show_threads)
 		use_browser = 0;
 
 	if (strcmp(input_name, "-") != 0)

From ff8f695c0ec9d73d8a9f92fe634d6476ad74e3a1 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Mon, 11 May 2015 12:28:36 +0000
Subject: [PATCH 137/305] perf trace: Removed duplicated NULL test

No need to test trace.evlist against NULL twice.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1431347316-30401-2-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index cbfdb9523868..96a2ebafc755 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -2721,11 +2721,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 	signal(SIGFPE, sighandler_dump_stack);
 
 	trace.evlist = perf_evlist__new();
-	if (trace.evlist == NULL)
-		return -ENOMEM;
 
 	if (trace.evlist == NULL) {
 		pr_err("Not enough memory to run!\n");
+		err = -ENOMEM;
 		goto out;
 	}
 

From fb9596d1731cc37da6489de439f8b876f3a12db2 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Mon, 11 May 2015 09:25:02 +0000
Subject: [PATCH 138/305] perf probe: Remove length limitation for showing
 available variables

Use struct strbuf instead of bare char[] to remove the length limitation
of variables in variable_list, so they will not disappear due to
overlength, and make preparation for adding more description for
variables.

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1431336304-16863-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 50 +++++++++++++++-------------------
 tools/perf/util/dwarf-aux.h    |  4 +--
 tools/perf/util/probe-finder.c | 17 +++++++-----
 3 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 16d46e26edac..737c9dbe5643 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -848,19 +848,17 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 /**
  * die_get_typename - Get the name of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for result type name
- * @len: a max-length of @buf
+ * @buf: a strbuf for result type name
  *
- * Get the name of @vr_die and stores it to @buf. Return the actual length
- * of type name if succeeded. Return -E2BIG if @len is not enough long, and
- * Return -ENOENT if failed to find type name.
+ * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded.
+ * and Return -ENOENT if failed to find type name.
  * Note that the result will stores typedef name if possible, and stores
  * "*(function_type)" if the type is a function pointer.
  */
-int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf)
 {
 	Dwarf_Die type;
-	int tag, ret, ret2;
+	int tag, ret;
 	const char *tmp = "";
 
 	if (__die_get_real_type(vr_die, &type) == NULL)
@@ -871,8 +869,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
 		tmp = "*";
 	else if (tag == DW_TAG_subroutine_type) {
 		/* Function pointer */
-		ret = snprintf(buf, len, "(function_type)");
-		return (ret >= len) ? -E2BIG : ret;
+		strbuf_addf(buf, "(function_type)");
+		return 0;
 	} else {
 		if (!dwarf_diename(&type))
 			return -ENOENT;
@@ -883,39 +881,35 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len)
 		else if (tag == DW_TAG_enumeration_type)
 			tmp = "enum ";
 		/* Write a base name */
-		ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type));
-		return (ret >= len) ? -E2BIG : ret;
-	}
-	ret = die_get_typename(&type, buf, len);
-	if (ret > 0) {
-		ret2 = snprintf(buf + ret, len - ret, "%s", tmp);
-		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
+		strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type));
+		return 0;
 	}
+	ret = die_get_typename(&type, buf);
+	if (ret == 0)
+		strbuf_addf(buf, "%s", tmp);
+
 	return ret;
 }
 
 /**
  * die_get_varname - Get the name and type of given variable DIE
  * @vr_die: a variable DIE
- * @buf: a buffer for type and variable name
- * @len: the max-length of @buf
+ * @buf: a strbuf for type and variable name
  *
  * Get the name and type of @vr_die and stores it in @buf as "type\tname".
  */
-int die_get_varname(Dwarf_Die *vr_die, char *buf, int len)
+int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 {
-	int ret, ret2;
+	int ret;
 
-	ret = die_get_typename(vr_die, buf, len);
+	ret = die_get_typename(vr_die, buf);
 	if (ret < 0) {
 		pr_debug("Failed to get type, make it unknown.\n");
-		ret = snprintf(buf, len, "(unknown_type)");
+		strbuf_addf(buf, "(unknown_type)");
 	}
-	if (ret > 0) {
-		ret2 = snprintf(buf + ret, len - ret, "\t%s",
-				dwarf_diename(vr_die));
-		ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret;
-	}
-	return ret;
+
+	strbuf_addf(buf, "\t%s", dwarf_diename(vr_die));
+
+	return 0;
 }
 
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 50a3cdc55fd7..60676fda4824 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -117,8 +117,8 @@ extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name,
 				  Dwarf_Die *die_mem);
 
 /* Get the name of given variable DIE */
-extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
-extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len);
+extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
 #endif
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 8b9e274f940c..d7c2e90ab4ce 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -1255,14 +1255,11 @@ int debuginfo__find_trace_events(struct debuginfo *dbg,
 	return (ret < 0) ? ret : tf.ntevs;
 }
 
-#define MAX_VAR_LEN 64
-
 /* Collect available variables in this scope */
 static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 {
 	struct available_var_finder *af = data;
 	struct variable_list *vl;
-	char buf[MAX_VAR_LEN];
 	int tag, ret;
 
 	vl = &af->vls[af->nvls - 1];
@@ -1274,10 +1271,16 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 						af->pf.fb_ops, &af->pf.sp_die,
 						NULL);
 		if (ret == 0) {
-			ret = die_get_varname(die_mem, buf, MAX_VAR_LEN);
-			pr_debug2("Add new var: %s\n", buf);
-			if (ret > 0)
-				strlist__add(vl->vars, buf);
+			struct strbuf buf;
+
+			strbuf_init(&buf, 64);
+			ret = die_get_varname(die_mem, &buf);
+			pr_debug2("Add new var: %s\n", buf.buf);
+			if (ret == 0) {
+				strlist__add(vl->vars,
+					strbuf_detach(&buf, NULL));
+			}
+			strbuf_release(&buf);
 		}
 	}
 

From 349e8d2611316cce11c0b9d0830ebb585c9b82b8 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Mon, 11 May 2015 09:25:03 +0000
Subject: [PATCH 139/305] perf probe: Add --range option to show a variable's
 location range

It is not easy for users to get the accurate byte offset or the line
number where a local variable can be probed.

With '--range' option, local variables in the scope of the probe point
are showed with a byte offset range, and can be added according to this
range information.

For example, there are some variables in the function
generic_perform_write():

  <generic_perform_write@mm/filemap.c:0>
  0  ssize_t generic_perform_write(struct file *file,
  1                                 struct iov_iter *i, loff_t pos)
  2  {
  3          struct address_space *mapping = file->f_mapping;
  4          const struct address_space_operations *a_ops = mapping->a_ops;
  ...
  42                 status = a_ops->write_begin(file, mapping, pos, bytes, flags,
                                               &page, &fsdata);
  44                 if (unlikely(status < 0))

But we fail when we try to probe the variable 'a_ops' at line 42 or 44.

  $ perf probe --add 'generic_perform_write:42 a_ops'
  Failed to find the location of a_ops at this address.
    Perhaps, it has been optimized out.

This is because the source code do not match the assembly, so a variable
may not be available in the source code line where it appears.

After this patch, we can lookup the accurate byte offset range of a
variable, 'INV' indicates that this variable is not valid at the given
point, but available in the scope:

  $ perf probe --vars 'generic_perform_write:42' --range
  Available variables at generic_perform_write:42
    @<generic_perform_write+141>
       [INV] ssize_t written @<generic_perform_write+[324-331]>
       [INV] struct address_space_operations*        a_ops   @<generic_perform_write+[55-61,170-176,223-246]>
       [VAL] (unknown_type)  fsdata  @<generic_perform_write+[70-307,346-411]>
       [VAL] loff_t  pos     @<generic_perform_write+[0-286,286-336,346-411]>
       [VAL] long int        status  @<generic_perform_write+[83-342,346-411]>
       [VAL] long unsigned int       bytes   @<generic_perform_write+[122-311,320-338,346-403,403-411]>
       [VAL] struct address_space*   mapping @<generic_perform_write+[35-344,346-411]>
       [VAL] struct iov_iter*        i       @<generic_perform_write+[0-340,346-411]>
       [VAL] struct page*    page    @<generic_perform_write+[70-307,346-411]>

Then it is more clear for us to add a probe with this variable:

  $ perf probe --add 'generic_perform_write+170 a_ops'
  Added new event:
    probe:generic_perform_write (on generic_perform_write+170 with a_ops)

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1431336304-16863-2-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-probe.c     |   2 +
 tools/perf/util/dwarf-aux.c    | 121 +++++++++++++++++++++++++++++++++
 tools/perf/util/dwarf-aux.h    |   2 +
 tools/perf/util/probe-event.h  |   1 +
 tools/perf/util/probe-finder.c |  57 ++++++++++++----
 5 files changed, 170 insertions(+), 13 deletions(-)

diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c
index 7fa2c7a1086a..1272559fa22d 100644
--- a/tools/perf/builtin-probe.c
+++ b/tools/perf/builtin-probe.c
@@ -372,6 +372,8 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Show accessible variables on PROBEDEF", opt_show_vars),
 	OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars,
 		    "Show external variables too (with --vars only)"),
+	OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range,
+		"Show variables location range in scope (with --vars only)"),
 	OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name,
 		   "file", "vmlinux pathname"),
 	OPT_STRING('s', "source", &symbol_conf.source_prefix,
diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index 737c9dbe5643..afa09719336b 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -913,3 +913,124 @@ int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf)
 	return 0;
 }
 
+/**
+ * die_get_var_innermost_scope - Get innermost scope range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for variable byte offset range
+ *
+ * Get the innermost scope range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+				struct strbuf *buf)
+{
+	Dwarf_Die *scopes;
+	int count;
+	size_t offset = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	int ret;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	count = dwarf_getscopes_die(vr_die, &scopes);
+
+	/* (*SCOPES)[1] is the DIE for the scope containing that scope */
+	if (count <= 1) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	while ((offset = dwarf_ranges(&scopes[1], offset, &base,
+				&start, &end)) > 0) {
+		start -= entry;
+		end -= entry;
+
+		if (first) {
+			strbuf_addf(buf, "@<%s+[%lu-%lu",
+				name, start, end);
+			first = false;
+		} else {
+			strbuf_addf(buf, ",%lu-%lu",
+				start, end);
+		}
+	}
+
+	if (!first)
+		strbuf_addf(buf, "]>");
+
+out:
+	free(scopes);
+	return ret;
+}
+
+/**
+ * die_get_var_range - Get byte offset range of given variable DIE
+ * @sp_die: a subprogram DIE
+ * @vr_die: a variable DIE
+ * @buf: a strbuf for type and variable name and byte offset range
+ *
+ * Get the byte offset range of @vr_die and stores it in @buf as
+ * "@<function_name+[NN-NN,NN-NN]>".
+ */
+int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
+{
+	int ret = 0;
+	Dwarf_Addr base;
+	Dwarf_Addr start, end;
+	Dwarf_Addr entry;
+	Dwarf_Op *op;
+	size_t nops;
+	size_t offset = 0;
+	Dwarf_Attribute attr;
+	bool first = true;
+	const char *name;
+
+	ret = dwarf_entrypc(sp_die, &entry);
+	if (ret)
+		return ret;
+
+	name = dwarf_diename(sp_die);
+	if (!name)
+		return -ENOENT;
+
+	if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL)
+		return -EINVAL;
+
+	while ((offset = dwarf_getlocations(
+				&attr, offset, &base,
+				&start, &end, &op, &nops)) > 0) {
+		if (start == 0) {
+			/* Single Location Descriptions */
+			ret = die_get_var_innermost_scope(sp_die, vr_die, buf);
+			return ret;
+		}
+
+		/* Location Lists */
+		start -= entry;
+		end -= entry;
+		if (first) {
+			strbuf_addf(buf, "@<%s+[%lu-%lu",
+				name, start, end);
+			first = false;
+		} else {
+			strbuf_addf(buf, ",%lu-%lu",
+				start, end);
+		}
+	}
+
+	if (!first)
+		strbuf_addf(buf, "]>");
+
+	return ret;
+}
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index 60676fda4824..c154c0b80880 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -121,4 +121,6 @@ extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf);
 
 /* Get the name and type of given variable DIE, stored as "type\tname" */
 extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf);
+extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
+			struct strbuf *buf);
 #endif
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 1e2faa3559d2..537eb329c2cf 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -9,6 +9,7 @@
 /* Probe related configurations */
 struct probe_conf {
 	bool	show_ext_vars;
+	bool	show_location_range;
 	bool	force_add;
 	bool	no_inlines;
 	int	max_probes;
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index d7c2e90ab4ce..5804086425aa 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -177,7 +177,7 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
 	Dwarf_Word offs = 0;
 	bool ref = false;
 	const char *regs;
-	int ret;
+	int ret, ret2 = 0;
 
 	if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL)
 		goto static_var;
@@ -187,9 +187,19 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr,
 		return -EINVAL;	/* Broken DIE ? */
 	if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) {
 		ret = dwarf_entrypc(sp_die, &tmp);
-		if (ret || addr != tmp ||
-		    dwarf_tag(vr_die) != DW_TAG_formal_parameter ||
-		    dwarf_highpc(sp_die, &tmp))
+		if (ret)
+			return -ENOENT;
+
+		if (probe_conf.show_location_range &&
+			(dwarf_tag(vr_die) == DW_TAG_variable)) {
+			ret2 = -ERANGE;
+		} else if (addr != tmp ||
+			dwarf_tag(vr_die) != DW_TAG_formal_parameter) {
+			return -ENOENT;
+		}
+
+		ret = dwarf_highpc(sp_die, &tmp);
+		if (ret)
 			return -ENOENT;
 		/*
 		 * This is fuzzed by fentry mcount. We try to find the
@@ -210,7 +220,7 @@ found:
 	if (op->atom == DW_OP_addr) {
 static_var:
 		if (!tvar)
-			return 0;
+			return ret2;
 		/* Static variables on memory (not stack), make @varname */
 		ret = strlen(dwarf_diename(vr_die));
 		tvar->value = zalloc(ret + 2);
@@ -220,7 +230,7 @@ static_var:
 		tvar->ref = alloc_trace_arg_ref((long)offs);
 		if (tvar->ref == NULL)
 			return -ENOMEM;
-		return 0;
+		return ret2;
 	}
 
 	/* If this is based on frame buffer, set the offset */
@@ -250,14 +260,14 @@ static_var:
 	}
 
 	if (!tvar)
-		return 0;
+		return ret2;
 
 	regs = get_arch_regstr(regn);
 	if (!regs) {
 		/* This should be a bug in DWARF or this tool */
 		pr_warning("Mapping for the register number %u "
 			   "missing on this architecture.\n", regn);
-		return -ERANGE;
+		return -ENOTSUP;
 	}
 
 	tvar->value = strdup(regs);
@@ -269,7 +279,7 @@ static_var:
 		if (tvar->ref == NULL)
 			return -ENOMEM;
 	}
-	return 0;
+	return ret2;
 }
 
 #define BYTES_TO_BITS(nb)	((nb) * BITS_PER_LONG / sizeof(long))
@@ -1270,13 +1280,34 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data)
 		ret = convert_variable_location(die_mem, af->pf.addr,
 						af->pf.fb_ops, &af->pf.sp_die,
 						NULL);
-		if (ret == 0) {
+		if (ret == 0 || ret == -ERANGE) {
+			int ret2;
+			bool externs = !af->child;
 			struct strbuf buf;
 
 			strbuf_init(&buf, 64);
-			ret = die_get_varname(die_mem, &buf);
-			pr_debug2("Add new var: %s\n", buf.buf);
-			if (ret == 0) {
+
+			if (probe_conf.show_location_range) {
+				if (!externs) {
+					if (ret)
+						strbuf_addf(&buf, "[INV]\t");
+					else
+						strbuf_addf(&buf, "[VAL]\t");
+				} else
+					strbuf_addf(&buf, "[EXT]\t");
+			}
+
+			ret2 = die_get_varname(die_mem, &buf);
+
+			if (!ret2 && probe_conf.show_location_range &&
+				!externs) {
+				strbuf_addf(&buf, "\t");
+				ret2 = die_get_var_range(&af->pf.sp_die,
+							die_mem, &buf);
+			}
+
+			pr_debug("Add new var: %s\n", buf.buf);
+			if (ret2 == 0) {
 				strlist__add(vl->vars,
 					strbuf_detach(&buf, NULL));
 			}

From 7d5eaba9b33682b734e3a79c21c4a9a5f91624b1 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Mon, 11 May 2015 09:25:04 +0000
Subject: [PATCH 140/305] perf probe: Show better error message when failed to
 find variable

Indicate to check variable location range in error message when we got
failed to find the variable.

Before this patch:

  $ perf probe --add 'generic_perform_write+118 bytes'
  Failed to find the location of bytes at this address.
   Perhaps, it has been optimized out.
    Error: Failed to add events.

After this patch:

  $ perf probe --add 'generic_perform_write+118 bytes'
  Failed to find the location of the 'bytes' variable at this address.
   Perhaps it has been optimized out.
   Use -V with the --range option to show 'bytes' location range.
    Error: Failed to add events.

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1431336304-16863-3-git-send-email-hekuang@huawei.com
[ Improve the error message based on lkml thread ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-finder.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 5804086425aa..590a24a7f555 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -527,10 +527,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf)
 
 	ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops,
 					&pf->sp_die, pf->tvar);
-	if (ret == -ENOENT || ret == -EINVAL)
-		pr_err("Failed to find the location of %s at this address.\n"
-		       " Perhaps, it has been optimized out.\n", pf->pvar->var);
-	else if (ret == -ENOTSUP)
+	if (ret == -ENOENT || ret == -EINVAL) {
+		pr_err("Failed to find the location of the '%s' variable at this address.\n"
+		       " Perhaps it has been optimized out.\n"
+		       " Use -V with the --range option to show '%s' location range.\n",
+		       pf->pvar->var, pf->pvar->var);
+	} else if (ret == -ENOTSUP)
 		pr_err("Sorry, we don't support this variable location yet.\n");
 	else if (ret == 0 && pf->pvar->field) {
 		ret = convert_variable_fields(vr_die, pf->pvar->var,

From 189c466f77d421aef5c196454ab2e9517af7abc9 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Mon, 11 May 2015 12:28:35 +0000
Subject: [PATCH 141/305] perf tests: Fix to get negative exit codes

WEXITSTATUS consists of the least significant 8 bits of the status
argument, so we should convert the value to signed char if we have valid
negative exit codes. And the return value of test->func() contains
negative values:

  enum {
          TEST_OK   =  0,
          TEST_FAIL = -1,
          TEST_SKIP = -2,
  };

Before this patch:

  $ perf test -v 1
  ...
  test child finished with 254
  ---- end ----
  vmlinux symtab matches kallsyms: FAILED!

After this patch:

  $ perf test -v 1
  ...
  test child finished with -2
  ---- end ----
  vmlinux symtab matches kallsyms: Skip

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1431347316-30401-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index 4f4098167112..f42af98a5c16 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -219,7 +219,7 @@ static int run_test(struct test *test)
 	wait(&status);
 
 	if (WIFEXITED(status)) {
-		err = WEXITSTATUS(status);
+		err = (signed char)WEXITSTATUS(status);
 		pr_debug("test child finished with %d\n", err);
 	} else if (WIFSIGNALED(status)) {
 		err = -1;

From 021162cf02fcfa80cbae5f3b7304e9cb392962eb Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 11 May 2015 22:44:39 +0900
Subject: [PATCH 142/305] perf report: Do not restrict -T option by other
 options

It seems there's no reason to suppress per-thread event stat by -T
option when -s or -p option is used.  Make it work with those options.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1431351879-23798-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 4d642db642c5..92fca2157e5e 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -333,15 +333,14 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 	}
 
 	if (sort_order == NULL &&
-	    parent_pattern == default_parent_pattern) {
+	    parent_pattern == default_parent_pattern)
 		fprintf(stdout, "#\n# (%s)\n#\n", help);
 
-		if (rep->show_threads) {
-			bool style = !strcmp(rep->pretty_printing_style, "raw");
-			perf_read_values_display(stdout, &rep->show_threads_values,
-						 style);
-			perf_read_values_destroy(&rep->show_threads_values);
-		}
+	if (rep->show_threads) {
+		bool style = !strcmp(rep->pretty_printing_style, "raw");
+		perf_read_values_display(stdout, &rep->show_threads_values,
+					 style);
+		perf_read_values_destroy(&rep->show_threads_values);
 	}
 
 	return 0;

From 8f1960138baf8a6c139917ecbc032e7916d109ef Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 May 2015 16:30:20 -0300
Subject: [PATCH 143/305] perf tests: Show refcounting broken expectations in
 thread-mg-share test

To help understand the failure.

  [acme@zoo linux]$ perf test -v 30
  30: Test thread mg sharing                                 :
  --- start ---
  test child forked, pid 12275
  FAILED tests/thread-mg-share.c:68 wrong refcnt (4 != 3)
  test child finished with -1
  ---- end ----
  Test thread mg sharing: FAILED!
  [acme@zoo linux]$

This is under investigation, the thread__delete() calls were replaced
with thread__put(), and those cause mismatches because now we need to be
more judicious with the thread lifetime management.

I.e. previously the thread__delete() would drop the map_group refcount,
but now since thread__put doesn't call thread__delete() necessarily.
because we have other refcount holders, the map_group refcount will not
be as we expected when this test was implemented.

Will be fixed soon...

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-9y8e3f7ukzco5loxvnlitpfq@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/tests.h           |  9 +++++++++
 tools/perf/tests/thread-mg-share.c | 12 ++++++------
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 52758a33f64c..a10eaf5c4767 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -9,6 +9,15 @@ do {									 \
 	}								 \
 } while (0)
 
+#define TEST_ASSERT_EQUAL(text, val, expected)				 \
+do {									 \
+	if (val != expected) {						 \
+		pr_debug("FAILED %s:%d %s (%d != %d)\n",		 \
+			 __FILE__, __LINE__, text, val, expected);	 \
+		return -1;						 \
+	}								 \
+} while (0)
+
 enum {
 	TEST_OK   =  0,
 	TEST_FAIL = -1,
diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index dc05bd62b4a3..04ecceed4e5b 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -43,7 +43,7 @@ int test__thread_mg_share(void)
 			leader && t1 && t2 && t3 && other);
 
 	mg = leader->mg;
-	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4);
+	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 4);
 
 	/* test the map groups pointer is shared */
 	TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
@@ -59,25 +59,25 @@ int test__thread_mg_share(void)
 	TEST_ASSERT_VAL("failed to find other leader", other_leader);
 
 	other_mg = other->mg;
-	TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2);
+	TEST_ASSERT_EQUAL("wrong refcnt", other_mg->refcnt, 2);
 
 	TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
 
 	/* release thread group */
 	thread__put(leader);
-	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3);
+	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 3);
 
 	thread__put(t1);
-	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2);
+	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 2);
 
 	thread__put(t2);
-	TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1);
+	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 1);
 
 	thread__put(t3);
 
 	/* release other group  */
 	thread__put(other_leader);
-	TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1);
+	TEST_ASSERT_EQUAL("wrong refcnt", other_mg->refcnt, 1);
 
 	thread__put(other);
 

From 0ceb8f6e6cbafee0fa0e671e48213e24fae887f7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 May 2015 18:08:12 -0300
Subject: [PATCH 144/305] perf machine: No need to keep a refcnt for last_match

Since it is all associated with the refcount for keeping the thread
in the rbtree, it is excessive and unecessarily complex to hold a
refcont when changing machine->last_match.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-98kuesmfwtvhsrzx7ttyb0kt@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8b0b307d91f4..34bf89f7f4f3 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -364,7 +364,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 			return th;
 		}
 
-		thread__zput(machine->last_match);
+		machine->last_match = NULL;
 	}
 
 	while (*p != NULL) {
@@ -372,7 +372,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		th = rb_entry(parent, struct thread, rb_node);
 
 		if (th->tid == tid) {
-			machine->last_match = thread__get(th);
+			machine->last_match = th;
 			machine__update_thread_pid(machine, th, pid);
 			return th;
 		}
@@ -409,7 +409,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * It is now in the rbtree, get a ref
 		 */
 		thread__get(th);
-		machine->last_match = thread__get(th);
+		machine->last_match = th;
 	}
 
 	return th;
@@ -1309,7 +1309,7 @@ out_problem:
 static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock)
 {
 	if (machine->last_match == th)
-		thread__zput(machine->last_match);
+		machine->last_match = NULL;
 
 	BUG_ON(th->refcnt.counter == 0);
 	if (lock)

From 8b00f46951bed1edd9c5cb9d9adb62d28bbe7623 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 11 May 2015 18:13:14 -0300
Subject: [PATCH 145/305] perf tests: Fix map_groups refcount test

When introducing reference counting for struct thread instances I forgot
to remove the synthetic threads from the machine's rbtree so that it
then the threads would have just one reference and thus the
thread__put() replacing the thread__delete() really turns into a
thread__delete() (thread->refcnt == 1 at thread__put() time) and thus
drop the thread->mg refcount, as expected by the this test.

Fix it by calling machine__remove_thread() (the counterpart of
machine__findnew_thread()) on all the synthetic threads after the
checks that involves the rbtree were done.

Before:

  # perf test -v mg
  30: Test thread mg sharing                                 :
  --- start ---
  test child forked, pid 26995
  FAILED tests/thread-mg-share.c:68 wrong refcnt (4 != 3)
  test child finished with -1
  ---- end ----
  Test thread mg sharing: FAILED!
  #

After:

  # perf test mg
  30: Test thread mg sharing: Ok
  #

Fixes: b91fc39f4ad7 ("perf machine: Protect the machine->threads with a rwlock")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-uoqq0fjei90ohhhcboz6ay33@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/thread-mg-share.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index 04ecceed4e5b..c0ed56f7efc6 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -58,6 +58,18 @@ int test__thread_mg_share(void)
 	other_leader = machine__find_thread(machine, 4, 4);
 	TEST_ASSERT_VAL("failed to find other leader", other_leader);
 
+	/*
+	 * Ok, now that all the rbtree related operations were done,
+	 * lets remove all of them from there so that we can do the
+	 * refcounting tests.
+	 */
+	machine__remove_thread(machine, leader);
+	machine__remove_thread(machine, t1);
+	machine__remove_thread(machine, t2);
+	machine__remove_thread(machine, t3);
+	machine__remove_thread(machine, other);
+	machine__remove_thread(machine, other_leader);
+
 	other_mg = other->mg;
 	TEST_ASSERT_EQUAL("wrong refcnt", other_mg->refcnt, 2);
 
@@ -81,11 +93,6 @@ int test__thread_mg_share(void)
 
 	thread__put(other);
 
-	/*
-	 * Cannot call machine__delete_threads(machine) now,
-	 * because we've already released all the threads.
-	 */
-
 	machines__exit(&machines);
 	return 0;
 }

From d4c537e6bf860c12262cb936eef663180d7a3d45 Mon Sep 17 00:00:00 2001
From: "Naveen N. Rao" <naveen.n.rao@linux.vnet.ibm.com>
Date: Thu, 30 Apr 2015 17:12:31 +0530
Subject: [PATCH 146/305] perf probe: Ignore tail calls to probed functions

perf probe currently errors out if there are any tail calls to probed
functions:

[root@rhel71be]# perf probe do_fork
Failed to find probe point in any functions.
  Error: Failed to add events.

Fix this by teaching perf to ignore tail calls.

Without patch:

  [root@rhel71be perf]# ./perf probe -v do_fork
  probe-definition(0): do_fork symbol:do_fork file:(null) line:0 offset:0
  return:0 lazy:(null)
  0 arguments
  Looking at the vmlinux_path (7 entries long)
  symsrc__init: build id mismatch for /boot/vmlinux.
  Using /usr/lib/debug/lib/modules/3.10.0-201.el7.ppc64/vmlinux for symbols
  Open Debuginfo file:
  /usr/lib/debug/lib/modules/3.10.0-201.el7.ppc64/vmlinux
  Try to find probe point from debuginfo.
  found inline addr: 0xc0000000000bb9b0
  Probe point found: do_fork+0
  found inline addr: 0xc0000000000bbe20
  Probe point found: kernel_thread+48
  found inline addr: 0xc0000000000bbe5c
  Probe point found: sys_fork+28
  found inline addr: 0xc0000000000bbfac
  Probe point found: sys_vfork+44
  found inline addr: 0xc0000000000bc27c
  Failed to find probe point in any functions.
  An error occurred in debuginfo analysis (-2).
  Error: Failed to add events. Reason: No such file or directory (Code: -2)

With patch:

  [root@rhel71be perf]# ./perf probe -v do_fork
  probe-definition(0): do_fork symbol:do_fork file:(null) line:0 offset:0
  return:0 lazy:(null)
  0 arguments
  Looking at the vmlinux_path (7 entries long)
  symsrc__init: build id mismatch for /boot/vmlinux.
  Using /usr/lib/debug/lib/modules/3.10.0-201.el7.ppc64/vmlinux for symbols
  Open Debuginfo file:
  /usr/lib/debug/lib/modules/3.10.0-201.el7.ppc64/vmlinux
  Try to find probe point from debuginfo.
  found inline addr: 0xc0000000000bb9b0
  Probe point found: do_fork+0
  found inline addr: 0xc0000000000bbe20
  Probe point found: kernel_thread+48
  found inline addr: 0xc0000000000bbe5c
  Probe point found: sys_fork+28
  found inline addr: 0xc0000000000bbfac
  Probe point found: sys_vfork+44
  found inline addr: 0xc0000000000bc27c
  Ignoring tail call from SyS_clone
  Found 4 probe_trace_events.
  Opening /sys/kernel/debug/tracing/kprobe_events write=1
  No kprobe blacklist support, ignored
  Added new events:
  Writing event: p:probe/do_fork _text+768432
  Failed to write event: Invalid argument
    Error: Failed to add events. Reason: Invalid argument (Code: -22)

[Ignore the error about failure to write event - this kernel is missing
a patch to resolve _text properly]

The reason to ignore tail calls is that the address does not belong to
any function frame. In the example above, the address in SyS_clone is
0xc0000000000bc27c, but looking at the debug-info:

 <1><830081>: Abbrev Number: 133 (DW_TAG_subprogram)
    <830083>   DW_AT_external    : 1
    <830083>   DW_AT_name        : (indirect string, offset: 0x3cea3): SyS_clone
    <830087>   DW_AT_decl_file   : 7
    <830088>   DW_AT_decl_line   : 1689
    <83008a>   DW_AT_prototyped  : 1
    <83008a>   DW_AT_type        : <0x8110eb>
    <83008e>   DW_AT_low_pc      : 0xc0000000000bc270
    <830096>   DW_AT_high_pc     : 0xc
    <83009e>   DW_AT_frame_base  : 1 byte block: 9c 	(DW_OP_call_frame_cfa)
    <8300a0>   DW_AT_GNU_all_call_sites: 1
    <8300a0>   DW_AT_sibling     : <0x830178>
<snip>
 <3><830147>: Abbrev Number: 125 (DW_TAG_GNU_call_site)
    <830148>   DW_AT_low_pc      : 0xc0000000000bc27c
    <830150>   DW_AT_GNU_tail_call: 1
    <830150>   DW_AT_abstract_origin: <0x82e7e1>

The frame ends at 0xc0000000000bc27c. I suppose this is why this
particular call is a "tail" call. FWIW, systemtap seems to ignore these
as well and requires users to explicitly place probes at these call
sites if necessary. I print out the caller so that users know.

Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Link: http://lkml.kernel.org/r/1430394151-15928-1-git-send-email-naveen.n.rao@linux.vnet.ibm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c    | 37 ++++++++++++++++++++++++++++++++++
 tools/perf/util/dwarf-aux.h    |  4 ++++
 tools/perf/util/probe-finder.c | 12 ++++++++---
 3 files changed, 50 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index afa09719336b..eb47abdcf0ac 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -433,6 +433,43 @@ struct __addr_die_search_param {
 	Dwarf_Die	*die_mem;
 };
 
+static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data)
+{
+	struct __addr_die_search_param *ad = data;
+	Dwarf_Addr addr = 0;
+
+	if (dwarf_tag(fn_die) == DW_TAG_subprogram &&
+	    !dwarf_highpc(fn_die, &addr) &&
+	    addr == ad->addr) {
+		memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die));
+		return DWARF_CB_ABORT;
+	}
+	return DWARF_CB_OK;
+}
+
+/**
+ * die_find_tailfunc - Search for a non-inlined function with tail call at
+ * given address
+ * @cu_die: a CU DIE which including @addr
+ * @addr: target address
+ * @die_mem: a buffer for result DIE
+ *
+ * Search for a non-inlined function DIE with tail call at @addr. Stores the
+ * DIE to @die_mem and returns it if found. Returns NULL if failed.
+ */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem)
+{
+	struct __addr_die_search_param ad;
+	ad.addr = addr;
+	ad.die_mem = die_mem;
+	/* dwarf_getscopes can't find subprogram. */
+	if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0))
+		return NULL;
+	else
+		return die_mem;
+}
+
 /* die_find callback for non-inlined function search */
 static int __die_search_func_cb(Dwarf_Die *fn_die, void *data)
 {
diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h
index c154c0b80880..c42ec366f2a7 100644
--- a/tools/perf/util/dwarf-aux.h
+++ b/tools/perf/util/dwarf-aux.h
@@ -85,6 +85,10 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die,
 extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
 				    Dwarf_Die *die_mem);
 
+/* Search a non-inlined function with tail call at given address */
+Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr,
+				    Dwarf_Die *die_mem);
+
 /* Search the top inlined function including given address */
 extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr,
 					  Dwarf_Die *die_mem);
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index 590a24a7f555..c50da392e256 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -674,9 +674,15 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf)
 	/* If not a real subprogram, find a real one */
 	if (!die_is_func_def(sc_die)) {
 		if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
-			pr_warning("Failed to find probe point in any "
-				   "functions.\n");
-			return -ENOENT;
+			if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) {
+				pr_warning("Ignoring tail call from %s\n",
+						dwarf_diename(&pf->sp_die));
+				return 0;
+			} else {
+				pr_warning("Failed to find probe point in any "
+					   "functions.\n");
+				return -ENOENT;
+			}
 		}
 	} else
 		memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die));

From 4fd113b5ce803da0b8fa0494513bedfdf2feb483 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 14 May 2015 00:03:26 +0900
Subject: [PATCH 147/305] perf report: Fix some option handling on --stdio

There's a bug that perf report sometimes ignore some options on --stdio
output.  This bug is triggered only if a related config variable is set.
For example, let's assume we have a following config file.

  $ cat ~/.perfconfig
  [call-graph]
    print-type = graph
  [hist]
    percentage = absolute

Then, following perf config will not honor some options.

  $ perf record -ag sleep 1
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.199 MB perf.data (77 samples) ]

  $ perf report -g none --stdio
  # To display the perf.data header info, please use --header/--header-only options.
  #
  # Samples: 77  of event 'cycles'
  # Event count (approx.): 25425383
  #
  # Overhead  Command          Shared Object            Symbol
  # ........  ...............  .......................  ..............
  #
      16.34%  swapper          [kernel.vmlinux]         [k] intel_idle
                      |
                      ---intel_idle
                         cpuidle_enter_state
                         cpuidle_enter
                         cpu_startup_entry
   ...

With '-g none' option, it should not show callchains, but it still shows
callchains.  However it works as expected on --tui output.

Similarly, '--percentage relative' option is not work and still shows a
absolute percentage values.

Looking at the source, I found that those setting were overwritten by
config variables when setup_pager() called.  The setup_pager() is to
start a pager process so that it can manage long lines of output on the
stdio mode.  But as it calls the perf_config() after parsing arguments,
the settings were overwritten regardless of command line options.

The reason it calls perf_config() is to find the 'pager_program' which
might be set by a config variable, I guess.  However current perf code
does not provide the config variable for it, so it's just meaningless
IMHO.  Eliminating the call makes the option working as expected.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Taeung Song <treeze.taeung@gmail.com>
Link: http://lkml.kernel.org/r/1431529406-6762-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cache.h       | 1 -
 tools/perf/util/environment.c | 1 -
 tools/perf/util/pager.c       | 5 -----
 3 files changed, 7 deletions(-)

diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h
index fbcca21d66ab..c861373aaed3 100644
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@@ -30,7 +30,6 @@ extern const char *perf_config_dirname(const char *, const char *);
 
 /* pager.c */
 extern void setup_pager(void);
-extern const char *pager_program;
 extern int pager_in_use(void);
 extern int pager_use_color;
 
diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c
index 275b0ee345f5..7405123692f1 100644
--- a/tools/perf/util/environment.c
+++ b/tools/perf/util/environment.c
@@ -5,5 +5,4 @@
  */
 #include "cache.h"
 
-const char *pager_program;
 int pager_use_color = 1;
diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c
index 31ee02d4e988..53ef006a951c 100644
--- a/tools/perf/util/pager.c
+++ b/tools/perf/util/pager.c
@@ -50,11 +50,6 @@ void setup_pager(void)
 
 	if (!isatty(1))
 		return;
-	if (!pager) {
-		if (!pager_program)
-			perf_config(perf_default_config, NULL);
-		pager = pager_program;
-	}
 	if (!pager)
 		pager = getenv("PAGER");
 	if (!(pager || access("/usr/bin/pager", X_OK)))

From b390d850607346162467bf1805a365e07de7c079 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 May 2015 12:28:11 -0300
Subject: [PATCH 148/305] tools lib traceevent: Provide le16toh define for
 older systems

Where such macro is not present, so just copy its definition from
glibc's endian.h and define it if not already.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/n/tip-4j90i2na07ppidt0z6cbuxr7@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/plugin_cfg80211.c | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c
index 4592d8438318..ec57d0c1fbc2 100644
--- a/tools/lib/traceevent/plugin_cfg80211.c
+++ b/tools/lib/traceevent/plugin_cfg80211.c
@@ -4,6 +4,19 @@
 #include <endian.h>
 #include "event-parse.h"
 
+/*
+ * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5,
+ * Fedora6.
+ */
+#ifndef le16toh
+# if __BYTE_ORDER == __LITTLE_ENDIAN
+#  define le16toh(x) (x)
+# else
+#  define le16toh(x) __bswap_16 (x)
+# endif
+#endif
+
+
 static unsigned long long
 process___le16_to_cpup(struct trace_seq *s, unsigned long long *args)
 {

From c188e7acd24f132279a91f4fe1fb3b078e0329dd Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 14 May 2015 17:39:03 -0300
Subject: [PATCH 149/305] perf trace: Fix the build on older distros

Such as RHEL5, where CLOEXEC, NONBLOCK flags are not present, use a
ifdef+define approach instead to make it build on all distros.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vinson Lee <vlee@twitter.com>
Link: http://lkml.kernel.org/n/tip-pioazikk9d9oz5qdeor3eldu@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 96a2ebafc755..a05490d06374 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -16,7 +16,6 @@
 
 #include <libaudit.h>
 #include <stdlib.h>
-#include <sys/eventfd.h>
 #include <sys/mman.h>
 #include <linux/futex.h>
 
@@ -41,6 +40,34 @@
 # define EFD_SEMAPHORE		1
 #endif
 
+#ifndef EFD_NONBLOCK
+# define EFD_NONBLOCK		00004000
+#endif
+
+#ifndef EFD_CLOEXEC
+# define EFD_CLOEXEC		02000000
+#endif
+
+#ifndef O_CLOEXEC
+# define O_CLOEXEC		02000000
+#endif
+
+#ifndef SOCK_DCCP
+# define SOCK_DCCP		6
+#endif
+
+#ifndef SOCK_CLOEXEC
+# define SOCK_CLOEXEC		02000000
+#endif
+
+#ifndef SOCK_NONBLOCK
+# define SOCK_NONBLOCK		00004000
+#endif
+
+#ifndef MSG_CMSG_CLOEXEC
+# define MSG_CMSG_CLOEXEC	0x40000000
+#endif
+
 struct tp_field {
 	int offset;
 	union {

From 70923bd26c732ac2d9e248d80214df6dfd75f78c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Thu, 14 May 2015 12:31:48 -0300
Subject: [PATCH 150/305] perf tools: Make flex/bison calls honour V=1

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-dnc2ggwhffdpuvijwq4rkic9@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/Build | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index d552203aead0..6966d0743bf7 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -102,19 +102,19 @@ CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="B
 
 $(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c
 	$(call rule_mkdir)
-	@$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l
 
 $(OUTPUT)util/parse-events-bison.c: util/parse-events.y
 	$(call rule_mkdir)
-	@$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_
 
 $(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c
 	$(call rule_mkdir)
-	@$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
+	$(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l
 
 $(OUTPUT)util/pmu-bison.c: util/pmu.y
 	$(call rule_mkdir)
-	@$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
+	$(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_
 
 CFLAGS_parse-events-flex.o  += -w
 CFLAGS_pmu-flex.o           += -w

From 848cbd2562bb85953884be72b3bdbee0b2f32474 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 May 2015 15:20:44 -0300
Subject: [PATCH 151/305] perf tools: Use atomic.h for the map_groups refcount

Now that we have atomic.h, we should convert all of the existing
refcounts to use it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-onm5u3pioba1hqqhjs8on03e@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/thread-mg-share.c | 12 ++++++------
 tools/perf/util/map.c              |  4 ++--
 tools/perf/util/map.h              |  6 ++++--
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c
index c0ed56f7efc6..01fabb19d746 100644
--- a/tools/perf/tests/thread-mg-share.c
+++ b/tools/perf/tests/thread-mg-share.c
@@ -43,7 +43,7 @@ int test__thread_mg_share(void)
 			leader && t1 && t2 && t3 && other);
 
 	mg = leader->mg;
-	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 4);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4);
 
 	/* test the map groups pointer is shared */
 	TEST_ASSERT_VAL("map groups don't match", mg == t1->mg);
@@ -71,25 +71,25 @@ int test__thread_mg_share(void)
 	machine__remove_thread(machine, other_leader);
 
 	other_mg = other->mg;
-	TEST_ASSERT_EQUAL("wrong refcnt", other_mg->refcnt, 2);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2);
 
 	TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg);
 
 	/* release thread group */
 	thread__put(leader);
-	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 3);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3);
 
 	thread__put(t1);
-	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 2);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2);
 
 	thread__put(t2);
-	TEST_ASSERT_EQUAL("wrong refcnt", mg->refcnt, 1);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1);
 
 	thread__put(t3);
 
 	/* release other group  */
 	thread__put(other_leader);
-	TEST_ASSERT_EQUAL("wrong refcnt", other_mg->refcnt, 1);
+	TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1);
 
 	thread__put(other);
 
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index cd0e335008b4..2d20c5ff8653 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -426,7 +426,7 @@ void map_groups__init(struct map_groups *mg, struct machine *machine)
 		INIT_LIST_HEAD(&mg->removed_maps[i]);
 	}
 	mg->machine = machine;
-	mg->refcnt = 1;
+	atomic_set(&mg->refcnt, 1);
 }
 
 static void maps__delete(struct rb_root *maps)
@@ -494,7 +494,7 @@ void map_groups__delete(struct map_groups *mg)
 
 void map_groups__put(struct map_groups *mg)
 {
-	if (--mg->refcnt == 0)
+	if (mg && atomic_dec_and_test(&mg->refcnt))
 		map_groups__delete(mg);
 }
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 4e0c729841ab..7f39217d29bf 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_MAP_H
 #define __PERF_MAP_H
 
+#include <linux/atomic.h>
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
@@ -61,7 +62,7 @@ struct map_groups {
 	struct rb_root	 maps[MAP__NR_TYPES];
 	struct list_head removed_maps[MAP__NR_TYPES];
 	struct machine	 *machine;
-	int		 refcnt;
+	atomic_t	 refcnt;
 };
 
 struct map_groups *map_groups__new(struct machine *machine);
@@ -70,7 +71,8 @@ bool map_groups__empty(struct map_groups *mg);
 
 static inline struct map_groups *map_groups__get(struct map_groups *mg)
 {
-	++mg->refcnt;
+	if (mg)
+		atomic_inc(&mg->refcnt);
 	return mg;
 }
 

From 59a51c1dc9fbb3fb4af928b852d7b35df83edd74 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 May 2015 15:32:55 -0300
Subject: [PATCH 152/305] perf machine: Stop accessing atomic_t::counter
 directly

Use atomic_read(&counter) instead.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-k3hvfvpaut8wp02lzq27muhb@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 34bf89f7f4f3..daa55910ff28 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1311,7 +1311,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
 	if (machine->last_match == th)
 		machine->last_match = NULL;
 
-	BUG_ON(th->refcnt.counter == 0);
+	BUG_ON(atomic_read(&th->refcnt) == 0);
 	if (lock)
 		pthread_rwlock_wrlock(&machine->threads_lock);
 	rb_erase(&th->rb_node, &machine->threads);

From 7143849a5d6a5c623d81790d92f0033507c5b14f Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 May 2015 15:45:16 -0300
Subject: [PATCH 153/305] perf evlist: Use atomic.h for the perf_mmap refcount

Now that we have atomic.h, we should convert all of the existing
refcounts to use it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-qhpv2etncj3hfofgj1aitkyv@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 12 ++++++------
 tools/perf/util/evlist.h |  3 ++-
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 7ec1bf93ab28..dc1dc2c181ef 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -700,14 +700,14 @@ static bool perf_mmap__empty(struct perf_mmap *md)
 
 static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx)
 {
-	++evlist->mmap[idx].refcnt;
+	atomic_inc(&evlist->mmap[idx].refcnt);
 }
 
 static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx)
 {
-	BUG_ON(evlist->mmap[idx].refcnt == 0);
+	BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0);
 
-	if (--evlist->mmap[idx].refcnt == 0)
+	if (atomic_dec_and_test(&evlist->mmap[idx].refcnt))
 		__perf_evlist__munmap(evlist, idx);
 }
 
@@ -721,7 +721,7 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx)
 		perf_mmap__write_tail(md, old);
 	}
 
-	if (md->refcnt == 1 && perf_mmap__empty(md))
+	if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md))
 		perf_evlist__mmap_put(evlist, idx);
 }
 
@@ -758,7 +758,7 @@ static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx)
 	if (evlist->mmap[idx].base != NULL) {
 		munmap(evlist->mmap[idx].base, evlist->mmap_len);
 		evlist->mmap[idx].base = NULL;
-		evlist->mmap[idx].refcnt = 0;
+		atomic_set(&evlist->mmap[idx].refcnt, 0);
 	}
 	auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap);
 }
@@ -807,7 +807,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx,
 	 * evlist layer can't just drop it when filtering events in
 	 * perf_evlist__filter_pollfd().
 	 */
-	evlist->mmap[idx].refcnt = 2;
+	atomic_set(&evlist->mmap[idx].refcnt, 2);
 	evlist->mmap[idx].prev = 0;
 	evlist->mmap[idx].mask = mp->mask;
 	evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot,
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index c07b1a94a724..670e7f2114b1 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_EVLIST_H
 #define __PERF_EVLIST_H 1
 
+#include <linux/atomic.h>
 #include <linux/list.h>
 #include <api/fd/array.h>
 #include <stdio.h>
@@ -27,7 +28,7 @@ struct record_opts;
 struct perf_mmap {
 	void		 *base;
 	int		 mask;
-	int		 refcnt;
+	atomic_t	 refcnt;
 	u64		 prev;
 	struct auxtrace_mmap auxtrace_mmap;
 	char		 event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8)));

From f812d3045c2385ac16237e68b156859c4005526e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 May 2015 16:12:20 -0300
Subject: [PATCH 154/305] perf cgroup: Use atomic.h for refcounting

Now that we have atomic.h, we should convert all of the existing
refcounts to use it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-t3v2uma5digcj2tpkrs3m84u@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/cgroup.c | 10 +++-------
 tools/perf/util/cgroup.h |  4 +++-
 2 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c
index 88f7be399432..32e12ecfe9c5 100644
--- a/tools/perf/util/cgroup.c
+++ b/tools/perf/util/cgroup.c
@@ -115,23 +115,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str)
 			goto found;
 		n++;
 	}
-	if (cgrp->refcnt == 0)
+	if (atomic_read(&cgrp->refcnt) == 0)
 		free(cgrp);
 
 	return -1;
 found:
-	cgrp->refcnt++;
+	atomic_inc(&cgrp->refcnt);
 	counter->cgrp = cgrp;
 	return 0;
 }
 
 void close_cgroup(struct cgroup_sel *cgrp)
 {
-	if (!cgrp)
-		return;
-
-	/* XXX: not reentrant */
-	if (--cgrp->refcnt == 0) {
+	if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) {
 		close(cgrp->fd);
 		zfree(&cgrp->name);
 		free(cgrp);
diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h
index 89acd6debdc5..b4b8cb42fe5e 100644
--- a/tools/perf/util/cgroup.h
+++ b/tools/perf/util/cgroup.h
@@ -1,12 +1,14 @@
 #ifndef __CGROUP_H__
 #define __CGROUP_H__
 
+#include <linux/atomic.h>
+
 struct option;
 
 struct cgroup_sel {
 	char *name;
 	int fd;
-	int refcnt;
+	atomic_t refcnt;
 };
 
 

From c1b9034db7c85428f96db4ecf77abbf02d96de71 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 May 2015 18:23:11 +0200
Subject: [PATCH 155/305] perf tools: Fix dwarf-aux.c compilation on i386

Replacing %lu format strings for Dwarf_Addr type with PRIu64 as it fits
for Dwarf_Addr (defined as uint64_t) type and works also on both 32/64
bits.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1431706991-15646-1-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dwarf-aux.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c
index eb47abdcf0ac..57f3ef41c2bc 100644
--- a/tools/perf/util/dwarf-aux.c
+++ b/tools/perf/util/dwarf-aux.c
@@ -994,11 +994,11 @@ static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die,
 		end -= entry;
 
 		if (first) {
-			strbuf_addf(buf, "@<%s+[%lu-%lu",
+			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
 				name, start, end);
 			first = false;
 		} else {
-			strbuf_addf(buf, ",%lu-%lu",
+			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
 				start, end);
 		}
 	}
@@ -1057,11 +1057,11 @@ int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf)
 		start -= entry;
 		end -= entry;
 		if (first) {
-			strbuf_addf(buf, "@<%s+[%lu-%lu",
+			strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64,
 				name, start, end);
 			first = false;
 		} else {
-			strbuf_addf(buf, ",%lu-%lu",
+			strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64,
 				start, end);
 		}
 	}

From 2f15bd8c6c6e80f15c899ea84b53fa607a2f08b2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 15 May 2015 17:54:28 +0200
Subject: [PATCH 156/305] perf tools: Fix "Command" sort_entry's cmp and
 collapse function

Currently the se_cmp and se_collapse use pointer comparison,
which is ok for for testing equality of strings. It's not ok
as comparing function for rbtree insertion, because it gives
different results based on current pointer values.

We saw test 32 (hists cumulation test) failing based on different
environment setup. Having all sort functions straightened fix the
test for us.

Reported-by: Jan Stancek <jstancek@redhat.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jan Stancek <jstancek@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 4593f36ecc4c..09d4696fd9a1 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -89,14 +89,14 @@ static int64_t
 sort__comm_cmp(struct hist_entry *left, struct hist_entry *right)
 {
 	/* Compare the addr that should be unique among comm */
-	return comm__str(right->comm) - comm__str(left->comm);
+	return strcmp(comm__str(right->comm), comm__str(left->comm));
 }
 
 static int64_t
 sort__comm_collapse(struct hist_entry *left, struct hist_entry *right)
 {
 	/* Compare the addr that should be unique among comm */
-	return comm__str(right->comm) - comm__str(left->comm);
+	return strcmp(comm__str(right->comm), comm__str(left->comm));
 }
 
 static int64_t

From 75e4a2a6af15e956993913314ced2582b350a647 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 15 May 2015 12:14:44 +0000
Subject: [PATCH 157/305] perf probe: Load map before glob matching

Commit 4c859351226c920b227fec040a3b447f0d482af3 ("perf probe: Support
glob wildcards for function name") introduces a problem:

  # /root/perf probe kmem_cache_free
 Failed to find symbol kmem_cache_free in kernel
   Error: Failed to add events.

The reason is the replacement of map__for_each_symbol_by_name() (by
map__for_each_symbol()). Although their names are similar,
map__for_each_symbol doesn't call map__load() and dso__sort_by_name()
before searching. The missing of map__load() causes this problem because
it search symbol before load dso map.

This patch ensures map__load() is called before using
map__for_each_symbol().

After this patch:

 # /root/perf probe kmem_cache_free
  Added new event:
    probe:kmem_cache_free (on kmem_cache_free%return)

You can now use it in all perf tools, such as:

        perf record -e probe:kmem_cache_free -aR sleep 1

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1431692084-46287-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 2399dc4f6089..1faa1e67398b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2499,6 +2499,9 @@ static int find_probe_functions(struct map *map, char *name)
 	struct symbol *sym;
 	struct rb_node *tmp;
 
+	if (map__load(map, NULL) < 0)
+		return 0;
+
 	map__for_each_symbol(map, sym, tmp) {
 		if (strglobmatch(sym->name, name))
 			found++;

From 70ba6b8f975bcff56e180e2c7550d2302fcbf923 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 15 May 2015 07:50:57 +0000
Subject: [PATCH 158/305] tools include: add __aligned_u64 to types.h.

Following patches will introduce linux/bpf.h to a new libbpf library,
which requires definition of __aligned_u64. This patch add it to the
common types.h for tools.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pi3orama@163.com
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1431676290-1230-5-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/include/linux/types.h | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h
index 0bdeda66aae5..8ebf6278b2ef 100644
--- a/tools/include/linux/types.h
+++ b/tools/include/linux/types.h
@@ -64,6 +64,10 @@ typedef struct {
 	int counter;
 } atomic_t;
 
+#ifndef __aligned_u64
+# define __aligned_u64 __u64 __attribute__((aligned(8)))
+#endif
+
 struct list_head {
 	struct list_head *next, *prev;
 };

From 86066064e3e545338e9b48b49256d5e51213915a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 15 May 2015 17:29:56 -0300
Subject: [PATCH 159/305] perf tools: Elliminate alignment holes

perf_evsel:

Before:

	/* size: 320, cachelines: 5, members: 35 */
	/* sum members: 304, holes: 3, sum holes: 16 */

After:

	/* size: 304, cachelines: 5, members: 35 */
	/* last cacheline: 48 bytes */

perf_evlist:

Before:

	/* size: 2544, cachelines: 40, members: 17 */
	/* sum members: 2533, holes: 2, sum holes: 11 */
	/* last cacheline: 48 bytes */

After:

	/* size: 2536, cachelines: 40, members: 17 */
	/* sum members: 2533, holes: 1, sum holes: 3 */
	/* last cacheline: 40 bytes */

timechart:

Before:

	/* size: 288, cachelines: 5, members: 21 */
	/* sum members: 271, holes: 2, sum holes: 10 */
	/* padding: 7 */
	/* last cacheline: 32 bytes */

After:

	/* size: 272, cachelines: 5, members: 21 */
	/* sum members: 271, holes: 1, sum holes: 1 */
	/* last cacheline: 16 bytes */

thread:

Before:

	/* size: 112, cachelines: 2, members: 15 */
	/* sum members: 101, holes: 2, sum holes: 11 */
	/* last cacheline: 48 bytes */

After:

	/* size: 104, cachelines: 2, members: 15 */
	/* sum members: 101, holes: 1, sum holes: 3 */
	/* last cacheline: 40 bytes */

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-a543w7zjl9yyrg9nkf1teukp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-timechart.c | 4 ++--
 tools/perf/util/evlist.h       | 2 +-
 tools/perf/util/evsel.h        | 4 ++--
 tools/perf/util/thread.h       | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c
index 3b884e37ab8b..30e59620179d 100644
--- a/tools/perf/builtin-timechart.c
+++ b/tools/perf/builtin-timechart.c
@@ -61,13 +61,13 @@ struct timechart {
 				tasks_only,
 				with_backtrace,
 				topology;
+	bool			force;
 	/* IO related settings */
-	u64			io_events;
 	bool			io_only,
 				skip_eagain;
+	u64			io_events;
 	u64			min_time,
 				merge_dist;
-	bool			force;
 };
 
 struct per_pidcomm;
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 670e7f2114b1..955bf31b7dd3 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -40,6 +40,7 @@ struct perf_evlist {
 	int		 nr_entries;
 	int		 nr_groups;
 	int		 nr_mmaps;
+	bool		 overwrite;
 	size_t		 mmap_len;
 	int		 id_pos;
 	int		 is_pos;
@@ -48,7 +49,6 @@ struct perf_evlist {
 		int	cork_fd;
 		pid_t	pid;
 	} workload;
-	bool		 overwrite;
 	struct fdarray	 pollfd;
 	struct perf_mmap *mmap;
 	struct thread_map *threads;
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index e486151b0308..21ec08247d47 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -73,7 +73,6 @@ struct perf_evsel {
 	char			*name;
 	double			scale;
 	const char		*unit;
-	bool			snapshot;
 	struct event_format	*tp_format;
 	union {
 		void		*priv;
@@ -86,6 +85,7 @@ struct perf_evsel {
 	unsigned int		sample_size;
 	int			id_pos;
 	int			is_pos;
+	bool			snapshot;
 	bool 			supported;
 	bool 			needs_swap;
 	bool			no_aux_samples;
@@ -93,11 +93,11 @@ struct perf_evsel {
 	bool			system_wide;
 	bool			tracking;
 	bool			per_pkg;
-	unsigned long		*per_pkg_mask;
 	/* parse modifier helper */
 	int			exclude_GH;
 	int			nr_members;
 	int			sample_read;
+	unsigned long		*per_pkg_mask;
 	struct perf_evsel	*leader;
 	char			*group_name;
 };
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index f33c48cfdaa0..a0ac0317affb 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -25,9 +25,9 @@ struct thread {
 	atomic_t		refcnt;
 	char			shortname[3];
 	bool			comm_set;
+	int			comm_len;
 	bool			dead; /* if set thread has exited */
 	struct list_head	comm_list;
-	int			comm_len;
 	u64			db_id;
 
 	void			*priv;

From 0b1de0be1eac7b23e89cb43c17b02d38ead6b6c8 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 18 May 2015 09:30:17 +0900
Subject: [PATCH 160/305] perf tools: Add rm_rf() utility function

The rm_rf() function does same as the shell command 'rm -rf' which
removes all directory entries recursively.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1431909055-21442-3-git-send-email-namhyung@kernel.org
Link: http://lkml.kernel.org/r/20150130150256.GF6188@krava.brq.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.c | 43 ++++++++++++++++++++++++++++++++++++++++++
 tools/perf/util/util.h |  1 +
 2 files changed, 44 insertions(+)

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 4ee6d0d4c993..6104afb7e1ef 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -72,6 +72,49 @@ int mkdir_p(char *path, mode_t mode)
 	return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0;
 }
 
+int rm_rf(char *path)
+{
+	DIR *dir;
+	int ret = 0;
+	struct dirent *d;
+	char namebuf[PATH_MAX];
+
+	dir = opendir(path);
+	if (dir == NULL)
+		return 0;
+
+	while ((d = readdir(dir)) != NULL && !ret) {
+		struct stat statbuf;
+
+		if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, ".."))
+			continue;
+
+		scnprintf(namebuf, sizeof(namebuf), "%s/%s",
+			  path, d->d_name);
+
+		ret = stat(namebuf, &statbuf);
+		if (ret < 0) {
+			pr_debug("stat failed: %s\n", namebuf);
+			break;
+		}
+
+		if (S_ISREG(statbuf.st_mode))
+			ret = unlink(namebuf);
+		else if (S_ISDIR(statbuf.st_mode))
+			ret = rm_rf(namebuf);
+		else {
+			pr_debug("unknown file: %s\n", namebuf);
+			ret = -1;
+		}
+	}
+	closedir(dir);
+
+	if (ret < 0)
+		return ret;
+
+	return rmdir(path);
+}
+
 static int slow_copyfile(const char *from, const char *to, mode_t mode)
 {
 	int err = -1;
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index 3601ffd3d8b4..c4fe38ac8c00 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -249,6 +249,7 @@ static inline int sane_case(int x, int high)
 }
 
 int mkdir_p(char *path, mode_t mode);
+int rm_rf(char *path);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
 

From 9c9f5a2f1944e8b6bf2b618d04b31e1c1760637e Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 18 May 2015 09:30:18 +0900
Subject: [PATCH 161/305] perf tools: Introduce copyfile_offset() function

The copyfile_offset() function is to copy source data from given offset
to a destination file with an offset.  It'll be used to build an indexed
data file.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20150304145824.GD7519@krava.brq.redhat.com
Link: http://lkml.kernel.org/r/1431909055-21442-4-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.c | 38 +++++++++++++++++++++++++++++---------
 tools/perf/util/util.h |  1 +
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 6104afb7e1ef..0c264bc685ac 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -145,11 +145,38 @@ out:
 	return err;
 }
 
+int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size)
+{
+	void *ptr;
+	loff_t pgoff;
+
+	pgoff = off_in & ~(page_size - 1);
+	off_in -= pgoff;
+
+	ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff);
+	if (ptr == MAP_FAILED)
+		return -1;
+
+	while (size) {
+		ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out);
+		if (ret < 0 && errno == EINTR)
+			continue;
+		if (ret <= 0)
+			break;
+
+		size -= ret;
+		off_in += ret;
+		off_out -= ret;
+	}
+	munmap(ptr, off_in + size);
+
+	return size ? -1 : 0;
+}
+
 int copyfile_mode(const char *from, const char *to, mode_t mode)
 {
 	int fromfd, tofd;
 	struct stat st;
-	void *addr;
 	int err = -1;
 
 	if (stat(from, &st))
@@ -166,15 +193,8 @@ int copyfile_mode(const char *from, const char *to, mode_t mode)
 	if (tofd < 0)
 		goto out_close_from;
 
-	addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0);
-	if (addr == MAP_FAILED)
-		goto out_close_to;
+	err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
 
-	if (write(tofd, addr, st.st_size) == st.st_size)
-		err = 0;
-
-	munmap(addr, st.st_size);
-out_close_to:
 	close(tofd);
 	if (err)
 		unlink(to);
diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h
index c4fe38ac8c00..8bce58b47a82 100644
--- a/tools/perf/util/util.h
+++ b/tools/perf/util/util.h
@@ -252,6 +252,7 @@ int mkdir_p(char *path, mode_t mode);
 int rm_rf(char *path);
 int copyfile(const char *from, const char *to);
 int copyfile_mode(const char *from, const char *to, mode_t mode);
+int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size);
 
 s64 perf_atoll(const char *str);
 char **argv_split(const char *str, int *argcp);

From 4a936edc317005e8cd2b501e7865721bec104544 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 18 May 2015 09:30:40 +0900
Subject: [PATCH 162/305] perf symbols: Protect dso symbol loading using a
 mutex

Add mutex to protect it from concurrent dso__load().

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1431909055-21442-26-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c    |  2 ++
 tools/perf/util/dso.h    |  1 +
 tools/perf/util/symbol.c | 34 ++++++++++++++++++++++++----------
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 13d9ae0bd15c..482d6024ef13 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -936,6 +936,7 @@ struct dso *dso__new(const char *name)
 		RB_CLEAR_NODE(&dso->rb_node);
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
+		pthread_mutex_init(&dso->lock, NULL);
 	}
 
 	return dso;
@@ -966,6 +967,7 @@ void dso__delete(struct dso *dso)
 	dso_cache__free(&dso->data.cache);
 	dso__free_a2l(dso);
 	zfree(&dso->symsrc_filename);
+	pthread_mutex_destroy(&dso->lock);
 	free(dso);
 }
 
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 3d79c749934c..b26ec3ab1336 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -129,6 +129,7 @@ struct dsos {
 struct auxtrace_cache;
 
 struct dso {
+	pthread_mutex_t	 lock;
 	struct list_head node;
 	struct rb_node	 rb_node;	/* rbtree node sorted by long name */
 	struct rb_root	 symbols[MAP__NR_TYPES];
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 45ba48a7acb3..9ef8b8946b11 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1383,12 +1383,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 	struct symsrc *syms_ss = NULL, *runtime_ss = NULL;
 	bool kmod;
 
-	dso__set_loaded(dso, map->type);
+	pthread_mutex_lock(&dso->lock);
 
-	if (dso->kernel == DSO_TYPE_KERNEL)
-		return dso__load_kernel_sym(dso, map, filter);
-	else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
-		return dso__load_guest_kernel_sym(dso, map, filter);
+	/* check again under the dso->lock */
+	if (dso__loaded(dso, map->type)) {
+		ret = 1;
+		goto out;
+	}
+
+	if (dso->kernel) {
+		if (dso->kernel == DSO_TYPE_KERNEL)
+			ret = dso__load_kernel_sym(dso, map, filter);
+		else if (dso->kernel == DSO_TYPE_GUEST_KERNEL)
+			ret = dso__load_guest_kernel_sym(dso, map, filter);
+
+		goto out;
+	}
 
 	if (map->groups && map->groups->machine)
 		machine = map->groups->machine;
@@ -1401,18 +1411,18 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 		struct stat st;
 
 		if (lstat(dso->name, &st) < 0)
-			return -1;
+			goto out;
 
 		if (st.st_uid && (st.st_uid != geteuid())) {
 			pr_warning("File %s not owned by current user or root, "
 				"ignoring it.\n", dso->name);
-			return -1;
+			goto out;
 		}
 
 		ret = dso__load_perf_map(dso, map, filter);
 		dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT :
 					     DSO_BINARY_TYPE__NOT_FOUND;
-		return ret;
+		goto out;
 	}
 
 	if (machine)
@@ -1420,7 +1430,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 
 	name = malloc(PATH_MAX);
 	if (!name)
-		return -1;
+		goto out;
 
 	kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE ||
 		dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP ||
@@ -1501,7 +1511,11 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter)
 out_free:
 	free(name);
 	if (ret < 0 && strstr(dso->name, " (deleted)") != NULL)
-		return 0;
+		ret = 0;
+out:
+	dso__set_loaded(dso, map->type);
+	pthread_mutex_unlock(&dso->lock);
+
 	return ret;
 }
 

From 8e67b7258e582998ab635bdc3c884d7a8077af5b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 18 May 2015 09:30:41 +0900
Subject: [PATCH 163/305] perf symbols: Protect dso cache tree using dso->lock

The dso cache is accessed during dwarf callchain unwind and it might be
processed concurrently.  Protect it under dso->lock.

Note that it doesn't protect dso_cache__find().  I think it's safe to
access to the cache tree without the lock since we don't delete nodes.

It it missed an existing node due to rotation, it'll find it during
dso_cache__insert() anyway.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1431909055-21442-27-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 34 +++++++++++++++++++++++++++-------
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 482d6024ef13..666e1db44541 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -495,10 +495,12 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 }
 
 static void
-dso_cache__free(struct rb_root *root)
+dso_cache__free(struct dso *dso)
 {
+	struct rb_root *root = &dso->data.cache;
 	struct rb_node *next = rb_first(root);
 
+	pthread_mutex_lock(&dso->lock);
 	while (next) {
 		struct dso_cache *cache;
 
@@ -507,10 +509,12 @@ dso_cache__free(struct rb_root *root)
 		rb_erase(&cache->rb_node, root);
 		free(cache);
 	}
+	pthread_mutex_unlock(&dso->lock);
 }
 
-static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
+static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset)
 {
+	const struct rb_root *root = &dso->data.cache;
 	struct rb_node * const *p = &root->rb_node;
 	const struct rb_node *parent = NULL;
 	struct dso_cache *cache;
@@ -529,17 +533,20 @@ static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset)
 		else
 			return cache;
 	}
+
 	return NULL;
 }
 
-static void
-dso_cache__insert(struct rb_root *root, struct dso_cache *new)
+static struct dso_cache *
+dso_cache__insert(struct dso *dso, struct dso_cache *new)
 {
+	struct rb_root *root = &dso->data.cache;
 	struct rb_node **p = &root->rb_node;
 	struct rb_node *parent = NULL;
 	struct dso_cache *cache;
 	u64 offset = new->offset;
 
+	pthread_mutex_lock(&dso->lock);
 	while (*p != NULL) {
 		u64 end;
 
@@ -551,10 +558,17 @@ dso_cache__insert(struct rb_root *root, struct dso_cache *new)
 			p = &(*p)->rb_left;
 		else if (offset >= end)
 			p = &(*p)->rb_right;
+		else
+			goto out;
 	}
 
 	rb_link_node(&new->rb_node, parent, p);
 	rb_insert_color(&new->rb_node, root);
+
+	cache = NULL;
+out:
+	pthread_mutex_unlock(&dso->lock);
+	return cache;
 }
 
 static ssize_t
@@ -572,6 +586,7 @@ static ssize_t
 dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 {
 	struct dso_cache *cache;
+	struct dso_cache *old;
 	ssize_t ret;
 
 	do {
@@ -591,7 +606,12 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 
 		cache->offset = cache_offset;
 		cache->size   = ret;
-		dso_cache__insert(&dso->data.cache, cache);
+		old = dso_cache__insert(dso, cache);
+		if (old) {
+			/* we lose the race */
+			free(cache);
+			cache = old;
+		}
 
 		ret = dso_cache__memcpy(cache, offset, data, size);
 
@@ -608,7 +628,7 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
 {
 	struct dso_cache *cache;
 
-	cache = dso_cache__find(&dso->data.cache, offset);
+	cache = dso_cache__find(dso, offset);
 	if (cache)
 		return dso_cache__memcpy(cache, offset, data, size);
 	else
@@ -964,7 +984,7 @@ void dso__delete(struct dso *dso)
 
 	dso__data_close(dso);
 	auxtrace_cache__free(dso->auxtrace_cache);
-	dso_cache__free(&dso->data.cache);
+	dso_cache__free(dso);
 	dso__free_a2l(dso);
 	zfree(&dso->symsrc_filename);
 	pthread_mutex_destroy(&dso->lock);

From 33bdedcea2d77231fe46b1204cf6fc3a7d7c96c9 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Mon, 18 May 2015 09:30:42 +0900
Subject: [PATCH 164/305] perf tools: Protect dso cache fd with a mutex

When dso cache is accessed in multi-thread environment, it's possible to
close other dso->data.fd during operation due to open file limit.
Protect the file descriptors using a separate mutex.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1431909055-21442-28-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 98 +++++++++++++++++++++++++++++++------------
 1 file changed, 72 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 666e1db44541..1b96c8d18435 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -265,6 +265,7 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  */
 static LIST_HEAD(dso__data_open);
 static long dso__data_open_cnt;
+static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER;
 
 static void dso__list_add(struct dso *dso)
 {
@@ -434,7 +435,9 @@ static void check_data_close(void)
  */
 void dso__data_close(struct dso *dso)
 {
+	pthread_mutex_lock(&dso__data_open_lock);
 	close_dso(dso);
+	pthread_mutex_unlock(&dso__data_open_lock);
 }
 
 /**
@@ -457,6 +460,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
 	if (dso->data.status == DSO_DATA_STATUS_ERROR)
 		return -1;
 
+	pthread_mutex_lock(&dso__data_open_lock);
+
 	if (dso->data.fd >= 0)
 		goto out;
 
@@ -479,6 +484,7 @@ out:
 	else
 		dso->data.status = DSO_DATA_STATUS_ERROR;
 
+	pthread_mutex_unlock(&dso__data_open_lock);
 	return dso->data.fd;
 }
 
@@ -583,7 +589,8 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset,
 }
 
 static ssize_t
-dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+dso_cache__read(struct dso *dso, struct machine *machine,
+		u64 offset, u8 *data, ssize_t size)
 {
 	struct dso_cache *cache;
 	struct dso_cache *old;
@@ -592,11 +599,24 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	do {
 		u64 cache_offset;
 
-		ret = -ENOMEM;
-
 		cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE);
 		if (!cache)
-			break;
+			return -ENOMEM;
+
+		pthread_mutex_lock(&dso__data_open_lock);
+
+		/*
+		 * dso->data.fd might be closed if other thread opened another
+		 * file (dso) due to open file limit (RLIMIT_NOFILE).
+		 */
+		if (dso->data.fd < 0) {
+			dso->data.fd = open_dso(dso, machine);
+			if (dso->data.fd < 0) {
+				ret = -errno;
+				dso->data.status = DSO_DATA_STATUS_ERROR;
+				break;
+			}
+		}
 
 		cache_offset = offset & DSO__DATA_CACHE_MASK;
 
@@ -606,6 +626,11 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 
 		cache->offset = cache_offset;
 		cache->size   = ret;
+	} while (0);
+
+	pthread_mutex_unlock(&dso__data_open_lock);
+
+	if (ret > 0) {
 		old = dso_cache__insert(dso, cache);
 		if (old) {
 			/* we lose the race */
@@ -614,8 +639,7 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 		}
 
 		ret = dso_cache__memcpy(cache, offset, data, size);
-
-	} while (0);
+	}
 
 	if (ret <= 0)
 		free(cache);
@@ -623,8 +647,8 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	return ret;
 }
 
-static ssize_t dso_cache_read(struct dso *dso, u64 offset,
-			      u8 *data, ssize_t size)
+static ssize_t dso_cache_read(struct dso *dso, struct machine *machine,
+			      u64 offset, u8 *data, ssize_t size)
 {
 	struct dso_cache *cache;
 
@@ -632,7 +656,7 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
 	if (cache)
 		return dso_cache__memcpy(cache, offset, data, size);
 	else
-		return dso_cache__read(dso, offset, data, size);
+		return dso_cache__read(dso, machine, offset, data, size);
 }
 
 /*
@@ -640,7 +664,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset,
  * in the rb_tree. Any read to already cached data is served
  * by cached data.
  */
-static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
+static ssize_t cached_read(struct dso *dso, struct machine *machine,
+			   u64 offset, u8 *data, ssize_t size)
 {
 	ssize_t r = 0;
 	u8 *p = data;
@@ -648,7 +673,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	do {
 		ssize_t ret;
 
-		ret = dso_cache_read(dso, offset, p, size);
+		ret = dso_cache_read(dso, machine, offset, p, size);
 		if (ret < 0)
 			return ret;
 
@@ -668,21 +693,42 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size)
 	return r;
 }
 
-static int data_file_size(struct dso *dso)
+static int data_file_size(struct dso *dso, struct machine *machine)
 {
+	int ret = 0;
 	struct stat st;
 	char sbuf[STRERR_BUFSIZE];
 
-	if (!dso->data.file_size) {
-		if (fstat(dso->data.fd, &st)) {
-			pr_err("dso mmap failed, fstat: %s\n",
-				strerror_r(errno, sbuf, sizeof(sbuf)));
-			return -1;
+	if (dso->data.file_size)
+		return 0;
+
+	pthread_mutex_lock(&dso__data_open_lock);
+
+	/*
+	 * dso->data.fd might be closed if other thread opened another
+	 * file (dso) due to open file limit (RLIMIT_NOFILE).
+	 */
+	if (dso->data.fd < 0) {
+		dso->data.fd = open_dso(dso, machine);
+		if (dso->data.fd < 0) {
+			ret = -errno;
+			dso->data.status = DSO_DATA_STATUS_ERROR;
+			goto out;
 		}
-		dso->data.file_size = st.st_size;
 	}
 
-	return 0;
+	if (fstat(dso->data.fd, &st) < 0) {
+		ret = -errno;
+		pr_err("dso cache fstat failed: %s\n",
+		       strerror_r(errno, sbuf, sizeof(sbuf)));
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
+	}
+	dso->data.file_size = st.st_size;
+
+out:
+	pthread_mutex_unlock(&dso__data_open_lock);
+	return ret;
 }
 
 /**
@@ -700,17 +746,17 @@ off_t dso__data_size(struct dso *dso, struct machine *machine)
 	if (fd < 0)
 		return fd;
 
-	if (data_file_size(dso))
+	if (data_file_size(dso, machine))
 		return -1;
 
 	/* For now just estimate dso data size is close to file size */
 	return dso->data.file_size;
 }
 
-static ssize_t data_read_offset(struct dso *dso, u64 offset,
-				u8 *data, ssize_t size)
+static ssize_t data_read_offset(struct dso *dso, struct machine *machine,
+				u64 offset, u8 *data, ssize_t size)
 {
-	if (data_file_size(dso))
+	if (data_file_size(dso, machine))
 		return -1;
 
 	/* Check the offset sanity. */
@@ -720,7 +766,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
 	if (offset + size < offset)
 		return -1;
 
-	return cached_read(dso, offset, data, size);
+	return cached_read(dso, machine, offset, data, size);
 }
 
 /**
@@ -737,10 +783,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset,
 ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine,
 			      u64 offset, u8 *data, ssize_t size)
 {
-	if (dso__data_fd(dso, machine) < 0)
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
 		return -1;
 
-	return data_read_offset(dso, offset, data, size);
+	return data_read_offset(dso, machine, offset, data, size);
 }
 
 /**

From c4f035473d93c1594d8225f6dd97332317820801 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sun, 17 May 2015 10:56:27 +0000
Subject: [PATCH 165/305] perf tools: Set vmlinux_path__nr_entries to 0 in
 vmlinux_path__exit

Original vmlinux_path__exit() doesn't revert vmlinux_path__nr_entries to
its original state. After the while loop vmlinux_path__nr_entries
becomes -1 instead of 0.

This makes a problem that, if runs twice, during the second run
vmlinux_path__init() will set vmlinux_path[-1] to strdup("vmlinux"),
corrupts random memory.

This patch reset vmlinux_path__nr_entries to 0 after the while loop.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Cc: Alexei Starovoitov <ast@plumgrid.com
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pi3orama@163.com
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1431860222-61636-3-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/symbol.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 9ef8b8946b11..82a31fd0fcf5 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1819,6 +1819,7 @@ static void vmlinux_path__exit(void)
 {
 	while (--vmlinux_path__nr_entries >= 0)
 		zfree(&vmlinux_path[vmlinux_path__nr_entries]);
+	vmlinux_path__nr_entries = 0;
 
 	zfree(&vmlinux_path);
 }

From bb53e176fed9b6c9321904558b4e605b4770e454 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sun, 17 May 2015 10:56:28 +0000
Subject: [PATCH 166/305] tools lib traceevent: Install libtraceevent.a into
 libdir

Before this patch, 'make install' installs libraries into bindir:

  $ make install DESTDIR=./tree
   INSTALL  trace_plugins
   INSTALL  libtraceevent.a
   INSTALL  libtraceevent.so
  $ find ./tree
   ./tree/
   ./tree/usr
   ./tree/usr/local
   ./tree/usr/local/bin
   ./tree/usr/local/bin/libtraceevent.a
   ./tree/usr/local/bin/libtraceevent.so
   ...

/usr/local/lib( or lib64) should be a better place.

This patch replaces 'bin' with libdir. For __LP64__ building, libraries
are installed to /usr/local/lib64. For other building, to
/usr/local/lib instead.

After applying this patch:

  $ make install DESTDIR=./tree
   INSTALL  trace_plugins
   INSTALL  libtraceevent.a
   INSTALL  libtraceevent.so
  $ find ./tree
   ./tree
   ./tree/usr
   ./tree/usr/local
   ./tree/usr/local/lib64
   ./tree/usr/local/lib64/libtraceevent.a
   ./tree/usr/local/lib64/traceevent
   ./tree/usr/local/lib64/traceevent/plugins
   ./tree/usr/local/lib64/traceevent/plugins/plugin_mac80211.so
   ./tree/usr/local/lib64/traceevent/plugins/plugin_hrtimer.so
   ...
   ./tree/usr/local/lib64/libtraceevent.so

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Acked-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pi3orama@163.com
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1431860222-61636-4-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 20 +++++++++++++-------
 1 file changed, 13 insertions(+), 7 deletions(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index d410da335e3d..84640394ebf9 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -34,9 +34,15 @@ INSTALL = install
 DESTDIR ?=
 DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))'
 
+LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1)
+ifeq ($(LP64), 1)
+  libdir_relative = lib64
+else
+  libdir_relative = lib
+endif
+
 prefix ?= /usr/local
-bindir_relative = bin
-bindir = $(prefix)/$(bindir_relative)
+libdir = $(prefix)/$(libdir_relative)
 man_dir = $(prefix)/share/man
 man_dir_SQ = '$(subst ','\'',$(man_dir))'
 
@@ -58,7 +64,7 @@ ifeq ($(prefix),$(HOME))
 override plugin_dir = $(HOME)/.traceevent/plugins
 set_plugin_dir := 0
 else
-override plugin_dir = $(prefix)/lib/traceevent/plugins
+override plugin_dir = $(libdir)/traceevent/plugins
 endif
 endif
 
@@ -85,11 +91,11 @@ srctree := $(patsubst %/,%,$(dir $(srctree)))
 #$(info Determined 'srctree' to be $(srctree))
 endif
 
-export prefix bindir src obj
+export prefix libdir src obj
 
 # Shell quotes
-bindir_SQ = $(subst ','\'',$(bindir))
-bindir_relative_SQ = $(subst ','\'',$(bindir_relative))
+libdir_SQ = $(subst ','\'',$(libdir))
+libdir_relative_SQ = $(subst ','\'',$(libdir_relative))
 plugin_dir_SQ = $(subst ','\'',$(plugin_dir))
 
 LIB_FILE = libtraceevent.a libtraceevent.so
@@ -240,7 +246,7 @@ endef
 
 install_lib: all_cmd install_plugins
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
-		$(call do_install,$(LIB_FILE),$(bindir_SQ))
+		$(call do_install,$(LIB_FILE),$(libdir_SQ))
 
 install_plugins: $(PLUGINS)
 	$(call QUIET_INSTALL, trace_plugins) \

From 8135c8c750cf018cd43bf955117529467ba178db Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Sun, 17 May 2015 10:56:29 +0000
Subject: [PATCH 167/305] tools build: Change FEATURE_TESTS and FEATURE_DISPLAY
 to weak binding

Replace strong binding of FEATURE_TESTS and FEATURE_DISPLAY by weak
binding. This patch enables other makefiles which include
tools/build/Makefile.feature enable only limited feathres to check.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: pi3orama@163.com
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1431860222-61636-5-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/Makefile.feature | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 3a0b0ca2a28c..2975632d51e2 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -27,7 +27,7 @@ endef
 #   the rule that uses them - an example for that is the 'bionic'
 #   feature check. ]
 #
-FEATURE_TESTS =			\
+FEATURE_TESTS ?=			\
 	backtrace			\
 	dwarf				\
 	fortify-source			\
@@ -53,7 +53,7 @@ FEATURE_TESTS =			\
 	zlib				\
 	lzma
 
-FEATURE_DISPLAY =			\
+FEATURE_DISPLAY ?=			\
 	dwarf				\
 	glibc				\
 	gtk2				\

From 2d8e405acd787f4b975f73e0f8d9804b272c00f0 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 18 May 2015 12:24:41 -0300
Subject: [PATCH 168/305] perf bench numa: Share sched_getcpu() __weak def with
 cloexec.c

We really should move the sched_getcpu() to some more suitable place,
but this one-liner fixes this build problem on ancient distros like
RHEL5.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Vinson Lee <vlee@twitter.com>
Link: http://lkml.kernel.org/n/tip-5yqg4p11f9uii6yremz3r35v@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/bench/numa.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
index e2415f40343a..870b7e665a20 100644
--- a/tools/perf/bench/numa.c
+++ b/tools/perf/bench/numa.c
@@ -8,6 +8,7 @@
 #include "../builtin.h"
 #include "../util/util.h"
 #include "../util/parse-options.h"
+#include "../util/cloexec.h"
 
 #include "bench.h"
 

From f4d9757ca6f5a2db6919a5b1ab86b8afa16773d0 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:50 +0000
Subject: [PATCH 169/305] perf/x86/intel/cqm: Document PQR MSR abuse

The CQM code acts like it owns the PQR MSR completely. That's not true
because only the lower 10 bits are used for CQM. The upper 32 bits are
used for the 'CLass Of Service ID' (CLOSID). Document the abuse. Will be
fixed in a later patch.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235149.823214798@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index e4d1b8b738fa..572582e2143e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -978,7 +978,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 		WARN_ON_ONCE(state->rmid);
 
 	state->rmid = rmid;
-	wrmsrl(MSR_IA32_PQR_ASSOC, state->rmid);
+	/*
+	 * This is actually wrong, as the upper 32 bit MSR contain the
+	 * closid which is used for configuring the Cache Allocation
+	 * Technology component.
+	 */
+	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
 
 	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
@@ -998,7 +1003,13 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 	if (!--state->cnt) {
 		state->rmid = 0;
-		wrmsrl(MSR_IA32_PQR_ASSOC, 0);
+		/*
+		 * This is actually wrong, as the upper 32 bit of the
+		 * MSR contain the closid which is used for
+		 * configuring the Cache Allocation Technology
+		 * component.
+		 */
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}

From b3df4ec4424f27e55d754cfe586195fecca1c4e4 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:51 +0000
Subject: [PATCH 170/305] perf/x86/intel/cqm: Use proper data types

'int' is really not a proper data type for an MSR. Use u32 to make it
clear that we are dealing with a 32-bit unsigned hardware value.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235149.919350144@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 4 ++--
 include/linux/perf_event.h                 | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 572582e2143e..3e9a7fbfce58 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -18,7 +18,7 @@ static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
 struct intel_cqm_state {
 	raw_spinlock_t		lock;
-	int			rmid;
+	u32			rmid;
 	int			cnt;
 };
 
@@ -962,7 +962,7 @@ out:
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-	unsigned int rmid = event->hw.cqm_rmid;
+	u32 rmid = event->hw.cqm_rmid;
 	unsigned long flags;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 248f7829ce41..06580028cee6 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -120,7 +120,7 @@ struct hw_perf_event {
 		};
 		struct { /* intel_cqm */
 			int			cqm_state;
-			int			cqm_rmid;
+			u32			cqm_rmid;
 			struct list_head	cqm_events_entry;
 			struct list_head	cqm_groups_entry;
 			struct list_head	cqm_group_entry;

From 9e7eaac95af6c1aecaf558b8c7a1757d5f2d2ad7 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:53 +0000
Subject: [PATCH 171/305] perf/x86/intel/cqm: Remove pointless spinlock from
 state cache

'struct intel_cqm_state' is a strict per CPU cache of the rmid and the
usage counter. It can never be modified from a remote CPU.

The three functions which modify the content: intel_cqm_event[start|stop|del]
(del maps to stop) are called from the perf core with interrupts disabled
which is enough protection for the per CPU state values.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235150.001006529@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 17 ++++++-----------
 1 file changed, 6 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 3e9a7fbfce58..63391f860175 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -17,11 +17,16 @@ static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
 struct intel_cqm_state {
-	raw_spinlock_t		lock;
 	u32			rmid;
 	int			cnt;
 };
 
+/*
+ * The cached intel_cqm_state is strictly per CPU and can never be
+ * updated from a remote CPU. Both functions which modify the state
+ * (intel_cqm_event_start and intel_cqm_event_stop) are called with
+ * interrupts disabled, which is sufficient for the protection.
+ */
 static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
 
 /*
@@ -963,15 +968,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
 	u32 rmid = event->hw.cqm_rmid;
-	unsigned long flags;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
 		return;
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
-
 	if (state->cnt++)
 		WARN_ON_ONCE(state->rmid != rmid);
 	else
@@ -984,21 +986,17 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 	 * Technology component.
 	 */
 	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
 	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
-	unsigned long flags;
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
 
 	event->hw.cqm_state |= PERF_HES_STOPPED;
 
-	raw_spin_lock_irqsave(&state->lock, flags);
 	intel_cqm_event_read(event);
 
 	if (!--state->cnt) {
@@ -1013,8 +1011,6 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
-
-	raw_spin_unlock_irqrestore(&state->lock, flags);
 }
 
 static int intel_cqm_event_add(struct perf_event *event, int mode)
@@ -1257,7 +1253,6 @@ static void intel_cqm_cpu_prepare(unsigned int cpu)
 	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
-	raw_spin_lock_init(&state->lock);
 	state->rmid = 0;
 	state->cnt  = 0;
 

From 0bac237845e203dd1439cfc571b1baf1b2274b3b Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:55 +0000
Subject: [PATCH 172/305] perf/x86/intel/cqm: Avoid pointless MSR write

If the usage counter is non-zero there is no point to update the rmid
in the PQR MSR.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235150.080844281@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 63391f860175..2ce69c0953ab 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -974,10 +974,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	if (state->cnt++)
-		WARN_ON_ONCE(state->rmid != rmid);
-	else
+	if (state->cnt++) {
+		if (!WARN_ON_ONCE(state->rmid != rmid))
+			return;
+	} else {
 		WARN_ON_ONCE(state->rmid);
+	}
 
 	state->rmid = rmid;
 	/*

From 43d0c2f6dcd07ffc0de658a7fbeeb63c806e9caa Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:56 +0000
Subject: [PATCH 173/305] perf/x86/intel/cqm: Remove useless wrapper function

intel_cqm_event_del() is a 1:1 wrapper for intel_cqm_event_stop().
Remove the useless indirection.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235150.159779847@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 2ce69c0953ab..8241b64d34c4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -1033,11 +1033,6 @@ static int intel_cqm_event_add(struct perf_event *event, int mode)
 	return 0;
 }
 
-static void intel_cqm_event_del(struct perf_event *event, int mode)
-{
-	intel_cqm_event_stop(event, mode);
-}
-
 static void intel_cqm_event_destroy(struct perf_event *event)
 {
 	struct perf_event *group_other = NULL;
@@ -1230,7 +1225,7 @@ static struct pmu intel_cqm_pmu = {
 	.task_ctx_nr	     = perf_sw_context,
 	.event_init	     = intel_cqm_event_init,
 	.add		     = intel_cqm_event_add,
-	.del		     = intel_cqm_event_del,
+	.del		     = intel_cqm_event_stop,
 	.start		     = intel_cqm_event_start,
 	.stop		     = intel_cqm_event_stop,
 	.read		     = intel_cqm_event_read,

From bf926731e1585ccad029ca2fad1444fee082b78d Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 May 2015 00:00:58 +0000
Subject: [PATCH 174/305] perf/x86/intel/cqm: Add storage for 'closid' and
 clean up 'struct intel_pqr_state'

'closid' (CLass Of Service ID) is used for the Class based Cache
Allocation Technology (CAT). Add explicit storage to the per cpu cache
for it, so it can be used later with the CAT support (requires to move
the per cpu data).

While at it:

 - Rename the structure to intel_pqr_state which reflects the actual
   purpose of the struct: cache values which go into the PQR MSR

 - Rename 'cnt' to rmid_usecnt which reflects the actual purpose of
   the counter.

 - Document the structure and the struct members.

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Matt Fleming <matt.fleming@intel.com>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/20150518235150.240899319@linutronix.de
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 50 ++++++++++++----------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 8241b64d34c4..8233b29bdd35 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -16,18 +16,32 @@
 static unsigned int cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
-struct intel_cqm_state {
+/**
+ * struct intel_pqr_state - State cache for the PQR MSR
+ * @rmid:		The cached Resource Monitoring ID
+ * @closid:		The cached Class Of Service ID
+ * @rmid_usecnt:	The usage counter for rmid
+ *
+ * The upper 32 bits of MSR_IA32_PQR_ASSOC contain closid and the
+ * lower 10 bits rmid. The update to MSR_IA32_PQR_ASSOC always
+ * contains both parts, so we need to cache them.
+ *
+ * The cache also helps to avoid pointless updates if the value does
+ * not change.
+ */
+struct intel_pqr_state {
 	u32			rmid;
-	int			cnt;
+	u32			closid;
+	int			rmid_usecnt;
 };
 
 /*
- * The cached intel_cqm_state is strictly per CPU and can never be
+ * The cached intel_pqr_state is strictly per CPU and can never be
  * updated from a remote CPU. Both functions which modify the state
  * (intel_cqm_event_start and intel_cqm_event_stop) are called with
  * interrupts disabled, which is sufficient for the protection.
  */
-static DEFINE_PER_CPU(struct intel_cqm_state, cqm_state);
+static DEFINE_PER_CPU(struct intel_pqr_state, pqr_state);
 
 /*
  * Protects cache_cgroups and cqm_rmid_free_lru and cqm_rmid_limbo_lru.
@@ -966,7 +980,7 @@ out:
 
 static void intel_cqm_event_start(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 	u32 rmid = event->hw.cqm_rmid;
 
 	if (!(event->hw.cqm_state & PERF_HES_STOPPED))
@@ -974,7 +988,7 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 
 	event->hw.cqm_state &= ~PERF_HES_STOPPED;
 
-	if (state->cnt++) {
+	if (state->rmid_usecnt++) {
 		if (!WARN_ON_ONCE(state->rmid != rmid))
 			return;
 	} else {
@@ -982,17 +996,12 @@ static void intel_cqm_event_start(struct perf_event *event, int mode)
 	}
 
 	state->rmid = rmid;
-	/*
-	 * This is actually wrong, as the upper 32 bit MSR contain the
-	 * closid which is used for configuring the Cache Allocation
-	 * Technology component.
-	 */
-	wrmsr(MSR_IA32_PQR_ASSOC, rmid, 0);
+	wrmsr(MSR_IA32_PQR_ASSOC, rmid, state->closid);
 }
 
 static void intel_cqm_event_stop(struct perf_event *event, int mode)
 {
-	struct intel_cqm_state *state = this_cpu_ptr(&cqm_state);
+	struct intel_pqr_state *state = this_cpu_ptr(&pqr_state);
 
 	if (event->hw.cqm_state & PERF_HES_STOPPED)
 		return;
@@ -1001,15 +1010,9 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 
 	intel_cqm_event_read(event);
 
-	if (!--state->cnt) {
+	if (!--state->rmid_usecnt) {
 		state->rmid = 0;
-		/*
-		 * This is actually wrong, as the upper 32 bit of the
-		 * MSR contain the closid which is used for
-		 * configuring the Cache Allocation Technology
-		 * component.
-		 */
-		wrmsr(MSR_IA32_PQR_ASSOC, 0, 0);
+		wrmsr(MSR_IA32_PQR_ASSOC, 0, state->closid);
 	} else {
 		WARN_ON_ONCE(!state->rmid);
 	}
@@ -1247,11 +1250,12 @@ static inline void cqm_pick_event_reader(int cpu)
 
 static void intel_cqm_cpu_prepare(unsigned int cpu)
 {
-	struct intel_cqm_state *state = &per_cpu(cqm_state, cpu);
+	struct intel_pqr_state *state = &per_cpu(pqr_state, cpu);
 	struct cpuinfo_x86 *c = &cpu_data(cpu);
 
 	state->rmid = 0;
-	state->cnt  = 0;
+	state->closid = 0;
+	state->rmid_usecnt = 0;
 
 	WARN_ON(c->x86_cache_max_rmid != cqm_max_rmid);
 	WARN_ON(c->x86_cache_occ_scale != cqm_l3_scale);

From adafa99960ef18b019f001ddee4d9d81c4e25944 Mon Sep 17 00:00:00 2001
From: Matt Fleming <matt.fleming@intel.com>
Date: Fri, 22 May 2015 09:59:42 +0100
Subject: [PATCH 175/305] perf/x86/intel/cqm: Use 'u32' data type for RMIDs

Since we write RMID values to MSRs the correct type to use is 'u32'
because that clearly articulates we're writing a hardware register
value.

Fix up all uses of RMID in this code to consistently use the correct data
type.

Reported-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Matt Fleming <matt.fleming@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Kanaka Juvva <kanaka.d.juvva@intel.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Vikas Shivappa <vikas.shivappa@linux.intel.com>
Cc: Will Auld <will.auld@intel.com>
Link: http://lkml.kernel.org/r/1432285182-17180-1-git-send-email-matt@codeblueprint.co.uk
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_cqm.c | 37 +++++++++++-----------
 1 file changed, 18 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_cqm.c b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
index 8233b29bdd35..188076161c1b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_cqm.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_cqm.c
@@ -13,7 +13,7 @@
 #define MSR_IA32_QM_CTR		0x0c8e
 #define MSR_IA32_QM_EVTSEL	0x0c8d
 
-static unsigned int cqm_max_rmid = -1;
+static u32 cqm_max_rmid = -1;
 static unsigned int cqm_l3_scale; /* supposedly cacheline size */
 
 /**
@@ -76,7 +76,7 @@ static cpumask_t cqm_cpumask;
  * near-zero occupancy value, i.e. no cachelines are tagged with this
  * RMID, once __intel_cqm_rmid_rotate() returns.
  */
-static unsigned int intel_cqm_rotation_rmid;
+static u32 intel_cqm_rotation_rmid;
 
 #define INVALID_RMID		(-1)
 
@@ -88,7 +88,7 @@ static unsigned int intel_cqm_rotation_rmid;
  * Likewise, an rmid value of -1 is used to indicate "no rmid currently
  * assigned" and is used as part of the rotation code.
  */
-static inline bool __rmid_valid(unsigned int rmid)
+static inline bool __rmid_valid(u32 rmid)
 {
 	if (!rmid || rmid == INVALID_RMID)
 		return false;
@@ -96,7 +96,7 @@ static inline bool __rmid_valid(unsigned int rmid)
 	return true;
 }
 
-static u64 __rmid_read(unsigned int rmid)
+static u64 __rmid_read(u32 rmid)
 {
 	u64 val;
 
@@ -121,7 +121,7 @@ enum rmid_recycle_state {
 };
 
 struct cqm_rmid_entry {
-	unsigned int rmid;
+	u32 rmid;
 	enum rmid_recycle_state state;
 	struct list_head list;
 	unsigned long queue_time;
@@ -166,7 +166,7 @@ static LIST_HEAD(cqm_rmid_limbo_lru);
  */
 static struct cqm_rmid_entry **cqm_rmid_ptrs;
 
-static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
+static inline struct cqm_rmid_entry *__rmid_entry(u32 rmid)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -181,7 +181,7 @@ static inline struct cqm_rmid_entry *__rmid_entry(int rmid)
  *
  * We expect to be called with cache_mutex held.
  */
-static int __get_rmid(void)
+static u32 __get_rmid(void)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -196,7 +196,7 @@ static int __get_rmid(void)
 	return entry->rmid;
 }
 
-static void __put_rmid(unsigned int rmid)
+static void __put_rmid(u32 rmid)
 {
 	struct cqm_rmid_entry *entry;
 
@@ -391,7 +391,7 @@ static bool __conflict_event(struct perf_event *a, struct perf_event *b)
 }
 
 struct rmid_read {
-	unsigned int rmid;
+	u32 rmid;
 	atomic64_t value;
 };
 
@@ -400,12 +400,11 @@ static void __intel_cqm_event_count(void *info);
 /*
  * Exchange the RMID of a group of events.
  */
-static unsigned int
-intel_cqm_xchg_rmid(struct perf_event *group, unsigned int rmid)
+static u32 intel_cqm_xchg_rmid(struct perf_event *group, u32 rmid)
 {
 	struct perf_event *event;
-	unsigned int old_rmid = group->hw.cqm_rmid;
 	struct list_head *head = &group->hw.cqm_group_entry;
+	u32 old_rmid = group->hw.cqm_rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -470,7 +469,7 @@ static void intel_cqm_stable(void *arg)
  * If we have group events waiting for an RMID that don't conflict with
  * events already running, assign @rmid.
  */
-static bool intel_cqm_sched_in_event(unsigned int rmid)
+static bool intel_cqm_sched_in_event(u32 rmid)
 {
 	struct perf_event *leader, *event;
 
@@ -617,7 +616,7 @@ static bool intel_cqm_rmid_stabilize(unsigned int *available)
 static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 {
 	struct perf_event *rotor;
-	unsigned int rmid;
+	u32 rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -645,7 +644,7 @@ static void __intel_cqm_pick_and_rotate(struct perf_event *next)
 static void intel_cqm_sched_out_conflicting_events(struct perf_event *event)
 {
 	struct perf_event *group, *g;
-	unsigned int rmid;
+	u32 rmid;
 
 	lockdep_assert_held(&cache_mutex);
 
@@ -847,8 +846,8 @@ static void intel_cqm_setup_event(struct perf_event *event,
 				  struct perf_event **group)
 {
 	struct perf_event *iter;
-	unsigned int rmid;
 	bool conflict = false;
+	u32 rmid;
 
 	list_for_each_entry(iter, &cache_groups, hw.cqm_groups_entry) {
 		rmid = iter->hw.cqm_rmid;
@@ -879,7 +878,7 @@ static void intel_cqm_setup_event(struct perf_event *event,
 static void intel_cqm_event_read(struct perf_event *event)
 {
 	unsigned long flags;
-	unsigned int rmid;
+	u32 rmid;
 	u64 val;
 
 	/*
@@ -1021,7 +1020,7 @@ static void intel_cqm_event_stop(struct perf_event *event, int mode)
 static int intel_cqm_event_add(struct perf_event *event, int mode)
 {
 	unsigned long flags;
-	unsigned int rmid;
+	u32 rmid;
 
 	raw_spin_lock_irqsave(&cache_lock, flags);
 
@@ -1064,7 +1063,7 @@ static void intel_cqm_event_destroy(struct perf_event *event)
 			list_replace(&event->hw.cqm_groups_entry,
 				     &group_other->hw.cqm_groups_entry);
 		} else {
-			unsigned int rmid = event->hw.cqm_rmid;
+			u32 rmid = event->hw.cqm_rmid;
 
 			if (__rmid_valid(rmid))
 				__put_rmid(rmid);

From 1c565833ac7950a5ddb3322e9848e0628ceba3b5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:21 +0200
Subject: [PATCH 176/305] perf/x86/intel: Correct local vs remote sibling state

For some obscure reason the current code accounts the current SMT
thread's state on the remote thread and reads the remote's state on
the local SMT thread.

While internally consistent, and 'correct' its pointless confusion we
can do without.

Flip them the right way around.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 79 +++++++++++---------------
 1 file changed, 33 insertions(+), 46 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a1e35c9f06b9..0ea040562bb8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1903,9 +1903,8 @@ static void
 intel_start_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -1919,7 +1918,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	if (!excl_cntrs)
 		return;
 
-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];
 
 	xl->sched_started = true;
@@ -1932,18 +1930,17 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	raw_spin_lock(&excl_cntrs->lock);
 
 	/*
-	 * save initial state of sibling thread
+	 * Save a copy of our state to work on.
 	 */
-	memcpy(xlo->init_state, xlo->state, sizeof(xlo->init_state));
+	memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
 }
 
 static void
 intel_stop_scheduling(struct cpu_hw_events *cpuc)
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* sibling thread */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -1956,13 +1953,12 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 	if (!excl_cntrs)
 		return;
 
-	xlo = &excl_cntrs->states[o_tid];
 	xl = &excl_cntrs->states[tid];
 
 	/*
-	 * make new sibling thread state visible
+	 * Commit the working state.
 	 */
-	memcpy(xlo->state, xlo->init_state, sizeof(xlo->state));
+	memcpy(xl->state, xl->init_state, sizeof(xl->state));
 
 	xl->sched_started = false;
 	/*
@@ -1977,10 +1973,9 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 {
 	struct event_constraint *cx;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xl, *xlo;
-	int is_excl, i;
+	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid; /* alternate */
+	int is_excl, i;
 
 	/*
 	 * validating a group does not require
@@ -1994,23 +1989,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 */
 	if (!excl_cntrs)
 		return c;
-	/*
-	 * event requires exclusive counter access
-	 * across HT threads
-	 */
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
-		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
-		if (!cpuc->n_excl++)
-			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
-	}
-
-	/*
-	 * xl = state of current HT
-	 * xlo = state of sibling HT
-	 */
-	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 
 	cx = c;
 
@@ -2053,6 +2031,22 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * of this function
 	 */
 
+	/*
+	 * state of sibling HT
+	 */
+	xlo = &excl_cntrs->states[tid ^ 1];
+
+	/*
+	 * event requires exclusive counter access
+	 * across HT threads
+	 */
+	is_excl = c->flags & PERF_X86_EVENT_EXCL;
+	if (is_excl && !(event->hw.flags & PERF_X86_EVENT_EXCL_ACCT)) {
+		event->hw.flags |= PERF_X86_EVENT_EXCL_ACCT;
+		if (!cpuc->n_excl++)
+			WRITE_ONCE(excl_cntrs->has_exclusive[tid], 1);
+	}
+
 	/*
 	 * Modify static constraint with current dynamic
 	 * state of thread
@@ -2067,14 +2061,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
-		if (xl->state[i] == INTEL_EXCL_EXCLUSIVE)
+		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
 			__clear_bit(i, cx->idxmsk);
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
-		if (is_excl && xl->state[i] == INTEL_EXCL_SHARED)
+		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
 			__clear_bit(i, cx->idxmsk);
 	}
 
@@ -2124,10 +2118,9 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 {
 	struct hw_perf_event *hwc = &event->hw;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct intel_excl_states *xlo, *xl;
-	unsigned long flags = 0; /* keep compiler happy */
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
+	struct intel_excl_states *xl;
+	unsigned long flags = 0; /* keep compiler happy */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -2141,7 +2134,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 		return;
 
 	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
 		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
 		if (!--cpuc->n_excl)
@@ -2161,7 +2153,7 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	 * counter state as unused now
 	 */
 	if (hwc->idx >= 0)
-		xlo->state[hwc->idx] = INTEL_EXCL_UNUSED;
+		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
 
 	if (!xl->sched_started)
 		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
@@ -2200,16 +2192,12 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 {
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	struct event_constraint *c = cpuc->event_constraint[idx];
-	struct intel_excl_states *xlo, *xl;
+	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
-	int o_tid = 1 - tid;
-	int is_excl;
 
 	if (cpuc->is_fake || !c)
 		return;
 
-	is_excl = c->flags & PERF_X86_EVENT_EXCL;
-
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
 		return;
 
@@ -2219,15 +2207,14 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 		return;
 
 	xl = &excl_cntrs->states[tid];
-	xlo = &excl_cntrs->states[o_tid];
 
 	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
 
 	if (cntr >= 0) {
-		if (is_excl)
-			xlo->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		if (c->flags & PERF_X86_EVENT_EXCL)
+			xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
 		else
-			xlo->init_state[cntr] = INTEL_EXCL_SHARED;
+			xl->init_state[cntr] = INTEL_EXCL_SHARED;
 	}
 }
 

From b32ed7f5de262b10633bb6c6dbb7f8ba46598cf4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 12:38:21 +0200
Subject: [PATCH 177/305] perf/x86/intel: Add lockdep assert

Lockdep is very good at finding incorrect IRQ state while locking and
is far better at telling us if we hold a lock than the _is_locked()
API. It also generates less code for !DEBUG kernels.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 0ea040562bb8..5182cee16367 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1926,7 +1926,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	 * in stop_event_scheduling()
 	 * makes scheduling appear as a transaction
 	 */
-	WARN_ON_ONCE(!irqs_disabled());
 	raw_spin_lock(&excl_cntrs->lock);
 
 	/*
@@ -2208,7 +2207,7 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 
 	xl = &excl_cntrs->states[tid];
 
-	WARN_ON_ONCE(!raw_spin_is_locked(&excl_cntrs->lock));
+	lockdep_assert_held(&excl_cntrs->lock);
 
 	if (cntr >= 0) {
 		if (c->flags & PERF_X86_EVENT_EXCL)

From aaf932e8161e45291cc85085b6d850f1bbdf53c8 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:24 +0200
Subject: [PATCH 178/305] perf/x86/intel: Simplify the dynamic constraint code
 somewhat

We have two 'struct event_constraint' local variables in
intel_get_excl_constraints(): 'cx' and 'c'.

Instead of using 'cx' after the dynamic allocation, put all 'cx' inside
the dynamic allocation block and use 'c' outside of it.

Also use direct assignment to copy the structure; let the compiler
figure it out.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 21 ++++++++++-----------
 1 file changed, 10 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 5182cee16367..9588b8d1264d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1970,7 +1970,6 @@ static struct event_constraint *
 intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 			   int idx, struct event_constraint *c)
 {
-	struct event_constraint *cx;
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	struct intel_excl_states *xlo;
 	int tid = cpuc->excl_thread_id;
@@ -1989,8 +1988,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	if (!excl_cntrs)
 		return c;
 
-	cx = c;
-
 	/*
 	 * because we modify the constraint, we need
 	 * to make a copy. Static constraints come
@@ -2000,6 +1997,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * been cloned (marked dynamic)
 	 */
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
+		struct event_constraint *cx;
 
 		/* sanity check */
 		if (idx < 0)
@@ -2014,13 +2012,14 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 		 * initialize dynamic constraint
 		 * with static constraint
 		 */
-		memcpy(cx, c, sizeof(*cx));
+		*cx = *c;
 
 		/*
 		 * mark constraint as dynamic, so we
 		 * can free it later on
 		 */
 		cx->flags |= PERF_X86_EVENT_DYNAMIC;
+		c = cx;
 	}
 
 	/*
@@ -2054,37 +2053,37 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	 * SHARED   : sibling counter measuring non-exclusive event
 	 * UNUSED   : sibling counter unused
 	 */
-	for_each_set_bit(i, cx->idxmsk, X86_PMC_IDX_MAX) {
+	for_each_set_bit(i, c->idxmsk, X86_PMC_IDX_MAX) {
 		/*
 		 * exclusive event in sibling counter
 		 * our corresponding counter cannot be used
 		 * regardless of our event
 		 */
 		if (xlo->state[i] == INTEL_EXCL_EXCLUSIVE)
-			__clear_bit(i, cx->idxmsk);
+			__clear_bit(i, c->idxmsk);
 		/*
 		 * if measuring an exclusive event, sibling
 		 * measuring non-exclusive, then counter cannot
 		 * be used
 		 */
 		if (is_excl && xlo->state[i] == INTEL_EXCL_SHARED)
-			__clear_bit(i, cx->idxmsk);
+			__clear_bit(i, c->idxmsk);
 	}
 
 	/*
 	 * recompute actual bit weight for scheduling algorithm
 	 */
-	cx->weight = hweight64(cx->idxmsk64);
+	c->weight = hweight64(c->idxmsk64);
 
 	/*
 	 * if we return an empty mask, then switch
 	 * back to static empty constraint to avoid
 	 * the cost of freeing later on
 	 */
-	if (cx->weight == 0)
-		cx = &emptyconstraint;
+	if (c->weight == 0)
+		c = &emptyconstraint;
 
-	return cx;
+	return c;
 }
 
 static struct event_constraint *

From 17186ccda374ae02ef231cbbc8f1825e7c19ddbd Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:28 +0200
Subject: [PATCH 179/305] perf/x86/intel: Make WARN()ings consistent

The intel_commit_scheduling() callback is pointlessly different from
the start and stop scheduling callback.

Furthermore, the constraint should never be NULL, so remove that test.

Even though we'll never get called (because we NULL the callbacks)
when !is_ht_workaround_enabled() put that test in.

Collapse the (pointless) WARN_ON_ONCE() and bail on !cpuc->excl_cntrs --
this is doubly pointless, because its the same condition as
is_ht_workaround_enabled() which was already pointless because the
whole method won't ever be called.

Furthremore, make all the !excl_cntrs test WARN_ON_ONCE(); they're all
pointless, because the above, either the function
({get,put}_excl_constraint) are already predicated on it existing or
the is_ht_workaround_enabled() thing is the same test.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 9588b8d1264d..d7d30b41f6a3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1915,7 +1915,7 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;
 
 	xl = &excl_cntrs->states[tid];
@@ -1949,7 +1949,7 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;
 
 	xl = &excl_cntrs->states[tid];
@@ -1985,7 +1985,7 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	/*
 	 * no exclusion needed
 	 */
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return c;
 
 	/*
@@ -2126,9 +2126,7 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	if (cpuc->is_fake)
 		return;
 
-	WARN_ON_ONCE(!excl_cntrs);
-
-	if (!excl_cntrs)
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;
 
 	xl = &excl_cntrs->states[tid];
@@ -2193,17 +2191,15 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 	struct intel_excl_states *xl;
 	int tid = cpuc->excl_thread_id;
 
-	if (cpuc->is_fake || !c)
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+
+	if (WARN_ON_ONCE(!excl_cntrs))
 		return;
 
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
 		return;
 
-	WARN_ON_ONCE(!excl_cntrs);
-
-	if (!excl_cntrs)
-		return;
-
 	xl = &excl_cntrs->states[tid];
 
 	lockdep_assert_held(&excl_cntrs->lock);

From 0c41e756b9c5a9899b5cd238226600f8f34c9b82 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:32 +0200
Subject: [PATCH 180/305] perf/x86/intel: Clean up intel_commit_scheduling()
 placement

Move the code of intel_commit_scheduling() to the right place, which is
in between start() and stop().

No change in functionality.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h       |  4 +-
 arch/x86/kernel/cpu/perf_event_intel.c | 60 +++++++++++++-------------
 2 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index ef78516850fb..e5609522255c 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -527,10 +527,10 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 
-	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
-
 	void		(*start_scheduling)(struct cpu_hw_events *cpuc);
 
+	void		(*commit_scheduling)(struct cpu_hw_events *cpuc, int idx, int cntr);
+
 	void		(*stop_scheduling)(struct cpu_hw_events *cpuc);
 
 	struct event_constraint *event_constraints;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index d7d30b41f6a3..ff56fc3f016e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1934,6 +1934,34 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
 }
 
+static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
+{
+	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
+	struct event_constraint *c = cpuc->event_constraint[idx];
+	struct intel_excl_states *xl;
+	int tid = cpuc->excl_thread_id;
+
+	if (cpuc->is_fake || !is_ht_workaround_enabled())
+		return;
+
+	if (WARN_ON_ONCE(!excl_cntrs))
+		return;
+
+	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
+		return;
+
+	xl = &excl_cntrs->states[tid];
+
+	lockdep_assert_held(&excl_cntrs->lock);
+
+	if (cntr >= 0) {
+		if (c->flags & PERF_X86_EVENT_EXCL)
+			xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		else
+			xl->init_state[cntr] = INTEL_EXCL_SHARED;
+	}
+}
+
 static void
 intel_stop_scheduling(struct cpu_hw_events *cpuc)
 {
@@ -2184,34 +2212,6 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
 		intel_put_excl_constraints(cpuc, event);
 }
 
-static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
-{
-	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
-	struct event_constraint *c = cpuc->event_constraint[idx];
-	struct intel_excl_states *xl;
-	int tid = cpuc->excl_thread_id;
-
-	if (cpuc->is_fake || !is_ht_workaround_enabled())
-		return;
-
-	if (WARN_ON_ONCE(!excl_cntrs))
-		return;
-
-	if (!(c->flags & PERF_X86_EVENT_DYNAMIC))
-		return;
-
-	xl = &excl_cntrs->states[tid];
-
-	lockdep_assert_held(&excl_cntrs->lock);
-
-	if (cntr >= 0) {
-		if (c->flags & PERF_X86_EVENT_EXCL)
-			xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
-		else
-			xl->init_state[cntr] = INTEL_EXCL_SHARED;
-	}
-}
-
 static void intel_pebs_aliases_core2(struct perf_event *event)
 {
 	if ((event->hw.config & X86_RAW_EVENT_MASK) == 0x003c) {
@@ -2920,8 +2920,8 @@ static __init void intel_ht_bug(void)
 {
 	x86_pmu.flags |= PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED;
 
-	x86_pmu.commit_scheduling = intel_commit_scheduling;
 	x86_pmu.start_scheduling = intel_start_scheduling;
+	x86_pmu.commit_scheduling = intel_commit_scheduling;
 	x86_pmu.stop_scheduling = intel_stop_scheduling;
 }
 
@@ -3377,8 +3377,8 @@ static __init int fixup_ht_bug(void)
 
 	x86_pmu.flags &= ~(PMU_FL_EXCL_CNTRS | PMU_FL_EXCL_ENABLED);
 
-	x86_pmu.commit_scheduling = NULL;
 	x86_pmu.start_scheduling = NULL;
+	x86_pmu.commit_scheduling = NULL;
 	x86_pmu.stop_scheduling = NULL;
 
 	watchdog_nmi_enable_all();

From 1fe684e349e904adeed2883cfdeef259a21c94f4 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:36 +0200
Subject: [PATCH 181/305] perf/x86/intel: Remove pointless tests

Both intel_commit_scheduling() and intel_get_excl_contraints() test
for cntr < 0.

The only way that can happen (aside from a bug) is through
validate_event(), however that is already captured by the
cpuc->is_fake test.

So remove these test and simplify the code.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ff56fc3f016e..6a3e794cdc06 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1954,12 +1954,10 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 
 	lockdep_assert_held(&excl_cntrs->lock);
 
-	if (cntr >= 0) {
-		if (c->flags & PERF_X86_EVENT_EXCL)
-			xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
-		else
-			xl->init_state[cntr] = INTEL_EXCL_SHARED;
-	}
+	if (c->flags & PERF_X86_EVENT_EXCL)
+		xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+	else
+		xl->init_state[cntr] = INTEL_EXCL_SHARED;
 }
 
 static void
@@ -2027,10 +2025,6 @@ intel_get_excl_constraints(struct cpu_hw_events *cpuc, struct perf_event *event,
 	if (!(c->flags & PERF_X86_EVENT_DYNAMIC)) {
 		struct event_constraint *cx;
 
-		/* sanity check */
-		if (idx < 0)
-			return &emptyconstraint;
-
 		/*
 		 * grab pre-allocated constraint entry
 		 */

From 43ef205bded025432f5eeeb3503c11fe5cd1913e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:39 +0200
Subject: [PATCH 182/305] perf/x86/intel: Remove intel_excl_states::init_state

For some obscure reason intel_{start,stop}_scheduling() copy the HT
state to an intermediate array. This would make sense if we ever were
to make changes to it which we'd have to discard.

Except we don't. By the time we call intel_commit_scheduling() we're;
as the name implies; committed to them. We'll never back out.

A further hint its pointless is that stop_scheduling() unconditionally
publishes the state.

So the intermediate array is pointless, modify the state in place and
kill the extra array.

And remove the pointless array initialization: INTEL_EXCL_UNUSED == 0.

Note; all is serialized by intel_excl_cntr::lock.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c       |  1 -
 arch/x86/kernel/cpu/perf_event.h       |  1 -
 arch/x86/kernel/cpu/perf_event_intel.c | 22 ++--------------------
 3 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 4f7001f28936..d275da3d81dd 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -884,7 +884,6 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 	}
 
 	if (!assign || unsched) {
-
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index e5609522255c..89e6cd61e6ae 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -133,7 +133,6 @@ enum intel_excl_state_type {
 };
 
 struct intel_excl_states {
-	enum intel_excl_state_type init_state[X86_PMC_IDX_MAX];
 	enum intel_excl_state_type state[X86_PMC_IDX_MAX];
 	bool sched_started; /* true if scheduling has started */
 };
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6a3e794cdc06..f3201439031d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1927,11 +1927,6 @@ intel_start_scheduling(struct cpu_hw_events *cpuc)
 	 * makes scheduling appear as a transaction
 	 */
 	raw_spin_lock(&excl_cntrs->lock);
-
-	/*
-	 * Save a copy of our state to work on.
-	 */
-	memcpy(xl->init_state, xl->state, sizeof(xl->init_state));
 }
 
 static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cntr)
@@ -1955,9 +1950,9 @@ static void intel_commit_scheduling(struct cpu_hw_events *cpuc, int idx, int cnt
 	lockdep_assert_held(&excl_cntrs->lock);
 
 	if (c->flags & PERF_X86_EVENT_EXCL)
-		xl->init_state[cntr] = INTEL_EXCL_EXCLUSIVE;
+		xl->state[cntr] = INTEL_EXCL_EXCLUSIVE;
 	else
-		xl->init_state[cntr] = INTEL_EXCL_SHARED;
+		xl->state[cntr] = INTEL_EXCL_SHARED;
 }
 
 static void
@@ -1980,11 +1975,6 @@ intel_stop_scheduling(struct cpu_hw_events *cpuc)
 
 	xl = &excl_cntrs->states[tid];
 
-	/*
-	 * Commit the working state.
-	 */
-	memcpy(xl->state, xl->init_state, sizeof(xl->state));
-
 	xl->sched_started = false;
 	/*
 	 * release shared state lock (acquired in intel_start_scheduling())
@@ -2519,19 +2509,11 @@ struct intel_shared_regs *allocate_shared_regs(int cpu)
 static struct intel_excl_cntrs *allocate_excl_cntrs(int cpu)
 {
 	struct intel_excl_cntrs *c;
-	int i;
 
 	c = kzalloc_node(sizeof(struct intel_excl_cntrs),
 			 GFP_KERNEL, cpu_to_node(cpu));
 	if (c) {
 		raw_spin_lock_init(&c->lock);
-		for (i = 0; i < X86_PMC_IDX_MAX; i++) {
-			c->states[0].state[i] = INTEL_EXCL_UNUSED;
-			c->states[0].init_state[i] = INTEL_EXCL_UNUSED;
-
-			c->states[1].state[i] = INTEL_EXCL_UNUSED;
-			c->states[1].init_state[i] = INTEL_EXCL_UNUSED;
-		}
 		c->core_id = -1;
 	}
 	return c;

From 8736e548db0f48244132bc36331496494625bbaf Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Thu, 21 May 2015 10:57:43 +0200
Subject: [PATCH 183/305] perf/x86: Simplify the x86_schedule_events() logic

	!x && y == ! (x || !y)

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index d275da3d81dd..dbe3328f8ad7 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -881,9 +881,7 @@ int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
 			if (x86_pmu.commit_scheduling)
 				x86_pmu.commit_scheduling(cpuc, i, assign[i]);
 		}
-	}
-
-	if (!assign || unsched) {
+	} else {
 		for (i = 0; i < n; i++) {
 			e = cpuc->event_list[i];
 			/*

From ba040653b48d32afa8b6c3331eae97c6bbb66f03 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 22 May 2015 11:36:13 +0200
Subject: [PATCH 184/305] perf/x86/intel: Simplify put_exclusive_constraints()

Don't bother with taking locks if we're not actually going to do
anything. Also, drop the _irqsave(), this is very much only called
from IRQ-disabled context.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vince Weaver <vincent.weaver@maine.edu>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel.c | 29 +++++++++++++-------------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f3201439031d..74f19d9268bb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2130,7 +2130,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	struct intel_excl_cntrs *excl_cntrs = cpuc->excl_cntrs;
 	int tid = cpuc->excl_thread_id;
 	struct intel_excl_states *xl;
-	unsigned long flags = 0; /* keep compiler happy */
 
 	/*
 	 * nothing needed if in group validation mode
@@ -2141,7 +2140,6 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	if (WARN_ON_ONCE(!excl_cntrs))
 		return;
 
-	xl = &excl_cntrs->states[tid];
 	if (hwc->flags & PERF_X86_EVENT_EXCL_ACCT) {
 		hwc->flags &= ~PERF_X86_EVENT_EXCL_ACCT;
 		if (!--cpuc->n_excl)
@@ -2149,22 +2147,25 @@ static void intel_put_excl_constraints(struct cpu_hw_events *cpuc,
 	}
 
 	/*
-	 * put_constraint may be called from x86_schedule_events()
-	 * which already has the lock held so here make locking
-	 * conditional
+	 * If event was actually assigned, then mark the counter state as
+	 * unused now.
 	 */
-	if (!xl->sched_started)
-		raw_spin_lock_irqsave(&excl_cntrs->lock, flags);
+	if (hwc->idx >= 0) {
+		xl = &excl_cntrs->states[tid];
+
+		/*
+		 * put_constraint may be called from x86_schedule_events()
+		 * which already has the lock held so here make locking
+		 * conditional.
+		 */
+		if (!xl->sched_started)
+			raw_spin_lock(&excl_cntrs->lock);
 
-	/*
-	 * if event was actually assigned, then mark the
-	 * counter state as unused now
-	 */
-	if (hwc->idx >= 0)
 		xl->state[hwc->idx] = INTEL_EXCL_UNUSED;
 
-	if (!xl->sched_started)
-		raw_spin_unlock_irqrestore(&excl_cntrs->lock, flags);
+		if (!xl->sched_started)
+			raw_spin_unlock(&excl_cntrs->lock);
+	}
 }
 
 static void

From 74387bcb711b7b2ed65c0ed08953e13d4e31969e Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 21 Apr 2015 16:16:13 +0300
Subject: [PATCH 185/305] perf/x86/intel/pt: Kill an unused variable

Currently, there's a set-but-not-used variable in setup_topa_index();
this patch gets rid of it. And while at it, fixes a style issue with
brackets around a one-line block.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1429622177-22843-2-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_pt.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 5b804f96ad66..8a4595650d6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -674,7 +674,7 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
 	struct topa *cur = buf->first, *prev = buf->last;
 	struct topa_entry *te_cur = TOPA_ENTRY(cur, 0),
 		*te_prev = TOPA_ENTRY(prev, prev->last - 1);
-	int pg = 0, idx = 0, ntopa = 0;
+	int pg = 0, idx = 0;
 
 	while (pg < buf->nr_pages) {
 		int tidx;
@@ -689,9 +689,9 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
 			/* advance to next topa table */
 			idx = 0;
 			cur = list_entry(cur->list.next, struct topa, list);
-			ntopa++;
-		} else
+		} else {
 			idx++;
+		}
 		te_cur = TOPA_ENTRY(cur, idx);
 	}
 

From cf302bfdf3039853fce812ae1ffd0ac24f5b468f Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 21 Apr 2015 16:16:15 +0300
Subject: [PATCH 186/305] perf/x86/intel/pt: Document pt_buffer_reset_markers()

The comments in the driver don't make it absolutely clear as to what
exactly is the calling order and other possible constraints of buffer
management functions.

Document constraints and calling order for the buffer configuration
functions. While at it, replace a redundant check in
pt_buffer_reset_markers() with an explanation why it is not needed.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1429622177-22843-4-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_pt.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 8a4595650d6e..b2746eafa0cd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -609,7 +609,12 @@ static unsigned int pt_topa_next_entry(struct pt_buffer *buf, unsigned int pg)
  * @handle:	Current output handle.
  *
  * Place INT and STOP marks to prevent overwriting old data that the consumer
- * hasn't yet collected.
+ * hasn't yet collected and waking up the consumer after a certain fraction of
+ * the buffer has filled up. Only needed and sensible for non-snapshot counters.
+ *
+ * This obviously relies on buf::head to figure out buffer markers, so it has
+ * to be called after pt_buffer_reset_offsets() and before the hardware tracing
+ * is enabled.
  */
 static int pt_buffer_reset_markers(struct pt_buffer *buf,
 				   struct perf_output_handle *handle)
@@ -618,9 +623,6 @@ static int pt_buffer_reset_markers(struct pt_buffer *buf,
 	unsigned long head = local64_read(&buf->head);
 	unsigned long idx, npages, wakeup;
 
-	if (buf->snapshot)
-		return 0;
-
 	/* can't stop in the middle of an output region */
 	if (buf->output_off + handle->size + 1 <
 	    sizes(TOPA_ENTRY(buf->cur, buf->cur_idx)->size))
@@ -901,6 +903,7 @@ void intel_pt_interrupt(void)
 		}
 
 		pt_buffer_reset_offsets(buf, pt->handle.head);
+		/* snapshot counters don't use PMI, so it's safe */
 		ret = pt_buffer_reset_markers(buf, &pt->handle);
 		if (ret) {
 			perf_aux_output_end(&pt->handle, 0, true);

From 5b1dbd17c0dee679b154ce47f534677b7e0f7ad6 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 21 Apr 2015 16:16:16 +0300
Subject: [PATCH 187/305] perf/x86/intel/pt: Document pt_buffer_reset_offsets()

Currently, the description of pt_buffer_reset_offsets() lacks information
about its calling constraints and ordering with regards to other buffer
management functions.

Add a clarification about when this function has to be called.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1429622177-22843-5-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_pt.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index b2746eafa0cd..40ba5e4312d4 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -705,7 +705,14 @@ static void pt_buffer_setup_topa_index(struct pt_buffer *buf)
  * @head:	Write pointer (aux_head) from AUX buffer.
  *
  * Find the ToPA table and entry corresponding to given @head and set buffer's
- * "current" pointers accordingly.
+ * "current" pointers accordingly. This is done after we have obtained the
+ * current aux_head position from a successful call to perf_aux_output_begin()
+ * to make sure the hardware is writing to the right place.
+ *
+ * This function modifies buf::{cur,cur_idx,output_off} that will be programmed
+ * into PT msrs when the tracing is enabled and buf::head and buf::data_size,
+ * which are used to determine INT and STOP markers' locations by a subsequent
+ * call to pt_buffer_reset_markers().
  */
 static void pt_buffer_reset_offsets(struct pt_buffer *buf, unsigned long head)
 {

From 0a487aad2dfd088bcbbe1766944280b40ff969a5 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Tue, 21 Apr 2015 16:16:17 +0300
Subject: [PATCH 188/305] perf/x86/intel/pt: Kill pt_is_running()

Initially, we were trying to guard against scenarios where somebody
attaches to the system with a hardware debugger while PT is enabled
from software and pt_is_running() tries to make sure we handle this
better, but the truth is, there is still a race window no matter what
and people with hardware debuggers should really know what they are
doing anyway.

In other words, there is no point in keeping this one around, and
it's one RDMSR instructions fewer in the fast path.

The case when PT is enabled by the BIOS at boot time is handled
in the driver initialization path and doesn't use pt_is_running().

This patch gets rid of it.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1429622177-22843-6-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_pt.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index 40ba5e4312d4..a2d407172d61 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -187,15 +187,6 @@ static bool pt_event_valid(struct perf_event *event)
  * These all are cpu affine and operate on a local PT
  */
 
-static bool pt_is_running(void)
-{
-	u64 ctl;
-
-	rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-
-	return !!(ctl & RTIT_CTL_TRACEEN);
-}
-
 static void pt_config(struct perf_event *event)
 {
 	u64 reg;
@@ -933,7 +924,7 @@ static void pt_event_start(struct perf_event *event, int mode)
 	struct pt *pt = this_cpu_ptr(&pt_ctx);
 	struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
-	if (pt_is_running() || !buf || pt_buffer_is_full(buf, pt)) {
+	if (!buf || pt_buffer_is_full(buf, pt)) {
 		event->hw.state = PERF_HES_STOPPED;
 		return;
 	}

From a82d24edfeaf1ed244cf8b969916840c6feb5165 Mon Sep 17 00:00:00 2001
From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Date: Fri, 22 May 2015 18:30:26 +0300
Subject: [PATCH 189/305] perf/x86/intel/pt: Remove redundant variable
 declaration

There is a 'pt' variable in the outer scope of pt_event_stop() with the same
type, we don't really need another one in the inner scope.

This patch removes the redundant variable declaration.

Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: adrian.hunter@intel.com
Cc: hpa@zytor.com
Link: http://lkml.kernel.org/r/1432308626-18845-8-git-send-email-alexander.shishkin@linux.intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_pt.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_pt.c b/arch/x86/kernel/cpu/perf_event_intel_pt.c
index a2d407172d61..59596d25cb29 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_pt.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_pt.c
@@ -955,7 +955,6 @@ static void pt_event_stop(struct perf_event *event, int mode)
 	event->hw.state = PERF_HES_STOPPED;
 
 	if (mode & PERF_EF_UPDATE) {
-		struct pt *pt = this_cpu_ptr(&pt_ctx);
 		struct pt_buffer *buf = perf_get_aux(&pt->handle);
 
 		if (!buf)

From 128c32ed1866e6cf3d3944e7dcdcea06bc060b0d Mon Sep 17 00:00:00 2001
From: "Nam T. Nguyen" <namnguyen@chromium.org>
Date: Mon, 18 May 2015 11:37:27 -0700
Subject: [PATCH 190/305] perf tools: Separate the tests and tools in
 installation

This refactors out install-bin to install-tests and install-tools so
that downstream could opt to only install the tools, and not the tests.

Signed-off-by: Nam T. Nguyen <namnguyen@chromium.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Simon Que <sque@chromium.org>
Link: http://lkml.kernel.org/r/1431974247-22275-1-git-send-email-namnguyen@chromium.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 03409cc02117..5816a3bb7e9f 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -464,7 +464,7 @@ check: $(OUTPUT)common-cmds.h
 
 install-gtk:
 
-install-bin: all install-gtk
+install-tools: all install-gtk
 	$(call QUIET_INSTALL, binaries) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \
 		$(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \
@@ -502,12 +502,16 @@ endif
 	$(call QUIET_INSTALL, perf_completion-script) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \
 		$(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf'
+
+install-tests: all install-gtk
 	$(call QUIET_INSTALL, tests) \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
 		$(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \
 		$(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \
 		$(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'
 
+install-bin: install-tools install-tests
+
 install: install-bin try-install-man install-traceevent-plugins
 
 install-python_ext:

From bb78ce7d0598fb277290f8ee2443b8f4e0eb7cb2 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 19 May 2015 16:05:42 +0300
Subject: [PATCH 191/305] perf tools: Fix function declarations needed by
 parse-events.y

Patch "perf tools: Add location to pmu event terms" moved declarations
for parse_events_term__num() and parse_events_term__str() so that they
were no longer visible in parse-events.y. That can result in segfaults
as the arguments no longer need match the function prototype.

Move the declarations back, changing YYLTYPE pointers to
pointers-to-void because YYLTYPE is not generated until parse-events.y
is processed.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Link: http://lkml.kernel.org/r/1432040746-1755-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 16 ++++++++--------
 tools/perf/util/parse-events.h |  6 ++++++
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 80a50fdb6d8a..78032d887c1a 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -25,12 +25,6 @@
 extern int parse_events_debug;
 #endif
 int parse_events_parse(void *data, void *scanner);
-int parse_events_term__num(struct parse_events_term **term,
-			   int type_term, char *config, u64 num,
-			   YYLTYPE *loc_term, YYLTYPE *loc_val);
-int parse_events_term__str(struct parse_events_term **term,
-			   int type_term, char *config, char *str,
-			   YYLTYPE *loc_term, YYLTYPE *loc_val);
 
 static struct perf_pmu_event_symbol *perf_pmu_events_list;
 /*
@@ -1601,8 +1595,11 @@ static int new_term(struct parse_events_term **_term, int type_val,
 
 int parse_events_term__num(struct parse_events_term **term,
 			   int type_term, char *config, u64 num,
-			   YYLTYPE *loc_term, YYLTYPE *loc_val)
+			   void *loc_term_, void *loc_val_)
 {
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term,
 			config, NULL, num,
 			loc_term ? loc_term->first_column : 0,
@@ -1611,8 +1608,11 @@ int parse_events_term__num(struct parse_events_term **term,
 
 int parse_events_term__str(struct parse_events_term **term,
 			   int type_term, char *config, char *str,
-			   YYLTYPE *loc_term, YYLTYPE *loc_val)
+			   void *loc_term_, void *loc_val_)
 {
+	YYLTYPE *loc_term = loc_term_;
+	YYLTYPE *loc_val = loc_val_;
+
 	return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term,
 			config, str, 0,
 			loc_term ? loc_term->first_column : 0,
diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h
index e236f1b6ac6f..131f29b2f132 100644
--- a/tools/perf/util/parse-events.h
+++ b/tools/perf/util/parse-events.h
@@ -98,6 +98,12 @@ struct parse_events_terms {
 };
 
 int parse_events__is_hardcoded_term(struct parse_events_term *term);
+int parse_events_term__num(struct parse_events_term **term,
+			   int type_term, char *config, u64 num,
+			   void *loc_term, void *loc_val);
+int parse_events_term__str(struct parse_events_term **term,
+			   int type_term, char *config, char *str,
+			   void *loc_term, void *loc_val);
 int parse_events_term__sym_hw(struct parse_events_term **term,
 			      char *config, unsigned idx);
 int parse_events_term__clone(struct parse_events_term **new,

From a6ced2be06c302402c52dedba97d169d22cce99c Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 19 May 2015 16:05:44 +0300
Subject: [PATCH 192/305] perf tools: Fix parse_events_error dereferences

Parse errors can be reported in struct parse_events_error but the
pointer passed is optional and can be NULL.  Ensure it is not NULL
before dereferencing it.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Link: http://lkml.kernel.org/r/1432040746-1755-4-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/parse-events.c | 2 ++
 tools/perf/util/parse-events.y | 6 ++++--
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 78032d887c1a..2a4d1ec02846 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -1659,6 +1659,8 @@ void parse_events_evlist_error(struct parse_events_evlist *data,
 {
 	struct parse_events_error *err = data->error;
 
+	if (!err)
+		return;
 	err->idx = idx;
 	err->str = strdup(str);
 	WARN_ONCE(!err->str, "WARNING: failed to allocate error string");
diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y
index 3d11e00243e3..591905a02b92 100644
--- a/tools/perf/util/parse-events.y
+++ b/tools/perf/util/parse-events.y
@@ -389,8 +389,10 @@ PE_NAME ':' PE_NAME
 	if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) {
 		struct parse_events_error *error = data->error;
 
-		error->idx = @1.first_column;
-		error->str = strdup("unknown tracepoint");
+		if (error) {
+			error->idx = @1.first_column;
+			error->str = strdup("unknown tracepoint");
+		}
 		return -1;
 	}
 	$$ = list;

From 05b41775e2edd69a83f592e3534930c934d4038e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 19 May 2015 16:05:43 +0300
Subject: [PATCH 193/305] perf build: Fix libunwind feature detection on 32-bit
 x86

The libunwind feature would never detect because of the following error:

  $ cat tools/build/feature/test-libunwind.make.output
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_stream_buffer_decode'
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_index_uncompressed_size'
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_index_end'
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_index_buffer_decode'
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_stream_footer_decode'
  /usr/lib/gcc/i686-linux-gnu/4.8/../../../i386-linux-gnu/libunwind-x86.so: undefined reference to `lzma_index_size'
  collect2: error: ld returned 1 exit status

Fix by adding -llzma and re-ordering to match the dependencies.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Link: http://lkml.kernel.org/r/1432040746-1755-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/config/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 1b957a1272d0..e3b3724e73ff 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -32,7 +32,7 @@ ifeq ($(ARCH),x86)
     LIBUNWIND_LIBS = -lunwind -lunwind-x86_64
     $(call detected,CONFIG_X86_64)
   else
-    LIBUNWIND_LIBS = -lunwind -lunwind-x86
+    LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind
   endif
   NO_PERF_REGS := 0
 endif

From 554e92ed8fcdbcad736ef906c393847d44d52692 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Tue, 19 May 2015 16:05:45 +0300
Subject: [PATCH 194/305] perf session: Fix perf_session__peek_event()

perf_session__peek_event() generally leverages there being a single mmap
of the perf.data file, however on 32-bit platforms when there is more
that 32MiB of data, then there are multiple mmaps, so
perf_session__peek_event() reads from the file.

In that case a couple of bugs were exposed (note how the seg. fault
appears with >32M of data):

   $ perf record --per-thread -e intel_bts// ../rtit-tests/loopy 1000000
   [ perf record: Woken up 13 times to write data ]
   [ perf record: Captured and wrote 24.568 MB perf.data ]
   $ perf script > /dev/null
   $ perf record --per-thread -e intel_bts// ../rtit-tests/loopy 10000000
   [ perf record: Woken up 136 times to write data ]
   [ perf record: Captured and wrote 270.794 MB perf.data ]
   $ perf script > /dev/null
   Segmentation fault (core dumped)

The wrong address was being passed to the readn() function and the
buffer size was not being checked.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@gmail.com>
Link: http://lkml.kernel.org/r/1432040746-1755-5-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e722107f932a..39fe09d5a87e 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1182,7 +1182,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 		return -1;
 
 	if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 ||
-	    readn(fd, &buf, hdr_sz) != (ssize_t)hdr_sz)
+	    readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz)
 		return -1;
 
 	event = (union perf_event *)buf;
@@ -1190,12 +1190,12 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset,
 	if (session->header.needs_swap)
 		perf_event_header__bswap(&event->header);
 
-	if (event->header.size < hdr_sz)
+	if (event->header.size < hdr_sz || event->header.size > buf_sz)
 		return -1;
 
 	rest = event->header.size - hdr_sz;
 
-	if (readn(fd, &buf, rest) != (ssize_t)rest)
+	if (readn(fd, buf, rest) != (ssize_t)rest)
 		return -1;
 
 	if (session->header.needs_swap)

From 063bd9363bb8979b2939bdc0412d98a8ac062e3b Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Tue, 19 May 2015 17:04:10 +0900
Subject: [PATCH 195/305] perf hists: Reducing arguments of
 hist_entry_iter__add()

The evsel and sample arguments are to set iter for later use.  As it
also receives an iter as another argument, just set them before calling
the function.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432022650-18205-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c       | 9 +++++----
 tools/perf/builtin-top.c          | 7 ++++---
 tools/perf/tests/hists_cumulate.c | 6 ++++--
 tools/perf/tests/hists_filter.c   | 4 +++-
 tools/perf/tests/hists_output.c   | 6 ++++--
 tools/perf/util/hist.c            | 8 ++------
 tools/perf/util/hist.h            | 1 -
 7 files changed, 22 insertions(+), 19 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 92fca2157e5e..56025d90622f 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -139,8 +139,10 @@ static int process_sample_event(struct perf_tool *tool,
 	struct report *rep = container_of(tool, struct report, tool);
 	struct addr_location al;
 	struct hist_entry_iter iter = {
-		.hide_unresolved = rep->hide_unresolved,
-		.add_entry_cb = hist_iter__report_callback,
+		.evsel 			= evsel,
+		.sample 		= sample,
+		.hide_unresolved 	= rep->hide_unresolved,
+		.add_entry_cb 		= hist_iter__report_callback,
 	};
 	int ret = 0;
 
@@ -168,8 +170,7 @@ static int process_sample_event(struct perf_tool *tool,
 	if (al.map != NULL)
 		al.map->dso->hit = 1;
 
-	ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack,
-				   rep);
+	ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep);
 	if (ret < 0)
 		pr_debug("problem adding hist entry, skipping event\n");
 out_put:
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index a19351728f0f..6b987424d015 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -775,7 +775,9 @@ static void perf_event__process_sample(struct perf_tool *tool,
 	if (al.sym == NULL || !al.sym->ignore) {
 		struct hists *hists = evsel__hists(evsel);
 		struct hist_entry_iter iter = {
-			.add_entry_cb = hist_iter__top_callback,
+			.evsel		= evsel,
+			.sample 	= sample,
+			.add_entry_cb 	= hist_iter__top_callback,
 		};
 
 		if (symbol_conf.cumulate_callchain)
@@ -785,8 +787,7 @@ static void perf_event__process_sample(struct perf_tool *tool,
 
 		pthread_mutex_lock(&hists->lock);
 
-		err = hist_entry_iter__add(&iter, &al, evsel, sample,
-					   top->max_stack, top);
+		err = hist_entry_iter__add(&iter, &al, top->max_stack, top);
 		if (err < 0)
 			pr_err("Problem incrementing symbol period, skipping event\n");
 
diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c
index 620f626e5b35..7d82c8be5e36 100644
--- a/tools/perf/tests/hists_cumulate.c
+++ b/tools/perf/tests/hists_cumulate.c
@@ -87,6 +87,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 			},
 		};
 		struct hist_entry_iter iter = {
+			.evsel = evsel,
+			.sample	= &sample,
 			.hide_unresolved = false,
 		};
 
@@ -104,8 +106,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 						  &sample) < 0)
 			goto out;
 
-		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-					 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+					 NULL) < 0) {
 			addr_location__put(&al);
 			goto out;
 		}
diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c
index 82e1ee52e024..ce48775e6ada 100644
--- a/tools/perf/tests/hists_filter.c
+++ b/tools/perf/tests/hists_filter.c
@@ -63,6 +63,8 @@ static int add_hist_entries(struct perf_evlist *evlist,
 				},
 			};
 			struct hist_entry_iter iter = {
+				.evsel = evsel,
+				.sample = &sample,
 				.ops = &hist_iter_normal,
 				.hide_unresolved = false,
 			};
@@ -81,7 +83,7 @@ static int add_hist_entries(struct perf_evlist *evlist,
 							  &sample) < 0)
 				goto out;
 
-			if (hist_entry_iter__add(&iter, &al, evsel, &sample,
+			if (hist_entry_iter__add(&iter, &al,
 						 PERF_MAX_STACK_DEPTH, NULL) < 0) {
 				addr_location__put(&al);
 				goto out;
diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c
index fd7ec4f9aeb4..adbebc852cc8 100644
--- a/tools/perf/tests/hists_output.c
+++ b/tools/perf/tests/hists_output.c
@@ -57,6 +57,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 			},
 		};
 		struct hist_entry_iter iter = {
+			.evsel = evsel,
+			.sample = &sample,
 			.ops = &hist_iter_normal,
 			.hide_unresolved = false,
 		};
@@ -70,8 +72,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine)
 						  &sample) < 0)
 			goto out;
 
-		if (hist_entry_iter__add(&iter, &al, evsel, &sample,
-					 PERF_MAX_STACK_DEPTH, NULL) < 0) {
+		if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH,
+					 NULL) < 0) {
 			addr_location__put(&al);
 			goto out;
 		}
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 338770679863..2504b5b1a308 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -851,19 +851,15 @@ const struct hist_iter_ops hist_iter_cumulative = {
 };
 
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
-			 struct perf_evsel *evsel, struct perf_sample *sample,
 			 int max_stack_depth, void *arg)
 {
 	int err, err2;
 
-	err = sample__resolve_callchain(sample, &iter->parent, evsel, al,
-					max_stack_depth);
+	err = sample__resolve_callchain(iter->sample, &iter->parent,
+					iter->evsel, al, max_stack_depth);
 	if (err)
 		return err;
 
-	iter->evsel = evsel;
-	iter->sample = sample;
-
 	err = iter->ops->prepare_entry(iter, al);
 	if (err)
 		goto out;
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index 9f31b89a527a..5ed8d9c22981 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -111,7 +111,6 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 				      u64 weight, u64 transaction,
 				      bool sample_self);
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
-			 struct perf_evsel *evsel, struct perf_sample *sample,
 			 int max_stack_depth, void *arg);
 
 int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right);

From e7e0efcdb807a570b11f240e2608d7aed5ccdfb1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 May 2015 11:31:22 -0300
Subject: [PATCH 196/305] perf hists: Rename add_hist_entry to
 hists__findnew_entry

To match the convention used elsewhere.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-66oo6yn8upssfeuprwy0il1q@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 2504b5b1a308..f53d017c7c22 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -362,10 +362,10 @@ static u8 symbol__parent_filter(const struct symbol *parent)
 	return 0;
 }
 
-static struct hist_entry *add_hist_entry(struct hists *hists,
-					 struct hist_entry *entry,
-					 struct addr_location *al,
-					 bool sample_self)
+static struct hist_entry *hists__findnew_entry(struct hists *hists,
+					       struct hist_entry *entry,
+					       struct addr_location *al,
+					       bool sample_self)
 {
 	struct rb_node **p;
 	struct rb_node *parent = NULL;
@@ -468,7 +468,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists,
 		.transaction = transaction,
 	};
 
-	return add_hist_entry(hists, &entry, al, sample_self);
+	return hists__findnew_entry(hists, &entry, al, sample_self);
 }
 
 static int
@@ -548,9 +548,9 @@ iter_finish_mem_entry(struct hist_entry_iter *iter,
 
 out:
 	/*
-	 * We don't need to free iter->priv (mem_info) here since
-	 * the mem info was either already freed in add_hist_entry() or
-	 * passed to a new hist entry by hist_entry__new().
+	 * We don't need to free iter->priv (mem_info) here since the mem info
+	 * was either already freed in hists__findnew_entry() or passed to a
+	 * new hist entry by hist_entry__new().
 	 */
 	iter->priv = NULL;
 

From 86c19525b7e953217e5ad2b5496029b1ac6fe26b Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 May 2015 19:07:42 -0300
Subject: [PATCH 197/305] perf comm: Use atomic.h for refcounting

Now that we have atomic.h, we should convert all of the existing
refcounts to use it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-quzeuy3jwsyod6e06o39cl6y@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/comm.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c
index b2bb59df65e1..21b7ff382c3f 100644
--- a/tools/perf/util/comm.c
+++ b/tools/perf/util/comm.c
@@ -2,24 +2,27 @@
 #include "util.h"
 #include <stdlib.h>
 #include <stdio.h>
+#include <linux/atomic.h>
 
 struct comm_str {
 	char *str;
 	struct rb_node rb_node;
-	int ref;
+	atomic_t refcnt;
 };
 
 /* Should perhaps be moved to struct machine */
 static struct rb_root comm_str_root;
 
-static void comm_str__get(struct comm_str *cs)
+static struct comm_str *comm_str__get(struct comm_str *cs)
 {
-	cs->ref++;
+	if (cs)
+		atomic_inc(&cs->refcnt);
+	return cs;
 }
 
 static void comm_str__put(struct comm_str *cs)
 {
-	if (!--cs->ref) {
+	if (cs && atomic_dec_and_test(&cs->refcnt)) {
 		rb_erase(&cs->rb_node, &comm_str_root);
 		zfree(&cs->str);
 		free(cs);
@@ -40,6 +43,8 @@ static struct comm_str *comm_str__alloc(const char *str)
 		return NULL;
 	}
 
+	atomic_set(&cs->refcnt, 0);
+
 	return cs;
 }
 

From 8e160b2e1e3efdd84ddef726f9b5136dd192a682 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 19 May 2015 20:07:14 -0300
Subject: [PATCH 198/305] perf machine: Do not call map_groups__delete(), drop
 refcnt instead

It could be used somewhere, so just call map__groups_put() to make sure
we don't delete it prematurely

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-dxmh8mr12i65p8h909vi88cp@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index daa55910ff28..7ec3188d3cb3 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -333,7 +333,7 @@ static void machine__update_thread_pid(struct machine *machine,
 		if (!map_groups__empty(th->mg))
 			pr_err("Discarding thread maps for %d:%d\n",
 			       th->pid_, th->tid);
-		map_groups__delete(th->mg);
+		map_groups__put(th->mg);
 	}
 
 	th->mg = map_groups__get(leader->mg);

From 71ff824a60a7b0d9d0746e6e237fe4735077e5b4 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 21 May 2015 01:03:39 +0900
Subject: [PATCH 199/305] perf tools: Fix dso__data_read_offset() file opening

When dso__data_read_offset/addr() is called without prior dso__data_fd()
(or other functions which call it internally), it failed to open dso in
data_file_size() since its binary type was not identified.

However calling dso__data_fd() in dso__data_read_offset() will hurt
performance as it grabs a global lock everytime.  So factor out the loop
on the binary type in dso__data_fd(), and call it from both.

Reported-by: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432137821-10853-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 59 +++++++++++++++++++++++--------------------
 1 file changed, 32 insertions(+), 27 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 1b96c8d18435..516e0c25ea16 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -440,15 +440,7 @@ void dso__data_close(struct dso *dso)
 	pthread_mutex_unlock(&dso__data_open_lock);
 }
 
-/**
- * dso__data_fd - Get dso's data file descriptor
- * @dso: dso object
- * @machine: machine object
- *
- * External interface to find dso's file, open it and
- * returns file descriptor.
- */
-int dso__data_fd(struct dso *dso, struct machine *machine)
+static void try_to_open_dso(struct dso *dso, struct machine *machine)
 {
 	enum dso_binary_type binary_type_data[] = {
 		DSO_BINARY_TYPE__BUILD_ID_CACHE,
@@ -457,13 +449,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine)
 	};
 	int i = 0;
 
-	if (dso->data.status == DSO_DATA_STATUS_ERROR)
-		return -1;
-
-	pthread_mutex_lock(&dso__data_open_lock);
-
 	if (dso->data.fd >= 0)
-		goto out;
+		return;
 
 	if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) {
 		dso->data.fd = open_dso(dso, machine);
@@ -483,8 +470,25 @@ out:
 		dso->data.status = DSO_DATA_STATUS_OK;
 	else
 		dso->data.status = DSO_DATA_STATUS_ERROR;
+}
 
+/**
+ * dso__data_fd - Get dso's data file descriptor
+ * @dso: dso object
+ * @machine: machine object
+ *
+ * External interface to find dso's file, open it and
+ * returns file descriptor.
+ */
+int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
+	pthread_mutex_lock(&dso__data_open_lock);
+	try_to_open_dso(dso, machine);
 	pthread_mutex_unlock(&dso__data_open_lock);
+
 	return dso->data.fd;
 }
 
@@ -609,13 +613,12 @@ dso_cache__read(struct dso *dso, struct machine *machine,
 		 * dso->data.fd might be closed if other thread opened another
 		 * file (dso) due to open file limit (RLIMIT_NOFILE).
 		 */
+		try_to_open_dso(dso, machine);
+
 		if (dso->data.fd < 0) {
-			dso->data.fd = open_dso(dso, machine);
-			if (dso->data.fd < 0) {
-				ret = -errno;
-				dso->data.status = DSO_DATA_STATUS_ERROR;
-				break;
-			}
+			ret = -errno;
+			dso->data.status = DSO_DATA_STATUS_ERROR;
+			break;
 		}
 
 		cache_offset = offset & DSO__DATA_CACHE_MASK;
@@ -702,19 +705,21 @@ static int data_file_size(struct dso *dso, struct machine *machine)
 	if (dso->data.file_size)
 		return 0;
 
+	if (dso->data.status == DSO_DATA_STATUS_ERROR)
+		return -1;
+
 	pthread_mutex_lock(&dso__data_open_lock);
 
 	/*
 	 * dso->data.fd might be closed if other thread opened another
 	 * file (dso) due to open file limit (RLIMIT_NOFILE).
 	 */
+	try_to_open_dso(dso, machine);
+
 	if (dso->data.fd < 0) {
-		dso->data.fd = open_dso(dso, machine);
-		if (dso->data.fd < 0) {
-			ret = -errno;
-			dso->data.status = DSO_DATA_STATUS_ERROR;
-			goto out;
-		}
+		ret = -errno;
+		dso->data.status = DSO_DATA_STATUS_ERROR;
+		goto out;
 	}
 
 	if (fstat(dso->data.fd, &st) < 0) {

From e840238d7c6afcde0f6402aac3a74723ee9c448f Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 21 May 2015 01:03:40 +0900
Subject: [PATCH 200/305] perf tools: Get rid of dso__data_fd() from
 dso__data_size()

It seems that the dso__data_fd() was needed to find a binary type
since open in data_file_size() alone used to fail.

But as it can open the dso fine now, the dso__data_fd() can go away.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432137821-10853-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 516e0c25ea16..e95e850dd832 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -745,12 +745,6 @@ out:
  */
 off_t dso__data_size(struct dso *dso, struct machine *machine)
 {
-	int fd;
-
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return fd;
-
 	if (data_file_size(dso, machine))
 		return -1;
 

From 4bb11d012ab248d0e383008d725be0d26a74fac2 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Thu, 21 May 2015 01:03:41 +0900
Subject: [PATCH 201/305] perf tools: Add dso__data_get/put_fd()

Using dso__data_fd() in multi-thread environment is not safe since
returned fd can be closed and/or reused anytime.

So convert it to the dso__data_get/put_fd() pair to protect the access
with lock.

The original dso__data_fd() is deprecated and kept only for testing.

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Acked-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432137821-10853-3-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/dso-data.c        | 11 +++++++++++
 tools/perf/util/dso.c              | 31 +++++++++++++++++++++---------
 tools/perf/util/dso.h              | 13 +++++++++----
 tools/perf/util/unwind-libunwind.c | 11 ++++++++---
 4 files changed, 50 insertions(+), 16 deletions(-)

diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 513e5febbe5a..3e41c61bd861 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -99,6 +99,17 @@ struct test_data_offset offsets[] = {
 	},
 };
 
+/* move it from util/dso.c for compatibility */
+static int dso__data_fd(struct dso *dso, struct machine *machine)
+{
+	int fd = dso__data_get_fd(dso, machine);
+
+	if (fd >= 0)
+		dso__data_put_fd(dso);
+
+	return fd;
+}
+
 int test__dso_data(void)
 {
 	struct machine machine;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index e95e850dd832..7e11a700303f 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -473,25 +473,35 @@ out:
 }
 
 /**
- * dso__data_fd - Get dso's data file descriptor
+ * dso__data_get_fd - Get dso's data file descriptor
  * @dso: dso object
  * @machine: machine object
  *
  * External interface to find dso's file, open it and
- * returns file descriptor.
+ * returns file descriptor.  It should be paired with
+ * dso__data_put_fd() if it returns non-negative value.
  */
-int dso__data_fd(struct dso *dso, struct machine *machine)
+int dso__data_get_fd(struct dso *dso, struct machine *machine)
 {
 	if (dso->data.status == DSO_DATA_STATUS_ERROR)
 		return -1;
 
-	pthread_mutex_lock(&dso__data_open_lock);
+	if (pthread_mutex_lock(&dso__data_open_lock) < 0)
+		return -1;
+
 	try_to_open_dso(dso, machine);
-	pthread_mutex_unlock(&dso__data_open_lock);
+
+	if (dso->data.fd < 0)
+		pthread_mutex_unlock(&dso__data_open_lock);
 
 	return dso->data.fd;
 }
 
+void dso__data_put_fd(struct dso *dso __maybe_unused)
+{
+	pthread_mutex_unlock(&dso__data_open_lock);
+}
+
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by)
 {
 	u32 flag = 1 << by;
@@ -1199,12 +1209,15 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp)
 enum dso_type dso__type(struct dso *dso, struct machine *machine)
 {
 	int fd;
+	enum dso_type type = DSO__TYPE_UNKNOWN;
 
-	fd = dso__data_fd(dso, machine);
-	if (fd < 0)
-		return DSO__TYPE_UNKNOWN;
+	fd = dso__data_get_fd(dso, machine);
+	if (fd >= 0) {
+		type = dso__type_fd(fd);
+		dso__data_put_fd(dso);
+	}
 
-	return dso__type_fd(fd);
+	return type;
 }
 
 int dso__strerror_load(struct dso *dso, char *buf, size_t buflen)
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index b26ec3ab1336..bcec06ad73a2 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -240,7 +240,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 
 /*
  * The dso__data_* external interface provides following functions:
- *   dso__data_fd
+ *   dso__data_get_fd
+ *   dso__data_put_fd
  *   dso__data_close
  *   dso__data_size
  *   dso__data_read_offset
@@ -257,8 +258,11 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  * The current usage of the dso__data_* interface is as follows:
  *
  * Get DSO's fd:
- *   int fd = dso__data_fd(dso, machine);
- *   USE 'fd' SOMEHOW
+ *   int fd = dso__data_get_fd(dso, machine);
+ *   if (fd >= 0) {
+ *       USE 'fd' SOMEHOW
+ *       dso__data_put_fd(dso);
+ *   }
  *
  * Read DSO's data:
  *   n = dso__data_read_offset(dso_0, &machine, 0, buf, BUFSIZE);
@@ -277,7 +281,8 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
  *
  * TODO
 */
-int dso__data_fd(struct dso *dso, struct machine *machine);
+int dso__data_get_fd(struct dso *dso, struct machine *machine);
+void dso__data_put_fd(struct dso *dso __maybe_unused);
 void dso__data_close(struct dso *dso);
 
 off_t dso__data_size(struct dso *dso, struct machine *machine);
diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index 7b09a443a280..f079b63f0b7f 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -269,13 +269,14 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine,
 	u64 offset = dso->data.eh_frame_hdr_offset;
 
 	if (offset == 0) {
-		fd = dso__data_fd(dso, machine);
+		fd = dso__data_get_fd(dso, machine);
 		if (fd < 0)
 			return -EINVAL;
 
 		/* Check the .eh_frame section for unwinding info */
 		offset = elf_section_offset(fd, ".eh_frame_hdr");
 		dso->data.eh_frame_hdr_offset = offset;
+		dso__data_put_fd(dso);
 	}
 
 	if (offset)
@@ -294,13 +295,14 @@ static int read_unwind_spec_debug_frame(struct dso *dso,
 	u64 ofs = dso->data.debug_frame_offset;
 
 	if (ofs == 0) {
-		fd = dso__data_fd(dso, machine);
+		fd = dso__data_get_fd(dso, machine);
 		if (fd < 0)
 			return -EINVAL;
 
 		/* Check the .debug_frame section for unwinding info */
 		ofs = elf_section_offset(fd, ".debug_frame");
 		dso->data.debug_frame_offset = ofs;
+		dso__data_put_fd(dso);
 	}
 
 	*offset = ofs;
@@ -353,10 +355,13 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 #ifndef NO_LIBUNWIND_DEBUG_FRAME
 	/* Check the .debug_frame section for unwinding info */
 	if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) {
-		int fd = dso__data_fd(map->dso, ui->machine);
+		int fd = dso__data_get_fd(map->dso, ui->machine);
 		int is_exec = elf_is_exec(fd, map->dso->name);
 		unw_word_t base = is_exec ? 0 : map->start;
 
+		if (fd >= 0)
+			dso__data_put_fd(dso);
+
 		memset(&di, 0, sizeof(di));
 		if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,
 					   map->start, map->end))

From 4d4dee9a9609819309a84cd3f2d19dcc50ece195 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 21 May 2015 17:48:33 -0300
Subject: [PATCH 202/305] perf tools: Rename maps__next

It really is a 'struct map' method, and since we're introducing a new
'struct maps' class, fix it to avoid confusion.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-xo9ifhk53cfl30wqcuhxpnvl@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/map.c | 2 +-
 tools/perf/util/map.h | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 2d20c5ff8653..09a62731e035 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -775,7 +775,7 @@ struct map *maps__first(struct rb_root *maps)
 	return NULL;
 }
 
-struct map *maps__next(struct map *map)
+struct map *map__next(struct map *map)
 {
 	struct rb_node *next = rb_next(&map->rb_node);
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 7f39217d29bf..aba9569165ea 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -166,7 +166,7 @@ void maps__insert(struct rb_root *maps, struct map *map);
 void maps__remove(struct rb_root *maps, struct map *map);
 struct map *maps__find(struct rb_root *maps, u64 addr);
 struct map *maps__first(struct rb_root *maps);
-struct map *maps__next(struct map *map);
+struct map *map__next(struct map *map);
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
 int map_groups__clone(struct map_groups *mg,
@@ -201,7 +201,7 @@ static inline struct map *map_groups__first(struct map_groups *mg,
 
 static inline struct map *map_groups__next(struct map *map)
 {
-	return maps__next(map);
+	return map__next(map);
 }
 
 struct symbol *map_groups__find_symbol(struct map_groups *mg,

From fdce6a4edaada40136f0e61569b938c9a25f61d5 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 May 2015 17:42:37 -0300
Subject: [PATCH 203/305] perf tools: Remove redundant initialization of thread
 linkage members

A thread moves from a rb tree to a list, but can't be on both, because
those linkage members are in a union. This is leftover from when I was
debugging thread refcounting and had nuked that union.

It is harmless duplication, as RB_CLEAR_NODE() does again what
INIT_LIST_HEAD does.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-hmma9lmip6qlhzhgkhp9tzd1@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/thread.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 16c28a37a9e4..28c4b746baa1 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -54,7 +54,6 @@ struct thread *thread__new(pid_t pid, pid_t tid)
 
 		list_add(&comm->list, &thread->comm_list);
 		atomic_set(&thread->refcnt, 0);
-		INIT_LIST_HEAD(&thread->node);
 		RB_CLEAR_NODE(&thread->rb_node);
 	}
 
@@ -70,7 +69,6 @@ void thread__delete(struct thread *thread)
 	struct comm *comm, *tmp;
 
 	BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));
-	BUG_ON(!list_empty(&thread->node));
 
 	thread_stack__free(thread);
 

From f7e365eb61d4d78f2f5e66d859664048c2921df2 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 18:03:44 -0300
Subject: [PATCH 204/305] perf tools: Nuke unused map_groups__flush()

Since:

	9fdbf671ba7e "perf tools: do not flush maps on COMM for perf report"

We have no users of this function, nuke it.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Luigi Semenzato <semenzato@chromium.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-hsac1t42ehtva8gut8qe6hih@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/map.c | 22 ----------------------
 tools/perf/util/map.h |  2 --
 2 files changed, 24 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 09a62731e035..c1bfd0a12a94 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -498,28 +498,6 @@ void map_groups__put(struct map_groups *mg)
 		map_groups__delete(mg);
 }
 
-void map_groups__flush(struct map_groups *mg)
-{
-	int type;
-
-	for (type = 0; type < MAP__NR_TYPES; type++) {
-		struct rb_root *root = &mg->maps[type];
-		struct rb_node *next = rb_first(root);
-
-		while (next) {
-			struct map *pos = rb_entry(next, struct map, rb_node);
-			next = rb_next(&pos->rb_node);
-			rb_erase(&pos->rb_node, root);
-			/*
-			 * We may have references to this map, for
-			 * instance in some hist_entry instances, so
-			 * just move them to a separate list.
-			 */
-			list_add_tail(&pos->node, &mg->removed_maps[pos->type]);
-		}
-	}
-}
-
 struct symbol *map_groups__find_symbol(struct map_groups *mg,
 				       enum map_type type, u64 addr,
 				       struct map **mapp,
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index aba9569165ea..f2b27566d986 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -233,6 +233,4 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 struct map *map_groups__find_by_name(struct map_groups *mg,
 				     enum map_type type, const char *name);
 
-void map_groups__flush(struct map_groups *mg);
-
 #endif /* __PERF_MAP_H */

From 9402e23f90c5a672db7170e4c0f1fc80ca8c009a Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 11:49:11 -0300
Subject: [PATCH 205/305] perf tools: Import rb_erase_init from block/ in the
 kernel sources

I was assuming rb_erase() was setting things up like list_del_init, but
the fact that thread__delete() was being sucessfull is because the last
thing before deleting is to remove the thread from the
machine->dead_threads list, using list_del_init(), that has the same
effect as using rb_erase_init()...

Introduce this function so that we can use it when removing objects from
rb_trees.

Then we will be able to BUG_ON(still on a list) in destructors.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-55b16mbtndjyd7zzg8nmnamx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/include/linux/rbtree.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h
index 2a030c5af3aa..f06d89f0b867 100644
--- a/tools/perf/util/include/linux/rbtree.h
+++ b/tools/perf/util/include/linux/rbtree.h
@@ -1,2 +1,16 @@
+#ifndef __TOOLS_LINUX_PERF_RBTREE_H
+#define __TOOLS_LINUX_PERF_RBTREE_H
 #include <stdbool.h>
 #include "../../../../include/linux/rbtree.h"
+
+/*
+ * Handy for checking that we are not deleting an entry that is
+ * already in a list, found in block/{blk-throttle,cfq-iosched}.c,
+ * probably should be moved to lib/rbtree.c...
+ */
+static inline void rb_erase_init(struct rb_node *n, struct rb_root *root)
+{
+	rb_erase(n, root);
+	RB_CLEAR_NODE(n);
+}
+#endif /* __TOOLS_LINUX_PERF_RBTREE_H */

From 0170b14f5f5462524d05ee96275b7a0a0d34ae77 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 15:23:05 -0300
Subject: [PATCH 206/305] perf machine: Mark removed threads as such

We use:

  BUG_ON(!RB_EMPTY_NODE(&thread->rb_node));

in the thread destructor as a debugging check to find out about
possibly still referenced thread instances being deleted, to do that
we need to make sure we use RB_CLEAR_NODE() right after rb_erase(),
i.e. that we use the newly introduced rb_erase_init(), that works
just like list_del_init().

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-4fcqo5ypy1cjjf15ilb0hn78@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 7ec3188d3cb3..6bf845758ae3 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -400,7 +400,7 @@ static struct thread *____machine__findnew_thread(struct machine *machine,
 		 * leader and that would screwed the rb tree.
 		 */
 		if (thread__init_map_groups(th, machine)) {
-			rb_erase(&th->rb_node, &machine->threads);
+			rb_erase_init(&th->rb_node, &machine->threads);
 			RB_CLEAR_NODE(&th->rb_node);
 			thread__delete(th);
 			return NULL;
@@ -1314,7 +1314,7 @@ static void __machine__remove_thread(struct machine *machine, struct thread *th,
 	BUG_ON(atomic_read(&th->refcnt) == 0);
 	if (lock)
 		pthread_rwlock_wrlock(&machine->threads_lock);
-	rb_erase(&th->rb_node, &machine->threads);
+	rb_erase_init(&th->rb_node, &machine->threads);
 	RB_CLEAR_NODE(&th->rb_node);
 	/*
 	 * Move it first to the dead_threads list, then drop the reference,

From 614c6b570d5157c2cf835d334bc89af071fc2e44 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 16:21:53 -0300
Subject: [PATCH 207/305] perf tools: Leave DSO destruction to the map
 destruction

As the way DSOs are created are normally via dsos__findnew, so that we
don't have to load the same dso multiple times for multiple maps (think
about /lib64/libc.so.6), so they may be shared and dso__delete() should
be left to be done as part of the map destruction process.

This will all be properly solved by reference counting struct dso, which
will be done soon.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-gbrohe1nvkjxw3u5a1bgj3yh@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 1faa1e67398b..db6021834e8f 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -195,7 +195,6 @@ static void put_target_map(struct map *map, bool user)
 {
 	if (map && user) {
 		/* Only the user map needs to be released */
-		dso__delete(map->dso);
 		map__delete(map);
 	}
 }
@@ -1791,7 +1790,6 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
 
 out:
 	if (map && !is_kprobe) {
-		dso__delete(map->dso);
 		map__delete(map);
 	}
 
@@ -2884,7 +2882,6 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 	dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
 end:
 	if (user) {
-		dso__delete(map->dso);
 		map__delete(map);
 	}
 	exit_symbol_maps();

From 4bb7123dcfa7aa1d963ad4a8f01b88d54a2bb873 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 May 2015 11:52:22 -0300
Subject: [PATCH 208/305] perf tools: Use maps__first()/map__next()

In a few more remaining places, for consistency.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-c2n7slwtto29wndfttdrhfrx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/vmlinux-kallsyms.c | 34 ++++++++++++++---------------
 tools/perf/util/event.c             |  7 +++---
 tools/perf/util/map.c               |  7 +++---
 tools/perf/util/probe-event.c       |  6 ++---
 tools/perf/util/symbol.c            | 23 +++++++++----------
 5 files changed, 37 insertions(+), 40 deletions(-)

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 3d9088003a5b..94ac6924df65 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -23,9 +23,10 @@ int test__vmlinux_matches_kallsyms(void)
 	int err = -1;
 	struct rb_node *nd;
 	struct symbol *sym;
-	struct map *kallsyms_map, *vmlinux_map;
+	struct map *kallsyms_map, *vmlinux_map, *map;
 	struct machine kallsyms, vmlinux;
 	enum map_type type = MAP__FUNCTION;
+	struct rb_root *maps = &vmlinux.kmaps.maps[type];
 	u64 mem_start, mem_end;
 
 	/*
@@ -184,8 +185,8 @@ detour:
 
 	pr_info("Maps only in vmlinux:\n");
 
-	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *
 		/*
 		 * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while
 		 * the kernel will have the path for the vmlinux file being used,
@@ -193,22 +194,22 @@ detour:
 		 * both cases.
 		 */
 		pair = map_groups__find_by_name(&kallsyms.kmaps, type,
-						(pos->dso->kernel ?
-							pos->dso->short_name :
-							pos->dso->name));
+						(map->dso->kernel ?
+							map->dso->short_name :
+							map->dso->name));
 		if (pair)
 			pair->priv = 1;
 		else
-			map__fprintf(pos, stderr);
+			map__fprintf(map, stderr);
 	}
 
 	pr_info("Maps in vmlinux with a different name in kallsyms:\n");
 
-	for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node), *pair;
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		struct map *pair;
 
-		mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start);
-		mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end);
+		mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start);
+		mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end);
 
 		pair = map_groups__find(&kallsyms.kmaps, type, mem_start);
 		if (pair == NULL || pair->priv)
@@ -217,7 +218,7 @@ detour:
 		if (pair->start == mem_start) {
 			pair->priv = 1;
 			pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as",
-				pos->start, pos->end, pos->pgoff, pos->dso->name);
+				map->start, map->end, map->pgoff, map->dso->name);
 			if (mem_end != pair->end)
 				pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64,
 					pair->start, pair->end, pair->pgoff);
@@ -228,12 +229,11 @@ detour:
 
 	pr_info("Maps only in kallsyms:\n");
 
-	for (nd = rb_first(&kallsyms.kmaps.maps[type]);
-	     nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node);
+	maps = &kallsyms.kmaps.maps[type];
 
-		if (!pos->priv)
-			map__fprintf(pos, stderr);
+	for (map = maps__first(maps); map; map = map__next(map)) {
+		if (!map->priv)
+			map__fprintf(map, stderr);
 	}
 out:
 	machine__exit(&kallsyms);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index a513a51f7330..9d3bba175423 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -329,8 +329,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 				   struct machine *machine)
 {
 	int rc = 0;
-	struct rb_node *nd;
+	struct map *pos;
 	struct map_groups *kmaps = &machine->kmaps;
+	struct rb_root *maps = &kmaps->maps[MAP__FUNCTION];
 	union perf_event *event = zalloc((sizeof(event->mmap) +
 					  machine->id_hdr_size));
 	if (event == NULL) {
@@ -350,10 +351,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	else
 		event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL;
 
-	for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]);
-	     nd; nd = rb_next(nd)) {
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		size_t size;
-		struct map *pos = rb_entry(nd, struct map, rb_node);
 
 		if (pos->dso->kernel)
 			continue;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index c1bfd0a12a94..898ab92a98dd 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -688,9 +688,10 @@ move_map:
 int map_groups__clone(struct map_groups *mg,
 		      struct map_groups *parent, enum map_type type)
 {
-	struct rb_node *nd;
-	for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) {
-		struct map *map = rb_entry(nd, struct map, rb_node);
+	struct map *map;
+	struct rb_root *maps = &parent->maps[type];
+
+	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *new = map__clone(map);
 		if (new == NULL)
 			return -ENOMEM;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index db6021834e8f..092256516262 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -162,8 +162,9 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc)
 
 static struct map *kernel_get_module_map(const char *module)
 {
-	struct rb_node *nd;
 	struct map_groups *grp = &host_machine->kmaps;
+	struct rb_root *maps = &grp->maps[MAP__FUNCTION];
+	struct map *pos;
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
@@ -172,8 +173,7 @@ static struct map *kernel_get_module_map(const char *module)
 	if (!module)
 		module = "kernel";
 
-	for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) {
-		struct map *pos = rb_entry(nd, struct map, rb_node);
+	for (pos = maps__first(maps); pos; pos = map__next(pos)) {
 		if (strncmp(pos->dso->short_name + 1, module,
 			    pos->dso->short_name_len - 2) == 0) {
 			return pos;
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 82a31fd0fcf5..00b6b17e74a7 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,18 +202,16 @@ void symbols__fixup_end(struct rb_root *symbols)
 
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 {
-	struct map *prev, *curr;
-	struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]);
+	struct rb_root *maps = &mg->maps[type];
+	struct map *next, *curr;
 
-	if (prevnd == NULL)
+	curr = maps__first(maps);
+	if (curr == NULL)
 		return;
 
-	curr = rb_entry(prevnd, struct map, rb_node);
-
-	for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) {
-		prev = curr;
-		curr = rb_entry(nd, struct map, rb_node);
-		prev->end = curr->start;
+	for (next = map__next(curr); next; next = map__next(curr)) {
+		curr->end = next->start;
+		curr = next;
 	}
 
 	/*
@@ -1522,11 +1520,10 @@ out:
 struct map *map_groups__find_by_name(struct map_groups *mg,
 				     enum map_type type, const char *name)
 {
-	struct rb_node *nd;
-
-	for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
-		struct map *map = rb_entry(nd, struct map, rb_node);
+	struct rb_root *maps = &mg->maps[type];
+	struct map *map;
 
+	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (map->dso && strcmp(map->dso->short_name, name) == 0)
 			return map;
 	}

From 5bcaaca3e4d15ce39008a0b9c431c0ac4be784bd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Li=C5=A1ka?= <mliska@suse.cz>
Date: Tue, 26 May 2015 11:41:37 -0300
Subject: [PATCH 209/305] perf tools: Assign default value for some pointers

Assign default value for pointers that are identified by the compiler as
non-initialized.

Signed-off-by: Martin Liska <mliska@suse.cz>
Acked-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/5564393C.1090104@suse.cz
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/common.c            | 2 +-
 tools/perf/util/symbol.c            | 2 +-
 tools/perf/util/trace-event-parse.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c
index 49776f190abf..b7bb42c44694 100644
--- a/tools/perf/arch/common.c
+++ b/tools/perf/arch/common.c
@@ -61,7 +61,7 @@ const char *const mips_triplets[] = {
 static bool lookup_path(char *name)
 {
 	bool found = false;
-	char *path, *tmp;
+	char *path, *tmp = NULL;
 	char buf[PATH_MAX];
 	char *env = getenv("PATH");
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 00b6b17e74a7..b9e3eb581884 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -398,7 +398,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols,
 					    const char *name)
 {
 	struct rb_node *n;
-	struct symbol_name_rb_node *s;
+	struct symbol_name_rb_node *s = NULL;
 
 	if (symbols == NULL)
 		return NULL;
diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c
index 25d6c737be3e..d4957418657e 100644
--- a/tools/perf/util/trace-event-parse.c
+++ b/tools/perf/util/trace-event-parse.c
@@ -173,7 +173,7 @@ void parse_ftrace_printk(struct pevent *pevent,
 	char *line;
 	char *next = NULL;
 	char *addr_str;
-	char *fmt;
+	char *fmt = NULL;
 
 	line = strtok_r(file, "\n", &next);
 	while (line) {

From e8b7ea4356fdd3c4de5478f3418eb84f8dce2b61 Mon Sep 17 00:00:00 2001
From: Martin Liska <mliska@suse.cz>
Date: Tue, 26 May 2015 12:23:24 -0300
Subject: [PATCH 210/305] perf tools: Improve setting of gcc debug option

Correct debugging experience is given by passing -Og to compiler.

Do it in a way that supports older compilers

Signed-off-by: Martin Liska <mliska@suse.cz>
Acked-by: Ingo Molnar <mingo@kernel.org>
Link: http://lkml.kernel.org/r/5564393C.1090104@suse.cz
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/config/Makefile      |  2 ++
 tools/perf/config/utilities.mak | 19 +++++++++++++++++++
 2 files changed, 21 insertions(+)

diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index e3b3724e73ff..317001c94660 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -130,6 +130,8 @@ endif
 
 ifeq ($(DEBUG),0)
   CFLAGS += -O6
+else
+  CFLAGS += $(call cc-option,-Og,-O0)
 endif
 
 ifdef PARSER_DEBUG
diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak
index c16ce833079c..0ebef09c0842 100644
--- a/tools/perf/config/utilities.mak
+++ b/tools/perf/config/utilities.mak
@@ -177,3 +177,22 @@ $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2)))
 endef
 _ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2)))
 _gea_err  = $(if $(1),$(error Please set '$(1)' appropriately))
+
+# try-run
+# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise)
+# Exit code chooses option. "$$TMP" is can be used as temporary file and
+# is automatically cleaned up.
+try-run = $(shell set -e;		\
+	TMP="$(TMPOUT).$$$$.tmp";	\
+	TMPO="$(TMPOUT).$$$$.o";	\
+	if ($(1)) >/dev/null 2>&1;	\
+	then echo "$(2)";		\
+	else echo "$(3)";		\
+	fi;				\
+	rm -f "$$TMP" "$$TMPO")
+
+# cc-option
+# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586)
+
+cc-option = $(call try-run,\
+	$(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2))

From 2f80dd4488c204a4850554746eb31f25f5a84405 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 22 May 2015 09:18:40 -0400
Subject: [PATCH 211/305] perf sched: Add option to merge like comms to lat
 output

Sometimes when debugging large multi-threaded applications it is helpful
to collate all of the latency numbers into one bulk record to get an
idea of what is going on.

This patch does this by merging any entries that belong to the same comm
into one entry and then spits out those totals.

I've also slightly changed the output so you can see how many threads
were merged in the processing.  Here is the new default output format

 -----------------------------------------------------------------------------------------------------------
  Task                 | Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at    |
 -----------------------------------------------------------------------------------------------------------
  chrome:(23)          |  740.878 ms |     2612 | avg:    0.022 ms | max:    0.845 ms | max at: 7935.254223 s
  pulseaudio:1523      |   94.440 ms |      597 | avg:    0.027 ms | max:    0.110 ms | max at: 7934.668372 s
  threaded-ml:6042     |   72.554 ms |      386 | avg:    0.035 ms | max:    1.186 ms | max at: 7935.330911 s
  Chrome_IOThread:3832 |   52.388 ms |      456 | avg:    0.021 ms | max:    1.365 ms | max at: 7935.330602 s
  Chrome_ChildIOT:(7)  |   50.694 ms |      743 | avg:    0.021 ms | max:    1.448 ms | max at: 7935.256659 s
  Compositor:5510      |   30.012 ms |      192 | avg:    0.019 ms | max:    0.131 ms | max at: 7936.636815 s
  plugin_audio_th:6043 |   24.828 ms |      314 | avg:    0.018 ms | max:    0.143 ms | max at: 7936.205994 s
  CompositorTileW:(2)  |   14.099 ms |       45 | avg:    0.022 ms | max:    0.153 ms | max at: 7937.521800 s

the (#) after the task is the number of tasks merged, and then if there were
no tasks merged it just shows the pid.  Here is the same trace file with the -p
option to print the per-pid latency numbers

 -----------------------------------------------------------------------------------------------------------
  Task                 | Runtime ms  | Switches | Average delay ms | Maximum delay ms | Maximum delay at    |
 -----------------------------------------------------------------------------------------------------------
  chrome:5500          |  386.872 ms |      387 | avg:    0.023 ms | max:    0.241 ms | max at: 7936.001694 s
  pulseaudio:1523      |   94.440 ms |      597 | avg:    0.027 ms | max:    0.110 ms | max at: 7934.668372 s
  threaded-ml:6042     |   72.554 ms |      386 | avg:    0.035 ms | max:    1.186 ms | max at: 7935.330911 s
  chrome:10226         |   69.710 ms |      251 | avg:    0.023 ms | max:    0.764 ms | max at: 7935.992305 s
  chrome:4267          |   64.551 ms |      418 | avg:    0.021 ms | max:    0.294 ms | max at: 7937.862427 s
  chrome:4827          |   62.268 ms |       54 | avg:    0.029 ms | max:    0.666 ms | max at: 7935.992813 s
  Chrome_IOThread:3832 |   52.388 ms |      456 | avg:    0.021 ms | max:    1.365 ms | max at: 7935.330602 s
  chrome:3776          |   46.150 ms |      349 | avg:    0.023 ms | max:    0.845 ms | max at: 7935.254223 s

Signed-off-by: Josef Bacik <jbacik@fb.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: kernel-team@fb.com
Link: http://lkml.kernel.org/r/1432300720-30478-1-git-send-email-jbacik@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-sched.c | 77 +++++++++++++++++++++++++++++++++++---
 1 file changed, 72 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 79273ecf92eb..33962612a5e9 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -95,6 +95,7 @@ struct work_atoms {
 	u64			total_lat;
 	u64			nb_atoms;
 	u64			total_runtime;
+	int			num_merged;
 };
 
 typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *);
@@ -168,9 +169,10 @@ struct perf_sched {
 	u64		 all_runtime;
 	u64		 all_count;
 	u64		 cpu_last_switched[MAX_CPUS];
-	struct rb_root	 atom_root, sorted_atom_root;
+	struct rb_root	 atom_root, sorted_atom_root, merged_atom_root;
 	struct list_head sort_list, cmp_pid;
 	bool force;
+	bool skip_merge;
 };
 
 static u64 get_nsecs(void)
@@ -1182,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_
 	sched->all_runtime += work_list->total_runtime;
 	sched->all_count   += work_list->nb_atoms;
 
-	ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
+	if (work_list->num_merged > 1)
+		ret = printf("  %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged);
+	else
+		ret = printf("  %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid);
 
 	for (i = 0; i < 24 - ret; i++)
 		printf(" ");
@@ -1302,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list)
 static void perf_sched__sort_lat(struct perf_sched *sched)
 {
 	struct rb_node *node;
-
+	struct rb_root *root = &sched->atom_root;
+again:
 	for (;;) {
 		struct work_atoms *data;
-		node = rb_first(&sched->atom_root);
+		node = rb_first(root);
 		if (!node)
 			break;
 
-		rb_erase(node, &sched->atom_root);
+		rb_erase(node, root);
 		data = rb_entry(node, struct work_atoms, node);
 		__thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list);
 	}
+	if (root == &sched->atom_root) {
+		root = &sched->merged_atom_root;
+		goto again;
+	}
 }
 
 static int process_sched_wakeup_event(struct perf_tool *tool,
@@ -1572,6 +1582,59 @@ static void print_bad_events(struct perf_sched *sched)
 	}
 }
 
+static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data)
+{
+	struct rb_node **new = &(root->rb_node), *parent = NULL;
+	struct work_atoms *this;
+	const char *comm = thread__comm_str(data->thread), *this_comm;
+
+	while (*new) {
+		int cmp;
+
+		this = container_of(*new, struct work_atoms, node);
+		parent = *new;
+
+		this_comm = thread__comm_str(this->thread);
+		cmp = strcmp(comm, this_comm);
+		if (cmp > 0) {
+			new = &((*new)->rb_left);
+		} else if (cmp < 0) {
+			new = &((*new)->rb_right);
+		} else {
+			this->num_merged++;
+			this->total_runtime += data->total_runtime;
+			this->nb_atoms += data->nb_atoms;
+			this->total_lat += data->total_lat;
+			list_splice(&data->work_list, &this->work_list);
+			if (this->max_lat < data->max_lat) {
+				this->max_lat = data->max_lat;
+				this->max_lat_at = data->max_lat_at;
+			}
+			zfree(&data);
+			return;
+		}
+	}
+
+	data->num_merged++;
+	rb_link_node(&data->node, parent, new);
+	rb_insert_color(&data->node, root);
+}
+
+static void perf_sched__merge_lat(struct perf_sched *sched)
+{
+	struct work_atoms *data;
+	struct rb_node *node;
+
+	if (sched->skip_merge)
+		return;
+
+	while ((node = rb_first(&sched->atom_root))) {
+		rb_erase(node, &sched->atom_root);
+		data = rb_entry(node, struct work_atoms, node);
+		__merge_work_atoms(&sched->merged_atom_root, data);
+	}
+}
+
 static int perf_sched__lat(struct perf_sched *sched)
 {
 	struct rb_node *next;
@@ -1581,6 +1644,7 @@ static int perf_sched__lat(struct perf_sched *sched)
 	if (perf_sched__read_events(sched))
 		return -1;
 
+	perf_sched__merge_lat(sched);
 	perf_sched__sort_lat(sched);
 
 	printf("\n -----------------------------------------------------------------------------------------------------------------\n");
@@ -1732,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		.profile_cpu	      = -1,
 		.next_shortname1      = 'A',
 		.next_shortname2      = '0',
+		.skip_merge           = 0,
 	};
 	const struct option latency_options[] = {
 	OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]",
@@ -1742,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused)
 		    "CPU to profile on"),
 	OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace,
 		    "dump raw trace in ASCII"),
+	OPT_BOOLEAN('p', "pids", &sched.skip_merge,
+		    "latency stats per pid instead of per comm"),
 	OPT_END()
 	};
 	const struct option replay_options[] = {

From 9b5d1c29556989aa9dc1240566e78806ddefd160 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 22 May 2015 14:53:58 +0300
Subject: [PATCH 212/305] perf tools: Disallow PMU events intel_pt and
 intel_bts until there is support

Disallow PMU events intel_pt and intel_bts until the tools support them.

By default any PMU is selectable as an event but until the tools have
intel_pt and intel_bts support using them would result in no data being
recorded without any indication as to why.

Before the change:

    $ perf record -e intel_bts// sleep 1
    [ perf record: Woken up 1 times to write data ]
    [ perf record: Captured and wrote 0.008 MB perf.data ]
    $ perf report --stdio
    Error:
    The perf.data file has no samples!

After the change:

    $ perf record -e intel_bts// sleep 1
    invalid or unsupported event: 'intel_bts//'
    Run 'perf list' for a list of valid events

Reported-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1432295653-13989-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 244c66f89891..5d3ab7c8ceaf 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -442,6 +442,10 @@ static struct perf_pmu *pmu_lookup(const char *name)
 	LIST_HEAD(aliases);
 	__u32 type;
 
+	/* No support for intel_bts or intel_pt so disallow them */
+	if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt"))
+		return NULL;
+
 	/*
 	 * The pmu data we store & need consists of the pmu
 	 * type value and format definitions. Load both right

From 419e87382873b11b17cb31e2f21859570a32e0d1 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 27 May 2015 17:37:18 +0900
Subject: [PATCH 213/305] perf probe: Show the error reason comes from invalid
 DSO

Show the reason of error when dso__load* fails. This shows when user
gives wrong kernel image or wrong path.

Without this, perf probe shows an obscure message:

  ----
  $ perf probe -k ~/kbin/linux-3.x86_64/vmlinux -L vfs_read
  Failed to find path of kernel module.
    Error: Failed to show lines.
  ----

With this, perf shows appropriate error message:

  ----
  $ perf probe -k ~/kbin/linux-3.x86_64/vmlinux -L vfs_read
  Failed to find the path for kernel: Mismatching build id
    Error: Failed to show lines.
  ----

And:

  ----
  $ perf probe -k /non-exist/kernel/vmlinux -L vfs_read
  Failed to find the path for kernel: No such file or directory
    Error: Failed to show lines.
  ----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Link: http://lkml.kernel.org/r/20150527083718.23880.84100.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 47 ++++++++++++++++++-----------------
 tools/perf/util/probe-event.h |  3 ---
 2 files changed, 24 insertions(+), 26 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 092256516262..f5be411bc69c 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -200,11 +200,12 @@ static void put_target_map(struct map *map, bool user)
 }
 
 
-static struct dso *kernel_get_module_dso(const char *module)
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
 {
 	struct dso *dso;
 	struct map *map;
 	const char *vmlinux_name;
+	int ret = 0;
 
 	if (module) {
 		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
@@ -214,30 +215,21 @@ static struct dso *kernel_get_module_dso(const char *module)
 				goto found;
 		}
 		pr_debug("Failed to find module %s.\n", module);
-		return NULL;
+		return -ENOENT;
 	}
 
 	map = host_machine->vmlinux_maps[MAP__FUNCTION];
 	dso = map->dso;
 
 	vmlinux_name = symbol_conf.vmlinux_name;
-	if (vmlinux_name) {
-		if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0)
-			return NULL;
-	} else {
-		if (dso__load_vmlinux_path(dso, map, NULL) <= 0) {
-			pr_debug("Failed to load kernel map.\n");
-			return NULL;
-		}
-	}
+	dso->load_errno = 0;
+	if (vmlinux_name)
+		ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+	else
+		ret = dso__load_vmlinux_path(dso, map, NULL);
 found:
-	return dso;
-}
-
-const char *kernel_get_module_path(const char *module)
-{
-	struct dso *dso = kernel_get_module_dso(module);
-	return (dso) ? dso->long_name : NULL;
+	*pdso = dso;
+	return ret;
 }
 
 static int convert_exec_to_group(const char *exec, char **result)
@@ -389,16 +381,25 @@ static int get_alternative_line_range(struct debuginfo *dinfo,
 static struct debuginfo *open_debuginfo(const char *module, bool silent)
 {
 	const char *path = module;
-	struct debuginfo *ret;
+	char reason[STRERR_BUFSIZE];
+	struct debuginfo *ret = NULL;
+	struct dso *dso = NULL;
+	int err;
 
 	if (!module || !strchr(module, '/')) {
-		path = kernel_get_module_path(module);
-		if (!path) {
+		err = kernel_get_module_dso(module, &dso);
+		if (err < 0) {
+			if (!dso || dso->load_errno == 0) {
+				if (!strerror_r(-err, reason, STRERR_BUFSIZE))
+					strcpy(reason, "(unknown)");
+			} else
+				dso__strerror_load(dso, reason, STRERR_BUFSIZE);
 			if (!silent)
-				pr_err("Failed to find path of %s module.\n",
-				       module ?: "kernel");
+				pr_err("Failed to find the path for %s: %s\n",
+					module ?: "kernel", reason);
 			return NULL;
 		}
+		path = dso->long_name;
 	}
 	ret = debuginfo__new(path);
 	if (!ret && !silent) {
diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h
index 537eb329c2cf..31db6ee7db54 100644
--- a/tools/perf/util/probe-event.h
+++ b/tools/perf/util/probe-event.h
@@ -131,9 +131,6 @@ extern void line_range__clear(struct line_range *lr);
 /* Initialize line range */
 extern int line_range__init(struct line_range *lr);
 
-/* Internal use: Return kernel/module path */
-extern const char *kernel_get_module_path(const char *module);
-
 extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs);
 extern int del_perf_probe_events(struct strfilter *filter);
 extern int show_perf_probe_events(struct strfilter *filter);

From dddc7ee32fa13efc66afa71ebd83bce545c8392a Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 27 May 2015 17:37:25 +0900
Subject: [PATCH 214/305] perf probe: Fix an error when deleting probes
 successfully

Fix a bug in del_perf_probe_events() which returns an error (-ENOENT)
even if the probes are successfully deleted.

This happens only if the probes are on user-apps and not on kernel,
simply because it doesn't clear the previous error.

So, without this fix, we get an error even though events are being
successfully removed.

  ------
  # ./perf probe -x ./perf del_perf_probe_events
  Added new event:
    probe_perf:del_perf_probe_events (on del_perf_probe_events in ...

  You can now use it in all perf tools, such as:

          perf record -e probe_perf:del_perf_probe_events -aR sleep 1

  # ./perf probe -d \*:\*
  Removed event: probe_perf:del_perf_probe_events
    Error: Failed to delete events.
  ------

This fixes the above error.
  ------
  # ./perf probe -d \*:\*
  Removed event: probe_perf:del_perf_probe_events
  ------

Reported-by: Arnaldo Carvalho de Melo <acme@kernel.org>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Richard Weinberger <richard@nod.at>
Link: http://lkml.kernel.org/r/20150527083725.23880.45209.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index f5be411bc69c..97da98481d89 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2811,13 +2811,14 @@ int del_perf_probe_events(struct strfilter *filter)
 		goto error;
 
 	ret2 = del_trace_probe_events(ufd, filter, unamelist);
-	if (ret2 < 0 && ret2 != -ENOENT)
+	if (ret2 < 0 && ret2 != -ENOENT) {
 		ret = ret2;
-	else if (ret == -ENOENT && ret2 == -ENOENT) {
+		goto error;
+	}
+	if (ret == -ENOENT && ret2 == -ENOENT)
 		pr_debug("\"%s\" does not hit any event.\n", str);
 		/* Note that this is silently ignored */
-		ret = 0;
-	}
+	ret = 0;
 
 error:
 	if (kfd >= 0) {

From 1eee78aea9252fabcd333805d5d9fa42a1bf9427 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 May 2015 12:58:53 -0300
Subject: [PATCH 215/305] perf tools: Introduce struct maps

That for now has the maps rbtree and the list for the dead maps, that
may be still referenced from some hist_entry, etc.

This paves the way for protecting the rbtree with a lock, then refcount
the maps and finally remove the removed_maps list, as it'll not ne
anymore needed.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-fl0fa6142pj8khj97fow3uw0@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/vmlinux-kallsyms.c |  2 +-
 tools/perf/util/event.c             |  2 +-
 tools/perf/util/map.c               | 64 +++++++++++++++++------------
 tools/perf/util/map.h               | 16 +++++---
 tools/perf/util/probe-event.c       |  2 +-
 tools/perf/util/symbol.c            |  4 +-
 6 files changed, 52 insertions(+), 38 deletions(-)

diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c
index 94ac6924df65..b34c5fc829ae 100644
--- a/tools/perf/tests/vmlinux-kallsyms.c
+++ b/tools/perf/tests/vmlinux-kallsyms.c
@@ -26,7 +26,7 @@ int test__vmlinux_matches_kallsyms(void)
 	struct map *kallsyms_map, *vmlinux_map, *map;
 	struct machine kallsyms, vmlinux;
 	enum map_type type = MAP__FUNCTION;
-	struct rb_root *maps = &vmlinux.kmaps.maps[type];
+	struct maps *maps = &vmlinux.kmaps.maps[type];
 	u64 mem_start, mem_end;
 
 	/*
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 9d3bba175423..c1925968a8af 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -331,7 +331,7 @@ int perf_event__synthesize_modules(struct perf_tool *tool,
 	int rc = 0;
 	struct map *pos;
 	struct map_groups *kmaps = &machine->kmaps;
-	struct rb_root *maps = &kmaps->maps[MAP__FUNCTION];
+	struct maps *maps = &kmaps->maps[MAP__FUNCTION];
 	union perf_event *event = zalloc((sizeof(event->mmap) +
 					  machine->id_hdr_size));
 	if (event == NULL) {
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 898ab92a98dd..adf012c4d650 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -418,48 +418,58 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 	return ip + map->reloc;
 }
 
+static void maps__init(struct maps *maps)
+{
+	maps->entries = RB_ROOT;
+	INIT_LIST_HEAD(&maps->removed_maps);
+}
+
 void map_groups__init(struct map_groups *mg, struct machine *machine)
 {
 	int i;
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		mg->maps[i] = RB_ROOT;
-		INIT_LIST_HEAD(&mg->removed_maps[i]);
+		maps__init(&mg->maps[i]);
 	}
 	mg->machine = machine;
 	atomic_set(&mg->refcnt, 1);
 }
 
-static void maps__delete(struct rb_root *maps)
+static void maps__purge(struct maps *maps)
 {
-	struct rb_node *next = rb_first(maps);
+	struct rb_root *root = &maps->entries;
+	struct rb_node *next = rb_first(root);
 
 	while (next) {
 		struct map *pos = rb_entry(next, struct map, rb_node);
 
 		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, maps);
+		rb_erase(&pos->rb_node, root);
 		map__delete(pos);
 	}
 }
 
-static void maps__delete_removed(struct list_head *maps)
+static void maps__purge_removed_maps(struct maps *maps)
 {
 	struct map *pos, *n;
 
-	list_for_each_entry_safe(pos, n, maps, node) {
+	list_for_each_entry_safe(pos, n, &maps->removed_maps, node) {
 		list_del(&pos->node);
 		map__delete(pos);
 	}
 }
 
+static void maps__exit(struct maps *maps)
+{
+	maps__purge(maps);
+	maps__purge_removed_maps(maps);
+}
+
 void map_groups__exit(struct map_groups *mg)
 {
 	int i;
 
-	for (i = 0; i < MAP__NR_TYPES; ++i) {
-		maps__delete(&mg->maps[i]);
-		maps__delete_removed(&mg->removed_maps[i]);
-	}
+	for (i = 0; i < MAP__NR_TYPES; ++i)
+		maps__exit(&mg->maps[i]);
 }
 
 bool map_groups__empty(struct map_groups *mg)
@@ -469,7 +479,7 @@ bool map_groups__empty(struct map_groups *mg)
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
 		if (maps__first(&mg->maps[i]))
 			return false;
-		if (!list_empty(&mg->removed_maps[i]))
+		if (!list_empty(&mg->maps[i].removed_maps))
 			return false;
 	}
 
@@ -523,7 +533,7 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 {
 	struct rb_node *nd;
 
-	for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&mg->maps[type].entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
 		struct symbol *sym = map__find_symbol_by_name(pos, name, filter);
 
@@ -560,7 +570,7 @@ size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
 	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
 	struct rb_node *nd;
 
-	for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) {
+	for (nd = rb_first(&mg->maps[type].entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
 		printed += fprintf(fp, "Map:");
 		printed += map__fprintf(pos, fp);
@@ -587,7 +597,7 @@ static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg,
 	struct map *pos;
 	size_t printed = 0;
 
-	list_for_each_entry(pos, &mg->removed_maps[type], node) {
+	list_for_each_entry(pos, &mg->maps[type].removed_maps, node) {
 		printed += fprintf(fp, "Map:");
 		printed += map__fprintf(pos, fp);
 		if (verbose > 1) {
@@ -617,7 +627,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 				   FILE *fp)
 {
-	struct rb_root *root = &mg->maps[map->type];
+	struct rb_root *root = &mg->maps[map->type].entries;
 	struct rb_node *next = rb_first(root);
 	int err = 0;
 
@@ -671,7 +681,7 @@ move_map:
 		 * If we have references, just move them to a separate list.
 		 */
 		if (pos->referenced)
-			list_add_tail(&pos->node, &mg->removed_maps[map->type]);
+			list_add_tail(&pos->node, &mg->maps[map->type].removed_maps);
 		else
 			map__delete(pos);
 
@@ -689,7 +699,7 @@ int map_groups__clone(struct map_groups *mg,
 		      struct map_groups *parent, enum map_type type)
 {
 	struct map *map;
-	struct rb_root *maps = &parent->maps[type];
+	struct maps *maps = &parent->maps[type];
 
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *new = map__clone(map);
@@ -700,9 +710,9 @@ int map_groups__clone(struct map_groups *mg,
 	return 0;
 }
 
-void maps__insert(struct rb_root *maps, struct map *map)
+void maps__insert(struct maps *maps, struct map *map)
 {
-	struct rb_node **p = &maps->rb_node;
+	struct rb_node **p = &maps->entries.rb_node;
 	struct rb_node *parent = NULL;
 	const u64 ip = map->start;
 	struct map *m;
@@ -717,17 +727,17 @@ void maps__insert(struct rb_root *maps, struct map *map)
 	}
 
 	rb_link_node(&map->rb_node, parent, p);
-	rb_insert_color(&map->rb_node, maps);
+	rb_insert_color(&map->rb_node, &maps->entries);
 }
 
-void maps__remove(struct rb_root *maps, struct map *map)
+void maps__remove(struct maps *maps, struct map *map)
 {
-	rb_erase(&map->rb_node, maps);
+	rb_erase(&map->rb_node, &maps->entries);
 }
 
-struct map *maps__find(struct rb_root *maps, u64 ip)
+struct map *maps__find(struct maps *maps, u64 ip)
 {
-	struct rb_node **p = &maps->rb_node;
+	struct rb_node **p = &maps->entries.rb_node;
 	struct rb_node *parent = NULL;
 	struct map *m;
 
@@ -745,9 +755,9 @@ struct map *maps__find(struct rb_root *maps, u64 ip)
 	return NULL;
 }
 
-struct map *maps__first(struct rb_root *maps)
+struct map *maps__first(struct maps *maps)
 {
-	struct rb_node *first = rb_first(maps);
+	struct rb_node *first = rb_first(&maps->entries);
 
 	if (first)
 		return rb_entry(first, struct map, rb_node);
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index f2b27566d986..e3702fd468c5 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -58,9 +58,13 @@ struct kmap {
 	struct map_groups	*kmaps;
 };
 
+struct maps {
+	struct rb_root	 entries;
+	struct list_head removed_maps;
+};
+
 struct map_groups {
-	struct rb_root	 maps[MAP__NR_TYPES];
-	struct list_head removed_maps[MAP__NR_TYPES];
+	struct maps	 maps[MAP__NR_TYPES];
 	struct machine	 *machine;
 	atomic_t	 refcnt;
 };
@@ -162,10 +166,10 @@ void map__reloc_vmlinux(struct map *map);
 
 size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
 				  FILE *fp);
-void maps__insert(struct rb_root *maps, struct map *map);
-void maps__remove(struct rb_root *maps, struct map *map);
-struct map *maps__find(struct rb_root *maps, u64 addr);
-struct map *maps__first(struct rb_root *maps);
+void maps__insert(struct maps *maps, struct map *map);
+void maps__remove(struct maps *maps, struct map *map);
+struct map *maps__find(struct maps *maps, u64 addr);
+struct map *maps__first(struct maps *maps);
 struct map *map__next(struct map *map);
 void map_groups__init(struct map_groups *mg, struct machine *machine);
 void map_groups__exit(struct map_groups *mg);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 97da98481d89..32471d0839d1 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -163,7 +163,7 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc)
 static struct map *kernel_get_module_map(const char *module)
 {
 	struct map_groups *grp = &host_machine->kmaps;
-	struct rb_root *maps = &grp->maps[MAP__FUNCTION];
+	struct maps *maps = &grp->maps[MAP__FUNCTION];
 	struct map *pos;
 
 	/* A file path -- this is an offline module */
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index b9e3eb581884..c8a3e79c5da2 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -202,7 +202,7 @@ void symbols__fixup_end(struct rb_root *symbols)
 
 void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 {
-	struct rb_root *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps[type];
 	struct map *next, *curr;
 
 	curr = maps__first(maps);
@@ -1520,7 +1520,7 @@ out:
 struct map *map_groups__find_by_name(struct map_groups *mg,
 				     enum map_type type, const char *name)
 {
-	struct rb_root *maps = &mg->maps[type];
+	struct maps *maps = &mg->maps[type];
 	struct map *map;
 
 	for (map = maps__first(maps); map; map = map__next(map)) {

From 6a2ffcddad22ead7ce75c5773e87895b91e7cca7 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 22 May 2015 13:45:24 -0300
Subject: [PATCH 216/305] perf tools: Protect accesses the map rbtrees with a
 rw lock

To allow concurrent access, next step: refcount struct map instances, so
that we can ditch maps->removed_maps and stop leaking threads, maps,
then struct DSO needs the same treatment.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-o45w2w5dzrza38nzqxnqzhyf@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/map.c    | 122 +++++++++++++++++++++++++++++----------
 tools/perf/util/map.h    |   2 +
 tools/perf/util/symbol.c |  17 +++++-
 3 files changed, 108 insertions(+), 33 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index adf012c4d650..0905b07072da 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -16,6 +16,8 @@
 #include "machine.h"
 #include <linux/string.h>
 
+static void __maps__insert(struct maps *maps, struct map *map);
+
 const char *map_type__name[MAP__NR_TYPES] = {
 	[MAP__FUNCTION] = "Functions",
 	[MAP__VARIABLE] = "Variables",
@@ -421,6 +423,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 static void maps__init(struct maps *maps)
 {
 	maps->entries = RB_ROOT;
+	pthread_rwlock_init(&maps->lock, NULL);
 	INIT_LIST_HEAD(&maps->removed_maps);
 }
 
@@ -434,7 +437,7 @@ void map_groups__init(struct map_groups *mg, struct machine *machine)
 	atomic_set(&mg->refcnt, 1);
 }
 
-static void maps__purge(struct maps *maps)
+static void __maps__purge(struct maps *maps)
 {
 	struct rb_root *root = &maps->entries;
 	struct rb_node *next = rb_first(root);
@@ -448,7 +451,7 @@ static void maps__purge(struct maps *maps)
 	}
 }
 
-static void maps__purge_removed_maps(struct maps *maps)
+static void __maps__purge_removed_maps(struct maps *maps)
 {
 	struct map *pos, *n;
 
@@ -460,8 +463,10 @@ static void maps__purge_removed_maps(struct maps *maps)
 
 static void maps__exit(struct maps *maps)
 {
-	maps__purge(maps);
-	maps__purge_removed_maps(maps);
+	pthread_rwlock_wrlock(&maps->lock);
+	__maps__purge(maps);
+	__maps__purge_removed_maps(maps);
+	pthread_rwlock_unlock(&maps->lock);
 }
 
 void map_groups__exit(struct map_groups *mg)
@@ -531,20 +536,28 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg,
 					       struct map **mapp,
 					       symbol_filter_t filter)
 {
+	struct maps *maps = &mg->maps[type];
+	struct symbol *sym;
 	struct rb_node *nd;
 
-	for (nd = rb_first(&mg->maps[type].entries); nd; nd = rb_next(nd)) {
+	pthread_rwlock_rdlock(&maps->lock);
+
+	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
-		struct symbol *sym = map__find_symbol_by_name(pos, name, filter);
+
+		sym = map__find_symbol_by_name(pos, name, filter);
 
 		if (sym == NULL)
 			continue;
 		if (mapp != NULL)
 			*mapp = pos;
-		return sym;
+		goto out;
 	}
 
-	return NULL;
+	sym = NULL;
+out:
+	pthread_rwlock_unlock(&maps->lock);
+	return sym;
 }
 
 int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
@@ -564,25 +577,35 @@ int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter)
 	return ams->sym ? 0 : -1;
 }
 
-size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
-				  FILE *fp)
+static size_t maps__fprintf(struct maps *maps, FILE *fp)
 {
-	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+	size_t printed = 0;
 	struct rb_node *nd;
 
-	for (nd = rb_first(&mg->maps[type].entries); nd; nd = rb_next(nd)) {
+	pthread_rwlock_rdlock(&maps->lock);
+
+	for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) {
 		struct map *pos = rb_entry(nd, struct map, rb_node);
 		printed += fprintf(fp, "Map:");
 		printed += map__fprintf(pos, fp);
 		if (verbose > 2) {
-			printed += dso__fprintf(pos->dso, type, fp);
+			printed += dso__fprintf(pos->dso, pos->type, fp);
 			printed += fprintf(fp, "--\n");
 		}
 	}
 
+	pthread_rwlock_unlock(&maps->lock);
+
 	return printed;
 }
 
+size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
+				  FILE *fp)
+{
+	size_t printed = fprintf(fp, "%s:\n", map_type__name[type]);
+	return printed += maps__fprintf(&mg->maps[type], fp);
+}
+
 static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
 {
 	size_t printed = 0, i;
@@ -624,13 +647,17 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 	return printed + map_groups__fprintf_removed_maps(mg, fp);
 }
 
-int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
-				   FILE *fp)
+static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 {
-	struct rb_root *root = &mg->maps[map->type].entries;
-	struct rb_node *next = rb_first(root);
+	struct rb_root *root;
+	struct rb_node *next;
 	int err = 0;
 
+	pthread_rwlock_wrlock(&maps->lock);
+
+	root = &maps->entries;
+	next = rb_first(root);
+
 	while (next) {
 		struct map *pos = rb_entry(next, struct map, rb_node);
 		next = rb_next(&pos->rb_node);
@@ -658,7 +685,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 			}
 
 			before->end = map->start;
-			map_groups__insert(mg, before);
+			__maps__insert(maps, before);
 			if (verbose >= 2)
 				map__fprintf(before, fp);
 		}
@@ -672,7 +699,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
 			}
 
 			after->start = map->end;
-			map_groups__insert(mg, after);
+			__maps__insert(maps, after);
 			if (verbose >= 2)
 				map__fprintf(after, fp);
 		}
@@ -681,15 +708,24 @@ move_map:
 		 * If we have references, just move them to a separate list.
 		 */
 		if (pos->referenced)
-			list_add_tail(&pos->node, &mg->maps[map->type].removed_maps);
+			list_add_tail(&pos->node, &maps->removed_maps);
 		else
 			map__delete(pos);
 
 		if (err)
-			return err;
+			goto out;
 	}
 
-	return 0;
+	err = 0;
+out:
+	pthread_rwlock_unlock(&maps->lock);
+	return err;
+}
+
+int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map,
+				   FILE *fp)
+{
+	return maps__fixup_overlappings(&mg->maps[map->type], map, fp);
 }
 
 /*
@@ -698,19 +734,26 @@ move_map:
 int map_groups__clone(struct map_groups *mg,
 		      struct map_groups *parent, enum map_type type)
 {
+	int err = -ENOMEM;
 	struct map *map;
 	struct maps *maps = &parent->maps[type];
 
+	pthread_rwlock_rdlock(&maps->lock);
+
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		struct map *new = map__clone(map);
 		if (new == NULL)
-			return -ENOMEM;
+			goto out_unlock;
 		map_groups__insert(mg, new);
 	}
-	return 0;
+
+	err = 0;
+out_unlock:
+	pthread_rwlock_unlock(&maps->lock);
+	return err;
 }
 
-void maps__insert(struct maps *maps, struct map *map)
+static void __maps__insert(struct maps *maps, struct map *map)
 {
 	struct rb_node **p = &maps->entries.rb_node;
 	struct rb_node *parent = NULL;
@@ -730,17 +773,33 @@ void maps__insert(struct maps *maps, struct map *map)
 	rb_insert_color(&map->rb_node, &maps->entries);
 }
 
-void maps__remove(struct maps *maps, struct map *map)
+void maps__insert(struct maps *maps, struct map *map)
+{
+	pthread_rwlock_wrlock(&maps->lock);
+	__maps__insert(maps, map);
+	pthread_rwlock_unlock(&maps->lock);
+}
+
+static void __maps__remove(struct maps *maps, struct map *map)
 {
 	rb_erase(&map->rb_node, &maps->entries);
 }
 
+void maps__remove(struct maps *maps, struct map *map)
+{
+	pthread_rwlock_wrlock(&maps->lock);
+	__maps__remove(maps, map);
+	pthread_rwlock_unlock(&maps->lock);
+}
+
 struct map *maps__find(struct maps *maps, u64 ip)
 {
-	struct rb_node **p = &maps->entries.rb_node;
-	struct rb_node *parent = NULL;
+	struct rb_node **p, *parent = NULL;
 	struct map *m;
 
+	pthread_rwlock_rdlock(&maps->lock);
+
+	p = &maps->entries.rb_node;
 	while (*p != NULL) {
 		parent = *p;
 		m = rb_entry(parent, struct map, rb_node);
@@ -749,10 +808,13 @@ struct map *maps__find(struct maps *maps, u64 ip)
 		else if (ip >= m->end)
 			p = &(*p)->rb_right;
 		else
-			return m;
+			goto out;
 	}
 
-	return NULL;
+	m = NULL;
+out:
+	pthread_rwlock_unlock(&maps->lock);
+	return m;
 }
 
 struct map *maps__first(struct maps *maps)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index e3702fd468c5..6796f2785649 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -5,6 +5,7 @@
 #include <linux/compiler.h>
 #include <linux/list.h>
 #include <linux/rbtree.h>
+#include <pthread.h>
 #include <stdio.h>
 #include <stdbool.h>
 #include <linux/types.h>
@@ -60,6 +61,7 @@ struct kmap {
 
 struct maps {
 	struct rb_root	 entries;
+	pthread_rwlock_t lock;
 	struct list_head removed_maps;
 };
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index c8a3e79c5da2..8aae8b6b1cee 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -205,9 +205,11 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	struct maps *maps = &mg->maps[type];
 	struct map *next, *curr;
 
+	pthread_rwlock_wrlock(&maps->lock);
+
 	curr = maps__first(maps);
 	if (curr == NULL)
-		return;
+		goto out_unlock;
 
 	for (next = map__next(curr); next; next = map__next(curr)) {
 		curr->end = next->start;
@@ -219,6 +221,9 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type)
 	 * last map final address.
 	 */
 	curr->end = ~0ULL;
+
+out_unlock:
+	pthread_rwlock_unlock(&maps->lock);
 }
 
 struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name)
@@ -1523,12 +1528,18 @@ struct map *map_groups__find_by_name(struct map_groups *mg,
 	struct maps *maps = &mg->maps[type];
 	struct map *map;
 
+	pthread_rwlock_rdlock(&maps->lock);
+
 	for (map = maps__first(maps); map; map = map__next(map)) {
 		if (map->dso && strcmp(map->dso->short_name, name) == 0)
-			return map;
+			goto out_unlock;
 	}
 
-	return NULL;
+	map = NULL;
+
+out_unlock:
+	pthread_rwlock_unlock(&maps->lock);
+	return map;
 }
 
 int dso__load_vmlinux(struct dso *dso, struct map *map,

From facf3f0621b2e11957af1aae9085730ea78ccf85 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 15:30:09 -0300
Subject: [PATCH 217/305] perf tools: Check if a map is still in use when
 deleting it

I.e. match RB_CLEAR_NODE() with RB_EMPTY_NODE(), to check that it isn't
in a rb tree at the time of its deletion.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-vumvhird765id11zbx00d2r8@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c | 4 ++++
 tools/perf/util/map.c         | 9 +++++----
 tools/perf/util/symbol.c      | 8 ++++----
 3 files changed, 13 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index b57a027fb200..c434e1264087 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -59,6 +59,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	    (al->sym == NULL ||
 	     strcmp(ann->sym_hist_filter, al->sym->name) != 0)) {
 		/* We're only interested in a symbol named sym_hist_filter */
+		/*
+		 * FIXME: why isn't this done in the symbol_filter when loading
+		 * the DSO?
+		 */
 		if (al->sym != NULL) {
 			rb_erase(&al->sym->rb_node,
 				 &al->map->dso->symbols[al->map->type]);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 0905b07072da..4d3a92d5dff3 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -225,6 +225,7 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 
 void map__delete(struct map *map)
 {
+	BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
 	free(map);
 }
 
@@ -446,7 +447,7 @@ static void __maps__purge(struct maps *maps)
 		struct map *pos = rb_entry(next, struct map, rb_node);
 
 		next = rb_next(&pos->rb_node);
-		rb_erase(&pos->rb_node, root);
+		rb_erase_init(&pos->rb_node, root);
 		map__delete(pos);
 	}
 }
@@ -456,7 +457,7 @@ static void __maps__purge_removed_maps(struct maps *maps)
 	struct map *pos, *n;
 
 	list_for_each_entry_safe(pos, n, &maps->removed_maps, node) {
-		list_del(&pos->node);
+		list_del_init(&pos->node);
 		map__delete(pos);
 	}
 }
@@ -671,7 +672,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 			map__fprintf(pos, fp);
 		}
 
-		rb_erase(&pos->rb_node, root);
+		rb_erase_init(&pos->rb_node, root);
 		/*
 		 * Now check if we need to create new maps for areas not
 		 * overlapped by the new map:
@@ -782,7 +783,7 @@ void maps__insert(struct maps *maps, struct map *map)
 
 static void __maps__remove(struct maps *maps, struct map *map)
 {
-	rb_erase(&map->rb_node, &maps->entries);
+	rb_erase_init(&map->rb_node, &maps->entries);
 }
 
 void maps__remove(struct maps *maps, struct map *map)
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 8aae8b6b1cee..743a9b360e3d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -659,14 +659,14 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map,
 		curr_map = map_groups__find(kmaps, map->type, pos->start);
 
 		if (!curr_map || (filter && filter(curr_map, pos))) {
-			rb_erase(&pos->rb_node, root);
+			rb_erase_init(&pos->rb_node, root);
 			symbol__delete(pos);
 		} else {
 			pos->start -= curr_map->start - curr_map->pgoff;
 			if (pos->end)
 				pos->end -= curr_map->start - curr_map->pgoff;
 			if (curr_map != map) {
-				rb_erase(&pos->rb_node, root);
+				rb_erase_init(&pos->rb_node, root);
 				symbols__insert(
 					&curr_map->dso->symbols[curr_map->type],
 					pos);
@@ -1173,7 +1173,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 	/* Add new maps */
 	while (!list_empty(&md.maps)) {
 		new_map = list_entry(md.maps.next, struct map, node);
-		list_del(&new_map->node);
+		list_del_init(&new_map->node);
 		if (new_map == replacement_map) {
 			map->start	= new_map->start;
 			map->end	= new_map->end;
@@ -1211,7 +1211,7 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 out_err:
 	while (!list_empty(&md.maps)) {
 		map = list_entry(md.maps.next, struct map, node);
-		list_del(&map->node);
+		list_del_init(&map->node);
 		map__delete(map);
 	}
 	close(fd);

From 84c2cafa288939e11d21c7830e32b2aee21b723e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 25 May 2015 16:59:56 -0300
Subject: [PATCH 218/305] perf tools: Reference count struct map

We have pointers to struct map instances in several places, like in the
hist_entry instances, so we need a way to know when we can destroy them,
otherwise we may either keep leaking them or end up referencing deleted
instances.

Start fixing it by reference counting them.

This patch puts the reference count for struct map in place, replacing
direct map__delete() calls with map__put() ones and then grabbing a
reference count when adding it to the maps struct where maps for a
struct thread are kept.

Next we'll grab reference counts when setting pointers to struct map
instances, in places like in the hist_entry code.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-wi19xczk0t2a41r1i2chuio5@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c     |  3 ++-
 tools/perf/util/map.c         | 21 +++++++++++++++------
 tools/perf/util/map.h         | 11 +++++++++++
 tools/perf/util/probe-event.c |  6 +++---
 tools/perf/util/symbol-elf.c  |  2 ++
 tools/perf/util/symbol.c      |  7 +++++--
 6 files changed, 38 insertions(+), 12 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 6bf845758ae3..0c0e61cce577 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -759,7 +759,6 @@ void machine__destroy_kernel_maps(struct machine *machine)
 				kmap->ref_reloc_sym = NULL;
 		}
 
-		map__delete(machine->vmlinux_maps[type]);
 		machine->vmlinux_maps[type] = NULL;
 	}
 }
@@ -1247,6 +1246,7 @@ int machine__process_mmap2_event(struct machine *machine,
 
 	thread__insert_map(thread, map);
 	thread__put(thread);
+	map__put(map);
 	return 0;
 
 out_problem_map:
@@ -1297,6 +1297,7 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event
 
 	thread__insert_map(thread, map);
 	thread__put(thread);
+	map__put(map);
 	return 0;
 
 out_problem_map:
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 4d3a92d5dff3..af572322586d 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -139,6 +139,7 @@ void map__init(struct map *map, enum map_type type,
 	map->groups   = NULL;
 	map->referenced = false;
 	map->erange_warned = false;
+	atomic_set(&map->refcnt, 1);
 }
 
 struct map *map__new(struct machine *machine, u64 start, u64 len,
@@ -229,6 +230,12 @@ void map__delete(struct map *map)
 	free(map);
 }
 
+void map__put(struct map *map)
+{
+	if (map && atomic_dec_and_test(&map->refcnt))
+		map__delete(map);
+}
+
 void map__fixup_start(struct map *map)
 {
 	struct rb_root *symbols = &map->dso->symbols[map->type];
@@ -448,7 +455,7 @@ static void __maps__purge(struct maps *maps)
 
 		next = rb_next(&pos->rb_node);
 		rb_erase_init(&pos->rb_node, root);
-		map__delete(pos);
+		map__put(pos);
 	}
 }
 
@@ -458,7 +465,7 @@ static void __maps__purge_removed_maps(struct maps *maps)
 
 	list_for_each_entry_safe(pos, n, &maps->removed_maps, node) {
 		list_del_init(&pos->node);
-		map__delete(pos);
+		map__put(pos);
 	}
 }
 
@@ -682,7 +689,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 
 			if (before == NULL) {
 				err = -ENOMEM;
-				goto move_map;
+				goto put_map;
 			}
 
 			before->end = map->start;
@@ -696,7 +703,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 
 			if (after == NULL) {
 				err = -ENOMEM;
-				goto move_map;
+				goto put_map;
 			}
 
 			after->start = map->end;
@@ -704,14 +711,14 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 			if (verbose >= 2)
 				map__fprintf(after, fp);
 		}
-move_map:
+put_map:
 		/*
 		 * If we have references, just move them to a separate list.
 		 */
 		if (pos->referenced)
 			list_add_tail(&pos->node, &maps->removed_maps);
 		else
-			map__delete(pos);
+			map__put(pos);
 
 		if (err)
 			goto out;
@@ -772,6 +779,7 @@ static void __maps__insert(struct maps *maps, struct map *map)
 
 	rb_link_node(&map->rb_node, parent, p);
 	rb_insert_color(&map->rb_node, &maps->entries);
+	map__get(map);
 }
 
 void maps__insert(struct maps *maps, struct map *map)
@@ -784,6 +792,7 @@ void maps__insert(struct maps *maps, struct map *map)
 static void __maps__remove(struct maps *maps, struct map *map)
 {
 	rb_erase_init(&map->rb_node, &maps->entries);
+	map__put(map);
 }
 
 void maps__remove(struct maps *maps, struct map *map)
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index 6796f2785649..b8df09d94aca 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -52,6 +52,7 @@ struct map {
 
 	struct dso		*dso;
 	struct map_groups	*groups;
+	atomic_t		refcnt;
 };
 
 struct kmap {
@@ -150,6 +151,16 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 struct map *map__new2(u64 start, struct dso *dso, enum map_type type);
 void map__delete(struct map *map);
 struct map *map__clone(struct map *map);
+
+static inline struct map *map__get(struct map *map)
+{
+	if (map)
+		atomic_inc(&map->refcnt);
+	return map;
+}
+
+void map__put(struct map *map);
+
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *map, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 32471d0839d1..b0b8a8080009 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -195,7 +195,7 @@ static void put_target_map(struct map *map, bool user)
 {
 	if (map && user) {
 		/* Only the user map needs to be released */
-		map__delete(map);
+		map__put(map);
 	}
 }
 
@@ -1791,7 +1791,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp,
 
 out:
 	if (map && !is_kprobe) {
-		map__delete(map);
+		map__put(map);
 	}
 
 	return ret;
@@ -2884,7 +2884,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter,
 	dso__fprintf_symbols_by_name(map->dso, map->type, stdout);
 end:
 	if (user) {
-		map__delete(map);
+		map__put(map);
 	}
 	exit_symbol_maps();
 
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index 9d526a5312b1..fa10116a12ab 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -972,8 +972,10 @@ int dso__load_sym(struct dso *dso, struct map *map,
 					map->unmap_ip = map__unmap_ip;
 					/* Ensure maps are correctly ordered */
 					if (kmaps) {
+						map__get(map);
 						map_groups__remove(kmaps, map);
 						map_groups__insert(kmaps, map);
+						map__put(map);
 					}
 				}
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 743a9b360e3d..a3e80d6ad70a 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1180,13 +1180,16 @@ static int dso__load_kcore(struct dso *dso, struct map *map,
 			map->pgoff	= new_map->pgoff;
 			map->map_ip	= new_map->map_ip;
 			map->unmap_ip	= new_map->unmap_ip;
-			map__delete(new_map);
 			/* Ensure maps are correctly ordered */
+			map__get(map);
 			map_groups__remove(kmaps, map);
 			map_groups__insert(kmaps, map);
+			map__put(map);
 		} else {
 			map_groups__insert(kmaps, new_map);
 		}
+
+		map__put(new_map);
 	}
 
 	/*
@@ -1212,7 +1215,7 @@ out_err:
 	while (!list_empty(&md.maps)) {
 		map = list_entry(md.maps.next, struct map, node);
 		list_del_init(&map->node);
-		map__delete(map);
+		map__put(map);
 	}
 	close(fd);
 	return -EINVAL;

From 18ffdfe8e98f861a39590ef2374ad51fc963567e Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 25 May 2015 22:51:54 +0200
Subject: [PATCH 219/305] perf tools: Add hint for 'Too many events are
 opened.' error message

Enhancing the 'Too many events are opened.' error message with hint to
use use 'ulimit -n <limit>' command.

Before:

  $ perf record -e 'sched:*,syscalls:*' ls
  Error:
  Too many events are opened.
  Try again after reducing the number of events.

Now:

  $ perf record -e 'sched:*,syscalls:*' ls
  Error:
  Too many events are opened.
  Probably the maximum number of open file descriptors has been reached.
  Hint: Try again after reducing the number of events.
  Hint: Try increasing the limit with 'ulimit -n <limit>'

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Michael Petlan <mpetlan@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432587114-14924-1-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index c886b9f7a48d..a3e36fc634dc 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -2149,7 +2149,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 	case EMFILE:
 		return scnprintf(msg, size, "%s",
 			 "Too many events are opened.\n"
-			 "Try again after reducing the number of events.");
+			 "Probably the maximum number of open file descriptors has been reached.\n"
+			 "Hint: Try again after reducing the number of events.\n"
+			 "Hint: Try increasing the limit with 'ulimit -n <limit>'");
 	case ENODEV:
 		if (target->cpu_list)
 			return scnprintf(msg, size, "%s",

From 83be34a7a913bdf9f21f524333c63d9c48a28ef4 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 27 May 2015 10:51:46 -0700
Subject: [PATCH 220/305] perf annotation: Add symbol__get_annotation

Add a new utility function to get an function annotation out of existing
code.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1432749114-904-4-git-send-email-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/annotate.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 7f5bdfc9bc87..bf8043009909 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -506,6 +506,17 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 	return 0;
 }
 
+static struct annotation *symbol__get_annotation(struct symbol *sym)
+{
+	struct annotation *notes = symbol__annotation(sym);
+
+	if (notes->src == NULL) {
+		if (symbol__alloc_hist(sym) < 0)
+			return NULL;
+	}
+	return notes;
+}
+
 static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 				    int evidx, u64 addr)
 {
@@ -513,13 +524,9 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map,
 
 	if (sym == NULL)
 		return 0;
-
-	notes = symbol__annotation(sym);
-	if (notes->src == NULL) {
-		if (symbol__alloc_hist(sym) < 0)
-			return -ENOMEM;
-	}
-
+	notes = symbol__get_annotation(sym);
+	if (notes == NULL)
+		return -ENOMEM;
 	return __symbol__inc_addr_samples(sym, map, notes, evidx, addr);
 }
 

From f00898f4e20b286877b8d6d96d6e404661fd7985 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@linux.intel.com>
Date: Wed, 27 May 2015 10:51:51 -0700
Subject: [PATCH 221/305] perf tools: Move branch option parsing to own file

.. to allow sharing between builtin-record and builtin-top later.  No
code changes, just moved code.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1432749114-904-9-git-send-email-andi@firstfloor.org
[ Rename too generic branch.[ch] name to parse-branch-options.[ch] ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c            | 89 +-----------------------
 tools/perf/util/Build                  |  1 +
 tools/perf/util/parse-branch-options.c | 93 ++++++++++++++++++++++++++
 tools/perf/util/parse-branch-options.h |  5 ++
 4 files changed, 100 insertions(+), 88 deletions(-)
 create mode 100644 tools/perf/util/parse-branch-options.c
 create mode 100644 tools/perf/util/parse-branch-options.h

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 5dfe91395617..91aa2a3dcf19 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -28,6 +28,7 @@
 #include "util/thread_map.h"
 #include "util/data.h"
 #include "util/auxtrace.h"
+#include "util/parse-branch-options.h"
 
 #include <unistd.h>
 #include <sched.h>
@@ -751,94 +752,6 @@ out_delete_session:
 	return status;
 }
 
-#define BRANCH_OPT(n, m) \
-	{ .name = n, .mode = (m) }
-
-#define BRANCH_END { .name = NULL }
-
-struct branch_mode {
-	const char *name;
-	int mode;
-};
-
-static const struct branch_mode branch_modes[] = {
-	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
-	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
-	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
-	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
-	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
-	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
-	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
-	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
-	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
-	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
-	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
-	BRANCH_END
-};
-
-static int
-parse_branch_stack(const struct option *opt, const char *str, int unset)
-{
-#define ONLY_PLM \
-	(PERF_SAMPLE_BRANCH_USER	|\
-	 PERF_SAMPLE_BRANCH_KERNEL	|\
-	 PERF_SAMPLE_BRANCH_HV)
-
-	uint64_t *mode = (uint64_t *)opt->value;
-	const struct branch_mode *br;
-	char *s, *os = NULL, *p;
-	int ret = -1;
-
-	if (unset)
-		return 0;
-
-	/*
-	 * cannot set it twice, -b + --branch-filter for instance
-	 */
-	if (*mode)
-		return -1;
-
-	/* str may be NULL in case no arg is passed to -b */
-	if (str) {
-		/* because str is read-only */
-		s = os = strdup(str);
-		if (!s)
-			return -1;
-
-		for (;;) {
-			p = strchr(s, ',');
-			if (p)
-				*p = '\0';
-
-			for (br = branch_modes; br->name; br++) {
-				if (!strcasecmp(s, br->name))
-					break;
-			}
-			if (!br->name) {
-				ui__warning("unknown branch filter %s,"
-					    " check man page\n", s);
-				goto error;
-			}
-
-			*mode |= br->mode;
-
-			if (!p)
-				break;
-
-			s = p + 1;
-		}
-	}
-	ret = 0;
-
-	/* default to any branch */
-	if ((*mode & ~ONLY_PLM) == 0) {
-		*mode = PERF_SAMPLE_BRANCH_ANY;
-	}
-error:
-	free(os);
-	return ret;
-}
-
 static void callchain_debug(void)
 {
 	static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" };
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 6966d0743bf7..e4b676de2f64 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -75,6 +75,7 @@ libperf-$(CONFIG_X86) += tsc.o
 libperf-y += cloexec.o
 libperf-y += thread-stack.o
 libperf-$(CONFIG_AUXTRACE) += auxtrace.o
+libperf-y += parse-branch-options.o
 
 libperf-$(CONFIG_LIBELF) += symbol-elf.o
 libperf-$(CONFIG_LIBELF) += probe-event.o
diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
new file mode 100644
index 000000000000..9d999436658f
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.c
@@ -0,0 +1,93 @@
+#include "perf.h"
+#include "util/util.h"
+#include "util/debug.h"
+#include "util/parse-options.h"
+#include "util/parse-branch-options.h"
+
+#define BRANCH_OPT(n, m) \
+	{ .name = n, .mode = (m) }
+
+#define BRANCH_END { .name = NULL }
+
+struct branch_mode {
+	const char *name;
+	int mode;
+};
+
+static const struct branch_mode branch_modes[] = {
+	BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER),
+	BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL),
+	BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV),
+	BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY),
+	BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL),
+	BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN),
+	BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL),
+	BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX),
+	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
+	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
+	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+	BRANCH_END
+};
+
+int
+parse_branch_stack(const struct option *opt, const char *str, int unset)
+{
+#define ONLY_PLM \
+	(PERF_SAMPLE_BRANCH_USER	|\
+	 PERF_SAMPLE_BRANCH_KERNEL	|\
+	 PERF_SAMPLE_BRANCH_HV)
+
+	uint64_t *mode = (uint64_t *)opt->value;
+	const struct branch_mode *br;
+	char *s, *os = NULL, *p;
+	int ret = -1;
+
+	if (unset)
+		return 0;
+
+	/*
+	 * cannot set it twice, -b + --branch-filter for instance
+	 */
+	if (*mode)
+		return -1;
+
+	/* str may be NULL in case no arg is passed to -b */
+	if (str) {
+		/* because str is read-only */
+		s = os = strdup(str);
+		if (!s)
+			return -1;
+
+		for (;;) {
+			p = strchr(s, ',');
+			if (p)
+				*p = '\0';
+
+			for (br = branch_modes; br->name; br++) {
+				if (!strcasecmp(s, br->name))
+					break;
+			}
+			if (!br->name) {
+				ui__warning("unknown branch filter %s,"
+					    " check man page\n", s);
+				goto error;
+			}
+
+			*mode |= br->mode;
+
+			if (!p)
+				break;
+
+			s = p + 1;
+		}
+	}
+	ret = 0;
+
+	/* default to any branch */
+	if ((*mode & ~ONLY_PLM) == 0) {
+		*mode = PERF_SAMPLE_BRANCH_ANY;
+	}
+error:
+	free(os);
+	return ret;
+}
diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h
new file mode 100644
index 000000000000..b9d9470c2e82
--- /dev/null
+++ b/tools/perf/util/parse-branch-options.h
@@ -0,0 +1,5 @@
+#ifndef _PERF_PARSE_BRANCH_OPTIONS_H
+#define _PERF_PARSE_BRANCH_OPTIONS_H 1
+struct option;
+int parse_branch_stack(const struct option *opt, const char *str, int unset);
+#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */

From 09a216ea5c528356797dc12ab35234922a6c02be Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Thu, 28 May 2015 16:17:57 +1000
Subject: [PATCH 222/305] perf: Remove myself from MAINTAINERS entry

I haven't been working on perf for a while, so remove my name
from the MAINTAINERS entry for it.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/20150528061757.GB27903@iris.ozlabs.ibm.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 MAINTAINERS | 1 -
 1 file changed, 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 474bcb6c0bac..65e5e29e8760 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7633,7 +7633,6 @@ F:	kernel/delayacct.c
 
 PERFORMANCE EVENTS SUBSYSTEM
 M:	Peter Zijlstra <a.p.zijlstra@chello.nl>
-M:	Paul Mackerras <paulus@samba.org>
 M:	Ingo Molnar <mingo@redhat.com>
 M:	Arnaldo Carvalho de Melo <acme@kernel.org>
 L:	linux-kernel@vger.kernel.org

From 44848cdbbd1bacbac2a825920b699adc000d51c7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Li=C5=A1ka?= <mliska@suse.cz>
Date: Fri, 29 May 2015 14:06:44 +0200
Subject: [PATCH 223/305] perf annotate: Fix -i option, which is currently
 ignored.

Assign input_name, received from program arguments, to file data
structure.

Signed-off-by: Martin Liska <mliska@suse.cz>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/55685654.2010209@suse.cz
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index c434e1264087..4e08c2d2090e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -289,7 +289,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		},
 	};
 	struct perf_data_file file = {
-		.path  = input_name,
 		.mode  = PERF_DATA_MODE_READ,
 	};
 	const struct option options[] = {
@@ -346,6 +345,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 	else if (annotate.use_gtk)
 		use_browser = 2;
 
+	file.path  = input_name;
+
 	setup_browser(true);
 
 	annotate.session = perf_session__new(&file, false, &annotate.tool);

From 60fb7742928dab3c6a0fec7f2d2cce26d9366a3c Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Thu, 28 May 2015 02:25:05 +0000
Subject: [PATCH 224/305] perf probe: Fix 'function unused' warning

By 'make build-test' a warning is found in probe-event.c that, after
commit 419e873828 (perf probe: Show the error reason comes from
invalid DSO) the only user of kernel_get_module_dso() is
open_debuginfo(). Which is not compiled if HAVE_DWARF_SUPPORT not set.

'make build-test' found this problem when make_minimal.

This patch moves kernel_get_module_dso() to HAVE_DWARF_SUPPORT ifdef
section.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1432779905-206143-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 65 ++++++++++++++++++-----------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index b0b8a8080009..e6a02b1ffd6d 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -200,38 +200,6 @@ static void put_target_map(struct map *map, bool user)
 }
 
 
-static int kernel_get_module_dso(const char *module, struct dso **pdso)
-{
-	struct dso *dso;
-	struct map *map;
-	const char *vmlinux_name;
-	int ret = 0;
-
-	if (module) {
-		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
-				    node) {
-			if (strncmp(dso->short_name + 1, module,
-				    dso->short_name_len - 2) == 0)
-				goto found;
-		}
-		pr_debug("Failed to find module %s.\n", module);
-		return -ENOENT;
-	}
-
-	map = host_machine->vmlinux_maps[MAP__FUNCTION];
-	dso = map->dso;
-
-	vmlinux_name = symbol_conf.vmlinux_name;
-	dso->load_errno = 0;
-	if (vmlinux_name)
-		ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
-	else
-		ret = dso__load_vmlinux_path(dso, map, NULL);
-found:
-	*pdso = dso;
-	return ret;
-}
-
 static int convert_exec_to_group(const char *exec, char **result)
 {
 	char *ptr1, *ptr2, *exec_copy;
@@ -279,6 +247,39 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
 }
 
 #ifdef HAVE_DWARF_SUPPORT
+
+static int kernel_get_module_dso(const char *module, struct dso **pdso)
+{
+	struct dso *dso;
+	struct map *map;
+	const char *vmlinux_name;
+	int ret = 0;
+
+	if (module) {
+		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
+				    node) {
+			if (strncmp(dso->short_name + 1, module,
+				    dso->short_name_len - 2) == 0)
+				goto found;
+		}
+		pr_debug("Failed to find module %s.\n", module);
+		return -ENOENT;
+	}
+
+	map = host_machine->vmlinux_maps[MAP__FUNCTION];
+	dso = map->dso;
+
+	vmlinux_name = symbol_conf.vmlinux_name;
+	dso->load_errno = 0;
+	if (vmlinux_name)
+		ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL);
+	else
+		ret = dso__load_vmlinux_path(dso, map, NULL);
+found:
+	*pdso = dso;
+	return ret;
+}
+
 /*
  * Some binaries like glibc have special symbols which are on the symbol
  * table, but not in the debuginfo. If we can find the address of the

From 427cde3287f2c6349f308d0e22c9223f9ea05ef1 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 29 May 2015 16:33:29 +0300
Subject: [PATCH 225/305] perf db-export: Fix thread ref-counting

Thread ref-counting was not done for get_main_thread() meaning that
there was a thread__get() from machine__find_thread() that was not being
paired with thread__put(). Fix that.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1432906425-9911-2-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/db-export.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c
index eb7a2acb973b..1c9689e4cc17 100644
--- a/tools/perf/util/db-export.c
+++ b/tools/perf/util/db-export.c
@@ -234,7 +234,7 @@ int db_export__symbol(struct db_export *dbe, struct symbol *sym,
 static struct thread *get_main_thread(struct machine *machine, struct thread *thread)
 {
 	if (thread->pid_ == thread->tid)
-		return thread;
+		return thread__get(thread);
 
 	if (thread->pid_ == -1)
 		return NULL;
@@ -308,19 +308,18 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 	if (err)
 		return err;
 
-	/* FIXME: check refcounting for get_main_thread, that calls machine__find_thread... */
 	main_thread = get_main_thread(al->machine, thread);
 	if (main_thread)
 		comm = machine__thread_exec_comm(al->machine, main_thread);
 
 	err = db_export__thread(dbe, thread, al->machine, comm);
 	if (err)
-		return err;
+		goto out_put;
 
 	if (comm) {
 		err = db_export__comm(dbe, comm, main_thread);
 		if (err)
-			return err;
+			goto out_put;
 		es.comm_db_id = comm->db_id;
 	}
 
@@ -328,7 +327,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 
 	err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset);
 	if (err)
-		return err;
+		goto out_put;
 
 	if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) &&
 	    sample_addr_correlates_sym(&evsel->attr)) {
@@ -338,20 +337,22 @@ int db_export__sample(struct db_export *dbe, union perf_event *event,
 		err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id,
 				     &es.addr_sym_db_id, &es.addr_offset);
 		if (err)
-			return err;
+			goto out_put;
 		if (dbe->crp) {
 			err = thread_stack__process(thread, comm, sample, al,
 						    &addr_al, es.db_id,
 						    dbe->crp);
 			if (err)
-				return err;
+				goto out_put;
 		}
 	}
 
 	if (dbe->export_sample)
-		return dbe->export_sample(dbe, &es);
+		err = dbe->export_sample(dbe, &es);
 
-	return 0;
+out_put:
+	thread__put(main_thread);
+	return err;
 }
 
 static struct {

From b236512280fb96dcca45a3f5bbae1839bd673e58 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 29 May 2015 09:48:13 -0300
Subject: [PATCH 226/305] perf kmem: Fix compiler warning about may be
 accessing uninitialized variable

The last argument to strtok_r doesn't need to be initialized, its just a
placeholder to make this routine reentrant, but gcc doesn't know about
that and complains, breaking the build, fix it by setting it to NULL.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-8e8rgbg3aom9uarsyqjrsctg@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-kmem.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c
index 254614b10c4a..950f296dfcf7 100644
--- a/tools/perf/builtin-kmem.c
+++ b/tools/perf/builtin-kmem.c
@@ -644,7 +644,7 @@ static char *compact_gfp_flags(char *gfp_flags)
 {
 	char *orig_flags = strdup(gfp_flags);
 	char *new_flags = NULL;
-	char *str, *pos;
+	char *str, *pos = NULL;
 	size_t len = 0;
 
 	if (orig_flags == NULL)

From 3237f28188c3e41a938781a369652772a23e97a9 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 27 Mar 2015 13:08:01 +0000
Subject: [PATCH 227/305] perf tools: Add ARM64 perf_regs_load to support
 libunwind and enable testing

Newest libunwind does support ARM64, and perf is able to utilize it
also.

This patch enables the perf test dwarf unwind for arm64.

 Test result:
  # ./perf test unwind
  25: Test dwarf unwind                                      : Ok

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1427461681-72971-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/arch/arm64/Build                |  1 +
 tools/perf/arch/arm64/include/perf_regs.h  |  3 ++
 tools/perf/arch/arm64/tests/Build          |  2 +
 tools/perf/arch/arm64/tests/dwarf-unwind.c | 61 ++++++++++++++++++++++
 tools/perf/arch/arm64/tests/regs_load.S    | 46 ++++++++++++++++
 tools/perf/tests/Build                     |  2 +-
 tools/perf/tests/builtin-test.c            |  2 +-
 tools/perf/tests/tests.h                   |  2 +-
 8 files changed, 116 insertions(+), 3 deletions(-)
 create mode 100644 tools/perf/arch/arm64/tests/Build
 create mode 100644 tools/perf/arch/arm64/tests/dwarf-unwind.c
 create mode 100644 tools/perf/arch/arm64/tests/regs_load.S

diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build
index 54afe4a467e7..41bf61da476a 100644
--- a/tools/perf/arch/arm64/Build
+++ b/tools/perf/arch/arm64/Build
@@ -1 +1,2 @@
 libperf-y += util/
+libperf-$(CONFIG_DWARF_UNWIND) += tests/
diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h
index 1d3f39c3aa56..4e5af27e3fbf 100644
--- a/tools/perf/arch/arm64/include/perf_regs.h
+++ b/tools/perf/arch/arm64/include/perf_regs.h
@@ -5,8 +5,11 @@
 #include <linux/types.h>
 #include <asm/perf_regs.h>
 
+void perf_regs_load(u64 *regs);
+
 #define PERF_REGS_MASK	((1ULL << PERF_REG_ARM64_MAX) - 1)
 #define PERF_REGS_MAX	PERF_REG_ARM64_MAX
+#define PERF_SAMPLE_REGS_ABI	PERF_SAMPLE_REGS_ABI_64
 
 #define PERF_REG_IP	PERF_REG_ARM64_PC
 #define PERF_REG_SP	PERF_REG_ARM64_SP
diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build
new file mode 100644
index 000000000000..b30eff9bcc83
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/Build
@@ -0,0 +1,2 @@
+libperf-y += regs_load.o
+libperf-y += dwarf-unwind.o
diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c
new file mode 100644
index 000000000000..cf04a4c91c59
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c
@@ -0,0 +1,61 @@
+#include <string.h>
+#include "perf_regs.h"
+#include "thread.h"
+#include "map.h"
+#include "event.h"
+#include "debug.h"
+#include "tests/tests.h"
+
+#define STACK_SIZE 8192
+
+static int sample_ustack(struct perf_sample *sample,
+		struct thread *thread, u64 *regs)
+{
+	struct stack_dump *stack = &sample->user_stack;
+	struct map *map;
+	unsigned long sp;
+	u64 stack_size, *buf;
+
+	buf = malloc(STACK_SIZE);
+	if (!buf) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	sp = (unsigned long) regs[PERF_REG_ARM64_SP];
+
+	map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp);
+	if (!map) {
+		pr_debug("failed to get stack map\n");
+		free(buf);
+		return -1;
+	}
+
+	stack_size = map->end - sp;
+	stack_size = stack_size > STACK_SIZE ? STACK_SIZE : stack_size;
+
+	memcpy(buf, (void *) sp, stack_size);
+	stack->data = (char *) buf;
+	stack->size = stack_size;
+	return 0;
+}
+
+int test__arch_unwind_sample(struct perf_sample *sample,
+		struct thread *thread)
+{
+	struct regs_dump *regs = &sample->user_regs;
+	u64 *buf;
+
+	buf = calloc(1, sizeof(u64) * PERF_REGS_MAX);
+	if (!buf) {
+		pr_debug("failed to allocate sample uregs data\n");
+		return -1;
+	}
+
+	perf_regs_load(buf);
+	regs->abi  = PERF_SAMPLE_REGS_ABI;
+	regs->regs = buf;
+	regs->mask = PERF_REGS_MASK;
+
+	return sample_ustack(sample, thread, buf);
+}
diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S
new file mode 100644
index 000000000000..025b46e579a6
--- /dev/null
+++ b/tools/perf/arch/arm64/tests/regs_load.S
@@ -0,0 +1,46 @@
+#include <linux/linkage.h>
+
+.text
+.type perf_regs_load,%function
+#define STR_REG(r)	str x##r, [x0, 8 * r]
+#define LDR_REG(r)	ldr x##r, [x0, 8 * r]
+#define SP	(8 * 31)
+#define PC	(8 * 32)
+ENTRY(perf_regs_load)
+	STR_REG(0)
+	STR_REG(1)
+	STR_REG(2)
+	STR_REG(3)
+	STR_REG(4)
+	STR_REG(5)
+	STR_REG(6)
+	STR_REG(7)
+	STR_REG(8)
+	STR_REG(9)
+	STR_REG(10)
+	STR_REG(11)
+	STR_REG(12)
+	STR_REG(13)
+	STR_REG(14)
+	STR_REG(15)
+	STR_REG(16)
+	STR_REG(17)
+	STR_REG(18)
+	STR_REG(19)
+	STR_REG(20)
+	STR_REG(21)
+	STR_REG(22)
+	STR_REG(23)
+	STR_REG(24)
+	STR_REG(25)
+	STR_REG(26)
+	STR_REG(27)
+	STR_REG(28)
+	STR_REG(29)
+	STR_REG(30)
+	mov x1, sp
+	str x1, [x0, #SP]
+	str x30, [x0, #PC]
+	LDR_REG(1)
+	ret
+ENDPROC(perf_regs_load)
diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 6a8801b32017..67f48e6620e1 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -34,7 +34,7 @@ perf-y += kmod-path.o
 
 perf-$(CONFIG_X86) += perf-time-to-tsc.o
 
-ifeq ($(ARCH),$(filter $(ARCH),x86 arm))
+ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64))
 perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 endif
 
diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index f42af98a5c16..b5b87a24a9e7 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -126,7 +126,7 @@ static struct test {
 		.desc = "Test parsing with no sample_id_all bit set",
 		.func = test__parse_no_sample_id_all,
 	},
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 	{
 		.desc = "Test dwarf unwind",
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index a10eaf5c4767..87bcaf16ae99 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -62,7 +62,7 @@ int test__fdarray__filter(void);
 int test__fdarray__add(void);
 int test__kmod_path__parse(void);
 
-#if defined(__x86_64__) || defined(__i386__) || defined(__arm__)
+#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__)
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
 struct perf_sample;

From 43f322b4ab3f203d8eefbdad6e58e1637221e4a7 Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@linaro.org>
Date: Thu, 16 Apr 2015 16:52:53 +0300
Subject: [PATCH 228/305] perf tests: Switch from open to openat

Multiple perf tests fail on arm64 due to missing open syscall:

 2: detect open syscall event                              : FAILED!

open(2) is a legacy syscall, replaced with openat(2) since 2.6.16.  Thus
new architectures in kernel, such as arm64, don't implement these legacy
syscalls.

The patch replaces all sys_enter_open events with sys_enter_openat,
renames the related tests and test output to avoid confusion.

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429192375-13706-2-git-send-email-riku.voipio@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/builtin-test.c           | 12 ++++++------
 tools/perf/tests/open-syscall-all-cpus.c  | 12 ++++++------
 tools/perf/tests/open-syscall-tp-fields.c |  6 +++---
 tools/perf/tests/open-syscall.c           | 14 +++++++-------
 tools/perf/tests/parse-events.c           | 12 ++++++------
 tools/perf/tests/tests.h                  |  6 +++---
 6 files changed, 31 insertions(+), 31 deletions(-)

diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c
index b5b87a24a9e7..87b9961646e4 100644
--- a/tools/perf/tests/builtin-test.c
+++ b/tools/perf/tests/builtin-test.c
@@ -23,12 +23,12 @@ static struct test {
 		.func = test__vmlinux_matches_kallsyms,
 	},
 	{
-		.desc = "detect open syscall event",
-		.func = test__open_syscall_event,
+		.desc = "detect openat syscall event",
+		.func = test__openat_syscall_event,
 	},
 	{
-		.desc = "detect open syscall event on all cpus",
-		.func = test__open_syscall_event_on_all_cpus,
+		.desc = "detect openat syscall event on all cpus",
+		.func = test__openat_syscall_event_on_all_cpus,
 	},
 	{
 		.desc = "read samples using the mmap interface",
@@ -73,8 +73,8 @@ static struct test {
 		.func = test__perf_evsel__tp_sched_test,
 	},
 	{
-		.desc = "Generate and check syscalls:sys_enter_open event fields",
-		.func = test__syscall_open_tp_fields,
+		.desc = "Generate and check syscalls:sys_enter_openat event fields",
+		.func = test__syscall_openat_tp_fields,
 	},
 	{
 		.desc = "struct perf_event_attr setup",
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/open-syscall-all-cpus.c
index 3ec885c48f8f..e34dfdf96b5a 100644
--- a/tools/perf/tests/open-syscall-all-cpus.c
+++ b/tools/perf/tests/open-syscall-all-cpus.c
@@ -4,12 +4,12 @@
 #include "cpumap.h"
 #include "debug.h"
 
-int test__open_syscall_event_on_all_cpus(void)
+int test__openat_syscall_event_on_all_cpus(void)
 {
 	int err = -1, fd, cpu;
 	struct cpu_map *cpus;
 	struct perf_evsel *evsel;
-	unsigned int nr_open_calls = 111, i;
+	unsigned int nr_openat_calls = 111, i;
 	cpu_set_t cpu_set;
 	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
 	char sbuf[STRERR_BUFSIZE];
@@ -27,7 +27,7 @@ int test__open_syscall_event_on_all_cpus(void)
 
 	CPU_ZERO(&cpu_set);
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
 	if (evsel == NULL) {
 		if (tracefs_configured())
 			pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -46,7 +46,7 @@ int test__open_syscall_event_on_all_cpus(void)
 	}
 
 	for (cpu = 0; cpu < cpus->nr; ++cpu) {
-		unsigned int ncalls = nr_open_calls + cpu;
+		unsigned int ncalls = nr_openat_calls + cpu;
 		/*
 		 * XXX eventually lift this restriction in a way that
 		 * keeps perf building on older glibc installations
@@ -66,7 +66,7 @@ int test__open_syscall_event_on_all_cpus(void)
 			goto out_close_fd;
 		}
 		for (i = 0; i < ncalls; ++i) {
-			fd = open("/etc/passwd", O_RDONLY);
+			fd = openat(0, "/etc/passwd", O_RDONLY);
 			close(fd);
 		}
 		CPU_CLR(cpus->map[cpu], &cpu_set);
@@ -96,7 +96,7 @@ int test__open_syscall_event_on_all_cpus(void)
 			break;
 		}
 
-		expected = nr_open_calls + cpu;
+		expected = nr_openat_calls + cpu;
 		if (evsel->counts->cpu[cpu].val != expected) {
 			pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n",
 				 expected, cpus->map[cpu], evsel->counts->cpu[cpu].val);
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/open-syscall-tp-fields.c
index 127dcae0b760..6245221479d7 100644
--- a/tools/perf/tests/open-syscall-tp-fields.c
+++ b/tools/perf/tests/open-syscall-tp-fields.c
@@ -5,7 +5,7 @@
 #include "tests.h"
 #include "debug.h"
 
-int test__syscall_open_tp_fields(void)
+int test__syscall_openat_tp_fields(void)
 {
 	struct record_opts opts = {
 		.target = {
@@ -29,7 +29,7 @@ int test__syscall_open_tp_fields(void)
 		goto out;
 	}
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
 	if (evsel == NULL) {
 		pr_debug("%s: perf_evsel__newtp\n", __func__);
 		goto out_delete_evlist;
@@ -66,7 +66,7 @@ int test__syscall_open_tp_fields(void)
 	/*
 	 * Generate the event:
 	 */
-	open(filename, flags);
+	openat(AT_FDCWD, filename, flags);
 
 	while (1) {
 		int before = nr_events;
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/open-syscall.c
index 07aa319bf334..9f9491bb8e48 100644
--- a/tools/perf/tests/open-syscall.c
+++ b/tools/perf/tests/open-syscall.c
@@ -3,11 +3,11 @@
 #include "debug.h"
 #include "tests.h"
 
-int test__open_syscall_event(void)
+int test__openat_syscall_event(void)
 {
 	int err = -1, fd;
 	struct perf_evsel *evsel;
-	unsigned int nr_open_calls = 111, i;
+	unsigned int nr_openat_calls = 111, i;
 	struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX);
 	char sbuf[STRERR_BUFSIZE];
 
@@ -16,7 +16,7 @@ int test__open_syscall_event(void)
 		return -1;
 	}
 
-	evsel = perf_evsel__newtp("syscalls", "sys_enter_open");
+	evsel = perf_evsel__newtp("syscalls", "sys_enter_openat");
 	if (evsel == NULL) {
 		if (tracefs_configured())
 			pr_debug("is tracefs mounted on /sys/kernel/tracing?\n");
@@ -34,8 +34,8 @@ int test__open_syscall_event(void)
 		goto out_evsel_delete;
 	}
 
-	for (i = 0; i < nr_open_calls; ++i) {
-		fd = open("/etc/passwd", O_RDONLY);
+	for (i = 0; i < nr_openat_calls; ++i) {
+		fd = openat(0, "/etc/passwd", O_RDONLY);
 		close(fd);
 	}
 
@@ -44,9 +44,9 @@ int test__open_syscall_event(void)
 		goto out_close_fd;
 	}
 
-	if (evsel->counts->cpu[0].val != nr_open_calls) {
+	if (evsel->counts->cpu[0].val != nr_openat_calls) {
 		pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n",
-			 nr_open_calls, evsel->counts->cpu[0].val);
+			 nr_openat_calls, evsel->counts->cpu[0].val);
 		goto out_close_fd;
 	}
 
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 82d2a1636f7f..d76963f7ad3d 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -427,7 +427,7 @@ static int test__checkevent_list(struct perf_evlist *evlist)
 	TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv);
 	TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip);
 
-	/* syscalls:sys_enter_open:k */
+	/* syscalls:sys_enter_openat:k */
 	evsel = perf_evsel__next(evsel);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong sample_type",
@@ -665,7 +665,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused)
 	TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries);
 	TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups);
 
-	/* group1 syscalls:sys_enter_open:H */
+	/* group1 syscalls:sys_enter_openat:H */
 	evsel = leader = perf_evlist__first(evlist);
 	TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type);
 	TEST_ASSERT_VAL("wrong sample_type",
@@ -1293,7 +1293,7 @@ struct evlist_test {
 
 static struct evlist_test test__events[] = {
 	{
-		.name  = "syscalls:sys_enter_open",
+		.name  = "syscalls:sys_enter_openat",
 		.check = test__checkevent_tracepoint,
 		.id    = 0,
 	},
@@ -1353,7 +1353,7 @@ static struct evlist_test test__events[] = {
 		.id    = 11,
 	},
 	{
-		.name  = "syscalls:sys_enter_open:k",
+		.name  = "syscalls:sys_enter_openat:k",
 		.check = test__checkevent_tracepoint_modifier,
 		.id    = 12,
 	},
@@ -1408,7 +1408,7 @@ static struct evlist_test test__events[] = {
 		.id    = 22,
 	},
 	{
-		.name  = "r1,syscalls:sys_enter_open:k,1:1:hp",
+		.name  = "r1,syscalls:sys_enter_openat:k,1:1:hp",
 		.check = test__checkevent_list,
 		.id    = 23,
 	},
@@ -1443,7 +1443,7 @@ static struct evlist_test test__events[] = {
 		.id    = 29,
 	},
 	{
-		.name  = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
+		.name  = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u",
 		.check = test__group3,
 		.id    = 30,
 	},
diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h
index 87bcaf16ae99..8e5038b48ba8 100644
--- a/tools/perf/tests/tests.h
+++ b/tools/perf/tests/tests.h
@@ -26,14 +26,14 @@ enum {
 
 /* Tests */
 int test__vmlinux_matches_kallsyms(void);
-int test__open_syscall_event(void);
-int test__open_syscall_event_on_all_cpus(void);
+int test__openat_syscall_event(void);
+int test__openat_syscall_event_on_all_cpus(void);
 int test__basic_mmap(void);
 int test__PERF_RECORD(void);
 int test__rdpmc(void);
 int test__perf_evsel__roundtrip_name_test(void);
 int test__perf_evsel__tp_sched_test(void);
-int test__syscall_open_tp_fields(void);
+int test__syscall_openat_tp_fields(void);
 int test__pmu(void);
 int test__attr(void);
 int test__dso_data(void);

From fbb2df231e11d8086dc51df73376b16407463de1 Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@linaro.org>
Date: Fri, 29 May 2015 12:36:11 -0300
Subject: [PATCH 229/305] perf tests: Aename open*.c to openat*.c

Since the test being tested is now openat rather than open, rename the
files to make it explicit. The patch is separeted from the first to make
it simpler to deal with any potential conflicts in the Makefile

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429192375-13706-3-git-send-email-riku.voipio@linaro.org
[ Fixed it up wrt Build files ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/Build                                      | 6 +++---
 .../{open-syscall-all-cpus.c => openat-syscall-all-cpus.c}  | 0
 ...{open-syscall-tp-fields.c => openat-syscall-tp-fields.c} | 0
 tools/perf/tests/{open-syscall.c => openat-syscall.c}       | 0
 4 files changed, 3 insertions(+), 3 deletions(-)
 rename tools/perf/tests/{open-syscall-all-cpus.c => openat-syscall-all-cpus.c} (100%)
 rename tools/perf/tests/{open-syscall-tp-fields.c => openat-syscall-tp-fields.c} (100%)
 rename tools/perf/tests/{open-syscall.c => openat-syscall.c} (100%)

diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build
index 67f48e6620e1..ee41e705b2eb 100644
--- a/tools/perf/tests/Build
+++ b/tools/perf/tests/Build
@@ -3,9 +3,9 @@ perf-y += parse-events.o
 perf-y += dso-data.o
 perf-y += attr.o
 perf-y += vmlinux-kallsyms.o
-perf-y += open-syscall.o
-perf-y += open-syscall-all-cpus.o
-perf-y += open-syscall-tp-fields.o
+perf-y += openat-syscall.o
+perf-y += openat-syscall-all-cpus.o
+perf-y += openat-syscall-tp-fields.o
 perf-y += mmap-basic.o
 perf-y += perf-record.o
 perf-y += rdpmc.o
diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
similarity index 100%
rename from tools/perf/tests/open-syscall-all-cpus.c
rename to tools/perf/tests/openat-syscall-all-cpus.c
diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c
similarity index 100%
rename from tools/perf/tests/open-syscall-tp-fields.c
rename to tools/perf/tests/openat-syscall-tp-fields.c
diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/openat-syscall.c
similarity index 100%
rename from tools/perf/tests/open-syscall.c
rename to tools/perf/tests/openat-syscall.c

From cec83938707a7055c1f9decd81f2741420518e64 Mon Sep 17 00:00:00 2001
From: Riku Voipio <riku.voipio@linaro.org>
Date: Thu, 16 Apr 2015 16:52:55 +0300
Subject: [PATCH 230/305] perf tests: Remove getpgrp from mmap-basic

mmap-basic fails on arm64.

 4: read samples using the mmap interface: read samples using the mmap interface: FAILED!

This is because arm64 doesn't come with getpgrp() syscall. The syscall
is a BSD compatibility wrapper, Archs that don't define
__ARCH_WANT_SYS_GETPGRP do not have this. Remove it, since getpgid is
already used in the testcase.

Signed-off-by: Riku Voipio <riku.voipio@linaro.org>
Reviewed-by: Ingo Molnar <mingo@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1429192375-13706-4-git-send-email-riku.voipio@linaro.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/mmap-basic.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c
index 9b9622a33932..5855cf471210 100644
--- a/tools/perf/tests/mmap-basic.c
+++ b/tools/perf/tests/mmap-basic.c
@@ -23,10 +23,8 @@ int test__basic_mmap(void)
 	struct cpu_map *cpus;
 	struct perf_evlist *evlist;
 	cpu_set_t cpu_set;
-	const char *syscall_names[] = { "getsid", "getppid", "getpgrp",
-					"getpgid", };
-	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp,
-				      (void*)getpgid };
+	const char *syscall_names[] = { "getsid", "getppid", "getpgid", };
+	pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid };
 #define nsyscalls ARRAY_SIZE(syscall_names)
 	unsigned int nr_events[nsyscalls],
 		     expected_nr_events[nsyscalls], i, j;

From 459ce518d9b563a99faa73aa340b764e0b3fb143 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2015 12:40:55 -0300
Subject: [PATCH 231/305] perf machine: Adopt findnew_kernel method

It never was a 'struct dso' method, so fix that by rename
dso__kernel_findnew() to machine__findnew_kernel().

At some point I'll move it all to the machine.[ch] files, for now
lets ease patch review by not moving too much stuff.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-zrxmblgsg5vx0iv4rhvq2f6l@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c     |  4 ++--
 tools/perf/util/dso.h     |  4 ++--
 tools/perf/util/machine.c | 11 +++++------
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 7e11a700303f..fe5236833164 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -827,8 +827,8 @@ struct map *dso__new_map(const char *name)
 	return map;
 }
 
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-		    const char *short_name, int dso_type)
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type)
 {
 	/*
 	 * The kernel dso could be created by build_id processing.
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index bcec06ad73a2..24a507a54147 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -294,8 +294,8 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map,
 bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by);
 
 struct map *dso__new_map(const char *name);
-struct dso *dso__kernel_findnew(struct machine *machine, const char *name,
-				const char *short_name, int dso_type);
+struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
+				    const char *short_name, int dso_type);
 
 void dsos__add(struct dsos *dsos, struct dso *dso);
 struct dso *dsos__addnew(struct dsos *dsos, const char *name);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 0c0e61cce577..8934dc4345fe 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -640,9 +640,8 @@ static struct dso *machine__get_kernel(struct machine *machine)
 		if (!vmlinux_name)
 			vmlinux_name = "[kernel.kallsyms]";
 
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[kernel]",
-					     DSO_TYPE_KERNEL);
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[kernel]", DSO_TYPE_KERNEL);
 	} else {
 		char bf[PATH_MAX];
 
@@ -652,9 +651,9 @@ static struct dso *machine__get_kernel(struct machine *machine)
 			vmlinux_name = machine__mmap_name(machine, bf,
 							  sizeof(bf));
 
-		kernel = dso__kernel_findnew(machine, vmlinux_name,
-					     "[guest.kernel]",
-					     DSO_TYPE_GUEST_KERNEL);
+		kernel = machine__findnew_kernel(machine, vmlinux_name,
+						 "[guest.kernel]",
+						 DSO_TYPE_GUEST_KERNEL);
 	}
 
 	if (kernel != NULL && (!kernel->has_build_id))

From 3d39ac538629e4f00a6e1c38d46346f1b8e69505 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 28 May 2015 13:06:42 -0300
Subject: [PATCH 232/305] perf machine: No need to have two DSOs lists

We can, given a DSO, figure out if it is a kernel, a kernel module or
a userlevel DSO, so stop having to process two lists in several
functions.

If searching becomes an issue at some point, we can have them in a
rbtree, etc.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-s4yb0onpdywu6dj2xl9lxi4t@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/hists_common.c |  2 +-
 tools/perf/util/build-id.c      | 59 +++++++++------------------------
 tools/perf/util/dso.c           |  2 +-
 tools/perf/util/header.c        |  6 +---
 tools/perf/util/machine.c       | 28 ++++++----------
 tools/perf/util/machine.h       |  3 +-
 tools/perf/util/map.c           |  2 +-
 tools/perf/util/probe-event.c   |  5 +--
 tools/perf/util/symbol-elf.c    |  6 +---
 tools/perf/util/vdso.c          |  6 ++--
 10 files changed, 39 insertions(+), 80 deletions(-)

diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 456f884eb27b..bcde1d27919c 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -121,7 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines)
 		size_t k;
 		struct dso *dso;
 
-		dso = __dsos__findnew(&machine->user_dsos,
+		dso = __dsos__findnew(&machine->dsos,
 				      fake_symbols[i].dso_name);
 		if (dso == NULL)
 			goto out;
diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c
index ad8cfcbaa25d..1f6fc2323ef9 100644
--- a/tools/perf/util/build-id.c
+++ b/tools/perf/util/build-id.c
@@ -162,15 +162,20 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id,
 	return write_padded(fd, name, name_len + 1, len);
 }
 
-static int __dsos__write_buildid_table(struct list_head *head,
-				       struct machine *machine,
-				       pid_t pid, u16 misc, int fd)
+static int machine__write_buildid_table(struct machine *machine, int fd)
 {
+	int err = 0;
 	char nm[PATH_MAX];
 	struct dso *pos;
+	u16 kmisc = PERF_RECORD_MISC_KERNEL,
+	    umisc = PERF_RECORD_MISC_USER;
 
-	dsos__for_each_with_build_id(pos, head) {
-		int err;
+	if (!machine__is_host(machine)) {
+		kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
+		umisc = PERF_RECORD_MISC_GUEST_USER;
+	}
+
+	dsos__for_each_with_build_id(pos, &machine->dsos.head) {
 		const char *name;
 		size_t name_len;
 
@@ -189,32 +194,12 @@ static int __dsos__write_buildid_table(struct list_head *head,
 			name_len = pos->long_name_len + 1;
 		}
 
-		err = write_buildid(name, name_len, pos->build_id,
-				    pid, misc, fd);
+		err = write_buildid(name, name_len, pos->build_id, machine->pid,
+				    pos->kernel ? kmisc : umisc, fd);
 		if (err)
-			return err;
+			break;
 	}
 
-	return 0;
-}
-
-static int machine__write_buildid_table(struct machine *machine, int fd)
-{
-	int err;
-	u16 kmisc = PERF_RECORD_MISC_KERNEL,
-	    umisc = PERF_RECORD_MISC_USER;
-
-	if (!machine__is_host(machine)) {
-		kmisc = PERF_RECORD_MISC_GUEST_KERNEL;
-		umisc = PERF_RECORD_MISC_GUEST_USER;
-	}
-
-	err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine,
-					  machine->pid, kmisc, fd);
-	if (err == 0)
-		err = __dsos__write_buildid_table(&machine->user_dsos.head,
-						  machine, machine->pid, umisc,
-						  fd);
 	return err;
 }
 
@@ -247,13 +232,7 @@ static int __dsos__hit_all(struct list_head *head)
 
 static int machine__hit_all_dsos(struct machine *machine)
 {
-	int err;
-
-	err = __dsos__hit_all(&machine->kernel_dsos.head);
-	if (err)
-		return err;
-
-	return __dsos__hit_all(&machine->user_dsos.head);
+	return __dsos__hit_all(&machine->dsos.head);
 }
 
 int dsos__hit_all(struct perf_session *session)
@@ -493,9 +472,7 @@ static int __dsos__cache_build_ids(struct list_head *head,
 
 static int machine__cache_build_ids(struct machine *machine)
 {
-	int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine);
-	ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine);
-	return ret;
+	return __dsos__cache_build_ids(&machine->dsos.head, machine);
 }
 
 int perf_session__cache_build_ids(struct perf_session *session)
@@ -520,11 +497,7 @@ int perf_session__cache_build_ids(struct perf_session *session)
 
 static bool machine__read_build_ids(struct machine *machine, bool with_hits)
 {
-	bool ret;
-
-	ret  = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits);
-	ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits);
-	return ret;
+	return __dsos__read_build_ids(&machine->dsos.head, with_hits);
 }
 
 bool perf_session__read_build_ids(struct perf_session *session, bool with_hits)
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index fe5236833164..ff040b0569d6 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -833,7 +833,7 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
 	/*
 	 * The kernel dso could be created by build_id processing.
 	 */
-	struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name);
+	struct dso *dso = __dsos__findnew(&machine->dsos, name);
 
 	/*
 	 * We need to run this in all cases, since during the build_id
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 3f0d809d853a..a900e9441fb5 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1238,7 +1238,6 @@ static int __event_process_build_id(struct build_id_event *bev,
 				    struct perf_session *session)
 {
 	int err = -1;
-	struct dsos *dsos;
 	struct machine *machine;
 	u16 misc;
 	struct dso *dso;
@@ -1253,22 +1252,19 @@ static int __event_process_build_id(struct build_id_event *bev,
 	switch (misc) {
 	case PERF_RECORD_MISC_KERNEL:
 		dso_type = DSO_TYPE_KERNEL;
-		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_GUEST_KERNEL:
 		dso_type = DSO_TYPE_GUEST_KERNEL;
-		dsos = &machine->kernel_dsos;
 		break;
 	case PERF_RECORD_MISC_USER:
 	case PERF_RECORD_MISC_GUEST_USER:
 		dso_type = DSO_TYPE_USER;
-		dsos = &machine->user_dsos;
 		break;
 	default:
 		goto out;
 	}
 
-	dso = __dsos__findnew(dsos, filename);
+	dso = __dsos__findnew(&machine->dsos, filename);
 	if (dso != NULL) {
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8934dc4345fe..ffd31079d447 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -26,8 +26,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
 {
 	map_groups__init(&machine->kmaps, machine);
 	RB_CLEAR_NODE(&machine->rb_node);
-	dsos__init(&machine->user_dsos);
-	dsos__init(&machine->kernel_dsos);
+	dsos__init(&machine->dsos);
 
 	machine->threads = RB_ROOT;
 	pthread_rwlock_init(&machine->threads_lock, NULL);
@@ -111,8 +110,7 @@ void machine__delete_threads(struct machine *machine)
 void machine__exit(struct machine *machine)
 {
 	map_groups__exit(&machine->kmaps);
-	dsos__delete(&machine->user_dsos);
-	dsos__delete(&machine->kernel_dsos);
+	dsos__delete(&machine->dsos);
 	vdso__exit(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
@@ -490,9 +488,9 @@ machine__module_dso(struct machine *machine, struct kmod_path *m,
 {
 	struct dso *dso;
 
-	dso = dsos__find(&machine->kernel_dsos, m->name, true);
+	dso = dsos__find(&machine->dsos, m->name, true);
 	if (!dso) {
-		dso = dsos__addnew(&machine->kernel_dsos, m->name);
+		dso = dsos__addnew(&machine->dsos, m->name);
 		if (dso == NULL)
 			return NULL;
 
@@ -561,13 +559,11 @@ out:
 size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 {
 	struct rb_node *nd;
-	size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) +
-		     __dsos__fprintf(&machines->host.user_dsos.head, fp);
+	size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp);
 
 	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
 		struct machine *pos = rb_entry(nd, struct machine, rb_node);
-		ret += __dsos__fprintf(&pos->kernel_dsos.head, fp);
-		ret += __dsos__fprintf(&pos->user_dsos.head, fp);
+		ret += __dsos__fprintf(&pos->dsos.head, fp);
 	}
 
 	return ret;
@@ -576,8 +572,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp)
 size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp,
 				     bool (skip)(struct dso *dso, int parm), int parm)
 {
-	return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) +
-	       __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm);
+	return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm);
 }
 
 size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp,
@@ -1106,7 +1101,7 @@ static bool machine__uses_kcore(struct machine *machine)
 {
 	struct dso *dso;
 
-	list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
+	list_for_each_entry(dso, &machine->dsos.head, node) {
 		if (dso__is_kcore(dso))
 			return true;
 	}
@@ -1153,8 +1148,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		struct dso *kernel = NULL;
 		struct dso *dso;
 
-		list_for_each_entry(dso, &machine->kernel_dsos.head, node) {
-			if (is_kernel_module(dso->long_name))
+		list_for_each_entry(dso, &machine->dsos.head, node) {
+			if (dso->kernel && is_kernel_module(dso->long_name))
 				continue;
 
 			kernel = dso;
@@ -1162,8 +1157,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		}
 
 		if (kernel == NULL)
-			kernel = __dsos__findnew(&machine->kernel_dsos,
-						 kmmap_prefix);
+			kernel = __dsos__findnew(&machine->dsos, kmmap_prefix);
 		if (kernel == NULL)
 			goto out_problem;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index c7963c63c474..aabca583e655 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -34,8 +34,7 @@ struct machine {
 	struct list_head  dead_threads;
 	struct thread	  *last_match;
 	struct vdso_info  *vdso_info;
-	struct dsos	  user_dsos;
-	struct dsos	  kernel_dsos;
+	struct dsos	  dsos;
 	struct map_groups kmaps;
 	struct map	  *vmlinux_maps[MAP__NR_TYPES];
 	u64		  kernel_start;
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index af572322586d..57ff0256c22c 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -180,7 +180,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			pgoff = 0;
 			dso = vdso__dso_findnew(machine, thread);
 		} else
-			dso = __dsos__findnew(&machine->user_dsos, filename);
+			dso = __dsos__findnew(&machine->dsos, filename);
 
 		if (dso == NULL)
 			goto out_delete;
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e6a02b1ffd6d..d27edef5eb5b 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -256,8 +256,9 @@ static int kernel_get_module_dso(const char *module, struct dso **pdso)
 	int ret = 0;
 
 	if (module) {
-		list_for_each_entry(dso, &host_machine->kernel_dsos.head,
-				    node) {
+		list_for_each_entry(dso, &host_machine->dsos.head, node) {
+			if (!dso->kernel)
+				continue;
 			if (strncmp(dso->short_name + 1, module,
 				    dso->short_name_len - 2) == 0)
 				goto found;
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index fa10116a12ab..a93ba85509b2 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1031,11 +1031,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				}
 				curr_dso->symtab_type = dso->symtab_type;
 				map_groups__insert(kmaps, curr_map);
-				/*
-				 * The new DSO should go to the kernel DSOS
-				 */
-				dsos__add(&map->groups->machine->kernel_dsos,
-					  curr_dso);
+				dsos__add(&map->groups->machine->dsos, curr_dso);
 				dso__set_loaded(curr_dso, map->type);
 			} else
 				curr_dso = curr_map->dso;
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 5c7dd796979d..d3651b43e945 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -127,7 +127,7 @@ static struct dso *vdso__new(struct machine *machine, const char *short_name,
 
 	dso = dso__new(short_name);
 	if (dso != NULL) {
-		dsos__add(&machine->user_dsos, dso);
+		dsos__add(&machine->dsos, dso);
 		dso__set_long_name(dso, long_name, false);
 	}
 
@@ -236,7 +236,7 @@ static struct dso *vdso__findnew_compat(struct machine *machine,
 	const char *file_name;
 	struct dso *dso;
 
-	dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true);
+	dso = dsos__find(&machine->dsos, vdso_file->dso_name, true);
 	if (dso)
 		return dso;
 
@@ -299,7 +299,7 @@ struct dso *vdso__dso_findnew(struct machine *machine,
 		return dso;
 #endif
 
-	dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true);
+	dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
 	if (!dso) {
 		char *file;
 

From aa7cc2ae5ae69aff555793fbfcff514141bb23f3 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 29 May 2015 11:31:12 -0300
Subject: [PATCH 233/305] perf machine: Introduce machine__findnew_dso() method

Similar to machine__findnew_thread(), also prepping for refcounting and
locking, this time for struct dso instances.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-fv3tshv5o1413coh147lszjc@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/hists_common.c | 3 +--
 tools/perf/util/dso.c           | 2 +-
 tools/perf/util/header.c        | 2 +-
 tools/perf/util/machine.c       | 7 ++++++-
 tools/perf/util/machine.h       | 2 ++
 tools/perf/util/map.c           | 2 +-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index bcde1d27919c..915f60af6a0e 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -121,8 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines)
 		size_t k;
 		struct dso *dso;
 
-		dso = __dsos__findnew(&machine->dsos,
-				      fake_symbols[i].dso_name);
+		dso = machine__findnew_dso(machine, fake_symbols[i].dso_name);
 		if (dso == NULL)
 			goto out;
 
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index ff040b0569d6..b335db3532a2 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -833,7 +833,7 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
 	/*
 	 * The kernel dso could be created by build_id processing.
 	 */
-	struct dso *dso = __dsos__findnew(&machine->dsos, name);
+	struct dso *dso = machine__findnew_dso(machine, name);
 
 	/*
 	 * We need to run this in all cases, since during the build_id
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index a900e9441fb5..851143a7988d 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1264,7 +1264,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 		goto out;
 	}
 
-	dso = __dsos__findnew(&machine->dsos, filename);
+	dso = machine__findnew_dso(machine, filename);
 	if (dso != NULL) {
 		char sbuild_id[BUILD_ID_SIZE * 2 + 1];
 
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index ffd31079d447..698da1da5168 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1157,7 +1157,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		}
 
 		if (kernel == NULL)
-			kernel = __dsos__findnew(&machine->dsos, kmmap_prefix);
+			kernel = machine__findnew_dso(machine, kmmap_prefix);
 		if (kernel == NULL)
 			goto out_problem;
 
@@ -1915,3 +1915,8 @@ int machine__get_kernel_start(struct machine *machine)
 	}
 	return err;
 }
+
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
+{
+	return __dsos__findnew(&machine->dsos, filename);
+}
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index aabca583e655..39a0ca06cbd8 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -154,6 +154,8 @@ static inline bool machine__is_host(struct machine *machine)
 struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid);
 
+struct dso *machine__findnew_dso(struct machine *machine, const char *filename);
+
 size_t machine__fprintf(struct machine *machine, FILE *fp);
 
 static inline
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 57ff0256c22c..d15e1e9dd2ae 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -180,7 +180,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			pgoff = 0;
 			dso = vdso__dso_findnew(machine, thread);
 		} else
-			dso = __dsos__findnew(&machine->dsos, filename);
+			dso = machine__findnew_dso(machine, filename);
 
 		if (dso == NULL)
 			goto out_delete;

From 9a4388c711d07889217b19eaf63485122dec8817 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 29 May 2015 11:54:08 -0300
Subject: [PATCH 234/305] perf machine: Fix up vdso methods names

To make it consistent with the other dso lifetime routines.

For instance:

 struct dso *vdso__new(struct machine *machine, const char *short_name,
		        const char *long_name)

Becomes:

 struct dso *machine__addnew_vdso(struct machine *machine, const
				  char *short_name, const char *long_name)

Because:

1) There is no 'struct vdso' for us to have vdso__ prefixed routines.

2) Because it will not really just create a new instance of 'struct
   dso', it'll call dso__new() but it will also insert it into the
   DSO's list/rbtree, and we have a method name for that: 'addnew',
   just like we have dsos__addnew().

3) So it is really a 'struct machine' operation, it is the first
   argument, etc.

This way the place where this is used gets consistent:

                if (vdso) {
                        pgoff = 0;
-                       dso = vdso__dso_findnew(machine, thread);
+                       dso = machine__findnew_vdso(machine, thread);
                } else
                        dso = machine__findnew_dso(machine, filename);

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-r3w3tvh8exm9xfz3p4tz9qbz@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c |  2 +-
 tools/perf/util/map.c     |  2 +-
 tools/perf/util/vdso.c    | 18 +++++++++---------
 tools/perf/util/vdso.h    |  4 ++--
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 698da1da5168..2ed61f59d415 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -111,7 +111,7 @@ void machine__exit(struct machine *machine)
 {
 	map_groups__exit(&machine->kmaps);
 	dsos__delete(&machine->dsos);
-	vdso__exit(machine);
+	machine__exit_vdso(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
 	pthread_rwlock_destroy(&machine->threads_lock);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index d15e1e9dd2ae..365011c233a6 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -178,7 +178,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 
 		if (vdso) {
 			pgoff = 0;
-			dso = vdso__dso_findnew(machine, thread);
+			dso = machine__findnew_vdso(machine, thread);
 		} else
 			dso = machine__findnew_dso(machine, filename);
 
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index d3651b43e945..2e8f6886ca72 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -101,7 +101,7 @@ static char *get_file(struct vdso_file *vdso_file)
 	return vdso;
 }
 
-void vdso__exit(struct machine *machine)
+void machine__exit_vdso(struct machine *machine)
 {
 	struct vdso_info *vdso_info = machine->vdso_info;
 
@@ -120,8 +120,8 @@ void vdso__exit(struct machine *machine)
 	zfree(&machine->vdso_info);
 }
 
-static struct dso *vdso__new(struct machine *machine, const char *short_name,
-			     const char *long_name)
+static struct dso *machine__addnew_vdso(struct machine *machine, const char *short_name,
+					const char *long_name)
 {
 	struct dso *dso;
 
@@ -244,10 +244,10 @@ static struct dso *vdso__findnew_compat(struct machine *machine,
 	if (!file_name)
 		return NULL;
 
-	return vdso__new(machine, vdso_file->dso_name, file_name);
+	return machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
 }
 
-static int vdso__dso_findnew_compat(struct machine *machine,
+static int machine__findnew_vdso_compat(struct machine *machine,
 				    struct thread *thread,
 				    struct vdso_info *vdso_info,
 				    struct dso **dso)
@@ -281,8 +281,8 @@ static int vdso__dso_findnew_compat(struct machine *machine,
 
 #endif
 
-struct dso *vdso__dso_findnew(struct machine *machine,
-			      struct thread *thread __maybe_unused)
+struct dso *machine__findnew_vdso(struct machine *machine,
+				  struct thread *thread __maybe_unused)
 {
 	struct vdso_info *vdso_info;
 	struct dso *dso;
@@ -295,7 +295,7 @@ struct dso *vdso__dso_findnew(struct machine *machine,
 		return NULL;
 
 #if BITS_PER_LONG == 64
-	if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso))
+	if (machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
 		return dso;
 #endif
 
@@ -307,7 +307,7 @@ struct dso *vdso__dso_findnew(struct machine *machine,
 		if (!file)
 			return NULL;
 
-		dso = vdso__new(machine, DSO__NAME_VDSO, file);
+		dso = machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
 	}
 
 	return dso;
diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h
index d97da1616f0c..cdc4fabfc212 100644
--- a/tools/perf/util/vdso.h
+++ b/tools/perf/util/vdso.h
@@ -23,7 +23,7 @@ bool dso__is_vdso(struct dso *dso);
 struct machine;
 struct thread;
 
-struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread);
-void vdso__exit(struct machine *machine);
+struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread);
+void machine__exit_vdso(struct machine *machine);
 
 #endif /* __PERF_VDSO__ */

From d7a3d85e08477a979933a2bb3b525a8de99543c2 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Fri, 29 May 2015 17:42:58 +0200
Subject: [PATCH 235/305] perf build: Do not fail on missing Build file

Allow nesting into directories without Build file. Currently we force
include of the Build file, which fails the build when the Build file is
missing.

We already support empty *-in.o' objects if there's nothing in the
directory to be compiled, so we can just use it for missing Build file
cases.

Also adding this case under tests.

Reported-by: Rabin Vincent <rabin.vincent@axis.com>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Rabin Vincent <rabin.vincent@axis.com>
Link: http://lkml.kernel.org/r/1432914178-24086-1-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/Makefile.build         | 2 +-
 tools/build/tests/ex/Build         | 1 +
 tools/build/tests/ex/empty2/README | 2 ++
 3 files changed, 4 insertions(+), 1 deletion(-)
 create mode 100644 tools/build/tests/ex/empty2/README

diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build
index 10df57237a66..69c35cf09cad 100644
--- a/tools/build/Makefile.build
+++ b/tools/build/Makefile.build
@@ -37,7 +37,7 @@ subdir-obj-y :=
 
 # Build definitions
 build-file := $(dir)/Build
-include $(build-file)
+-include $(build-file)
 
 quiet_cmd_flex  = FLEX     $@
 quiet_cmd_bison = BISON    $@
diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build
index 0e6c3e6767e6..70d876237c57 100644
--- a/tools/build/tests/ex/Build
+++ b/tools/build/tests/ex/Build
@@ -2,6 +2,7 @@ ex-y += ex.o
 ex-y += a.o
 ex-y += b.o
 ex-y += empty/
+ex-y += empty2/
 
 libex-y += c.o
 libex-y += d.o
diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README
new file mode 100644
index 000000000000..2107cc5bf5a9
--- /dev/null
+++ b/tools/build/tests/ex/empty2/README
@@ -0,0 +1,2 @@
+This directory is left intentionally without Build file
+to test proper nesting into Build-less directories.

From ed426915900db3c58c410b8b38f6ff0e46bf6c96 Mon Sep 17 00:00:00 2001
From: Namhyung Kim <namhyung@kernel.org>
Date: Fri, 29 May 2015 21:53:44 +0900
Subject: [PATCH 236/305] perf tools: Make Ctrl-C stop processing on TUI

It was inconvenient that perf cannot be quit with SIGINT during
processing samples on TUI especially for large data files.

This was because the first argument of SLang_init_tty(), abort_char,
being 0.  The manual says it's the ascii value of the control character
that will be used to generate the interrupt signal [1].  Passing -1
means to use the default value (Ctrl-C).

However, after processing samples, Ctrl-C was used to in other cases as
well - like stepping back from annotate.  So recover the original
behavior after processing.

[1] http://jedsoft.org/slang/doc/html/cslang-6.html#ss6.1

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1432904024-13170-1-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/annotate.c | 4 ++++
 tools/perf/ui/browsers/hists.c    | 4 ++++
 tools/perf/ui/tui/setup.c         | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index e5250eb2dd57..acb0e23b138e 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -838,6 +838,10 @@ int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
 int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt)
 {
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
 	return map_symbol__tui_annotate(&he->ms, evsel, hbt);
 }
 
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index f981cb8f0158..e64893f2fd7f 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1741,6 +1741,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	if (browser == NULL)
 		return -1;
 
+	/* reset abort key so that it can get Ctrl-C as a key */
+	SLang_reset_tty();
+	SLang_init_tty(0, 0, 0);
+
 	if (min_pcnt) {
 		browser->min_pcnt = min_pcnt;
 		hist_browser__update_nr_entries(browser);
diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c
index b77e1d771363..60d1f29b4b50 100644
--- a/tools/perf/ui/tui/setup.c
+++ b/tools/perf/ui/tui/setup.c
@@ -129,7 +129,7 @@ int ui__init(void)
 	err = SLsmg_init_smg();
 	if (err < 0)
 		goto out;
-	err = SLang_init_tty(0, 0, 0);
+	err = SLang_init_tty(-1, 0, 0);
 	if (err < 0)
 		goto out;
 

From 6bb536cc4b06df70012cec6168cc1788e200bc9d Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 29 May 2015 09:45:47 +0000
Subject: [PATCH 237/305] perf probe: Fix segfault when glob matching function
 without debuginfo

Commit 4c859351226c920b227fec040a3b447f0d482af3 ("perf probe: Support
glob wildcards for function name") introduces segfault problems when
debuginfo is not available:

 # perf probe 'sys_w*'
  Added new events:
  Segmentation fault

The first problem resides in find_probe_trace_events_from_map(). In
that function, find_probe_functions() is called to match each symbol
against glob to find the number of matching functions, but still use
map__for_each_symbol_by_name() to find 'struct symbol' for matching
functions. Unfortunately, map__for_each_symbol_by_name() does
exact matching by searching in an rbtree.

It doesn't know glob matching, and not easy for it to support it because
it use rbtree based binary search, but we are unable to ensure all names
matched by the glob (any glob passed by user) reside in one subtree.

This patch drops map__for_each_symbol_by_name(). Since there is no
rbtree again, re-matching all symbols costs a lot. This patch avoid it
by saving all matching results into an array (syms).

The second problem is the lost of tp->realname. In
__add_probe_trace_events(), if pev->point.function is glob, the event
name should be set to tev->point.realname. This patch ensures its
existence by strdup sym->name instead of leaving a NULL pointer there.

After this patch:

 # perf probe 'sys_w*'
 Added new events:
   probe:sys_waitid     (on sys_w*)
   probe:sys_wait4      (on sys_w*)
   probe:sys_waitpid    (on sys_w*)
   probe:sys_write      (on sys_w*)
   probe:sys_writev     (on sys_w*)

 You can now use it in all perf tools, such as:

         perf record -e probe:sys_writev -aR sleep 1

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1432892747-232506-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 26 +++++++++++++++++++++-----
 1 file changed, 21 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d27edef5eb5b..e6f215b7a052 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2494,7 +2494,8 @@ close_out:
 	return ret;
 }
 
-static int find_probe_functions(struct map *map, char *name)
+static int find_probe_functions(struct map *map, char *name,
+				struct symbol **syms)
 {
 	int found = 0;
 	struct symbol *sym;
@@ -2504,8 +2505,11 @@ static int find_probe_functions(struct map *map, char *name)
 		return 0;
 
 	map__for_each_symbol(map, sym, tmp) {
-		if (strglobmatch(sym->name, name))
+		if (strglobmatch(sym->name, name)) {
 			found++;
+			if (syms && found < probe_conf.max_probes)
+				syms[found - 1] = sym;
+		}
 	}
 
 	return found;
@@ -2528,11 +2532,12 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	struct map *map = NULL;
 	struct ref_reloc_sym *reloc_sym = NULL;
 	struct symbol *sym;
+	struct symbol **syms = NULL;
 	struct probe_trace_event *tev;
 	struct perf_probe_point *pp = &pev->point;
 	struct probe_trace_point *tp;
 	int num_matched_functions;
-	int ret, i;
+	int ret, i, j;
 
 	map = get_target_map(pev->target, pev->uprobes);
 	if (!map) {
@@ -2540,11 +2545,17 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 		goto out;
 	}
 
+	syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes);
+	if (!syms) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
 	/*
 	 * Load matched symbols: Since the different local symbols may have
 	 * same name but different addresses, this lists all the symbols.
 	 */
-	num_matched_functions = find_probe_functions(map, pp->function);
+	num_matched_functions = find_probe_functions(map, pp->function, syms);
 	if (num_matched_functions == 0) {
 		pr_err("Failed to find symbol %s in %s\n", pp->function,
 			pev->target ? : "kernel");
@@ -2575,7 +2586,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 
 	ret = 0;
 
-	map__for_each_symbol_by_name(map, pp->function, sym) {
+	for (j = 0; j < num_matched_functions; j++) {
+		sym = syms[j];
+
 		tev = (*tevs) + ret;
 		tp = &tev->point;
 		if (ret == num_matched_functions) {
@@ -2599,6 +2612,8 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 			tp->symbol = strdup_or_goto(sym->name, nomem_out);
 			tp->offset = pp->offset;
 		}
+		tp->realname = strdup_or_goto(sym->name, nomem_out);
+
 		tp->retprobe = pp->retprobe;
 		if (pev->target)
 			tev->point.module = strdup_or_goto(pev->target,
@@ -2629,6 +2644,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 
 out:
 	put_target_map(map, pev->uprobes);
+	free(syms);
 	return ret;
 
 nomem_out:

From 9ecae065f3295d7ea5e1c5492170a848ca255a46 Mon Sep 17 00:00:00 2001
From: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Date: Sun, 31 May 2015 11:36:23 +0530
Subject: [PATCH 238/305] perf tools: Remove newline char when reading event
 scale and unit

The <fd979c013207> commit intruduced the perf_event_sysfs_show function
to display the event_str value of an attr in kernel/event/core.c. But
the function returns the value with a newline char.

So, if a event also carries a event.unit file, when printing the counter
data perf tool formatting goes for a spin.

That is, because of the event unit, event name is printed in the newline
because of perf_event_sysfs_show returns with a newline char.

Now fixing perf core will break API, hencing proposing a fix in the perf tool.

Signed-off-by: Madhavan Srinivasan <maddy@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1433052383-21802-1-git-send-email-maddy@linux.vnet.ibm.com
[ Add spaces around operators ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/pmu.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
index 5d3ab7c8ceaf..0fcc624eb767 100644
--- a/tools/perf/util/pmu.c
+++ b/tools/perf/util/pmu.c
@@ -112,7 +112,11 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char *
 	if (sret < 0)
 		goto error;
 
-	scale[sret] = '\0';
+	if (scale[sret - 1] == '\n')
+		scale[sret - 1] = '\0';
+	else
+		scale[sret] = '\0';
+
 	/*
 	 * save current locale
 	 */
@@ -154,7 +158,10 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n
 
 	close(fd);
 
-	alias->unit[sret] = '\0';
+	if (alias->unit[sret - 1] == '\n')
+		alias->unit[sret - 1] = '\0';
+	else
+		alias->unit[sret] = '\0';
 
 	return 0;
 error:

From 0443f36b0de026143a78c858aac773572f7dd5db Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 2 Jun 2015 11:28:13 -0300
Subject: [PATCH 239/305] perf machine: Fix the search for the kernel DSO on
 the unified list

When unifying the user_dsos and kernel_dsos a bug was introduced by
inverting the check for dso->kernel, fix it.

Fixes: 3d39ac538629 ("perf machine: No need to have two DSOs lists")
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-xnrnq0kams3s2z9ek1wjb506@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 2ed61f59d415..4e29e80932e5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1149,7 +1149,7 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		struct dso *dso;
 
 		list_for_each_entry(dso, &machine->dsos.head, node) {
-			if (dso->kernel && is_kernel_module(dso->long_name))
+			if (!dso->kernel || is_kernel_module(dso->long_name))
 				continue;
 
 			kernel = dso;

From 37fbe0a4a0a9afe3b0fe843a4775a85ccf430deb Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Mon, 1 Jun 2015 07:37:47 +0000
Subject: [PATCH 240/305] perf tools: Move linux/kernel.h to tools/include

This patch moves kernel.h from tools/perf/util/include/linux/kernel.h
to tools/include/linux/kernel.h to enable other libraries use macros in
it, like libbpf which will be introduced by further patches.

MANIFEST is also updated for 'make perf-*-src-pkg'.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Alexei Starovoitov <ast@plumgrid.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1433144296-74992-2-git-send-email-wangnan0@huawei.com
[ Fixed up the ifdef guard to match other entries in tools/include/linux ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/{perf/util => }/include/linux/kernel.h | 4 ++--
 tools/perf/MANIFEST                          | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)
 rename tools/{perf/util => }/include/linux/kernel.h (97%)

diff --git a/tools/perf/util/include/linux/kernel.h b/tools/include/linux/kernel.h
similarity index 97%
rename from tools/perf/util/include/linux/kernel.h
rename to tools/include/linux/kernel.h
index 09e8e7aea7c6..76df53539c2a 100644
--- a/tools/perf/util/include/linux/kernel.h
+++ b/tools/include/linux/kernel.h
@@ -1,5 +1,5 @@
-#ifndef PERF_LINUX_KERNEL_H_
-#define PERF_LINUX_KERNEL_H_
+#ifndef __TOOLS_LINUX_KERNEL_H
+#define __TOOLS_LINUX_KERNEL_H
 
 #include <stdarg.h>
 #include <stdio.h>
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index a83cf75164e1..fce4a47347aa 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -40,6 +40,7 @@ tools/include/linux/bitops.h
 tools/include/linux/compiler.h
 tools/include/linux/export.h
 tools/include/linux/hash.h
+tools/include/linux/kernel.h
 tools/include/linux/log2.h
 tools/include/linux/types.h
 include/asm-generic/bitops/arch_hweight.h

From 4fc62a89dc607cee4f75f6dbb102cd6215fd0d64 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Mon, 1 Jun 2015 07:37:48 +0000
Subject: [PATCH 241/305] tools: Move
 tools/perf/util/include/linux/{list.h,poison.h} to tools/include

This patch moves list.h from tools/perf/util/include/linux/list.h to
tools/include/linux/list.h to enable other libraries use macros in it,
like libbpf which will be introduced by further patches. Since list.h
depend on poison.h, poison.h is also moved.

Both file use relative path, so one '..' is removed for each header to
make them suit for new directory.

MANIFEST is also updated for 'make perf-*-src-pkg'.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Brendan Gregg <brendan.d.gregg@gmail.com>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: David Ahern <dsahern@gmail.com>
Cc: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kaixu Xia <xiakaixu@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1433144296-74992-3-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/{perf/util => }/include/linux/list.h | 6 +++---
 tools/include/linux/poison.h               | 1 +
 tools/perf/MANIFEST                        | 2 ++
 tools/perf/util/include/linux/poison.h     | 1 -
 4 files changed, 6 insertions(+), 4 deletions(-)
 rename tools/{perf/util => }/include/linux/list.h (90%)
 create mode 100644 tools/include/linux/poison.h
 delete mode 100644 tools/perf/util/include/linux/poison.h

diff --git a/tools/perf/util/include/linux/list.h b/tools/include/linux/list.h
similarity index 90%
rename from tools/perf/util/include/linux/list.h
rename to tools/include/linux/list.h
index 76ddbc726343..76b014c96893 100644
--- a/tools/perf/util/include/linux/list.h
+++ b/tools/include/linux/list.h
@@ -1,10 +1,10 @@
 #include <linux/kernel.h>
 #include <linux/types.h>
 
-#include "../../../../include/linux/list.h"
+#include "../../../include/linux/list.h"
 
-#ifndef PERF_LIST_H
-#define PERF_LIST_H
+#ifndef TOOLS_LIST_H
+#define TOOLS_LIST_H
 /**
  * list_del_range - deletes range of entries from list.
  * @begin: first element in the range to delete from the list.
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h
new file mode 100644
index 000000000000..0c27bdf14233
--- /dev/null
+++ b/tools/include/linux/poison.h
@@ -0,0 +1 @@
+#include "../../../include/linux/poison.h"
diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index fce4a47347aa..a0bdd6124583 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -41,7 +41,9 @@ tools/include/linux/compiler.h
 tools/include/linux/export.h
 tools/include/linux/hash.h
 tools/include/linux/kernel.h
+tools/include/linux/list.h
 tools/include/linux/log2.h
+tools/include/linux/poison.h
 tools/include/linux/types.h
 include/asm-generic/bitops/arch_hweight.h
 include/asm-generic/bitops/const_hweight.h
diff --git a/tools/perf/util/include/linux/poison.h b/tools/perf/util/include/linux/poison.h
deleted file mode 100644
index fef6dbc9ce13..000000000000
--- a/tools/perf/util/include/linux/poison.h
+++ /dev/null
@@ -1 +0,0 @@
-#include "../../../../include/linux/poison.h"

From 1f121b03d058dd07199d8924373d3c52a207f63b Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 3 Jun 2015 08:52:21 +0000
Subject: [PATCH 242/305] perf tools: Deal with kernel module names in '[]'
 correctly

Before patch ba92732e9808 ('perf kmaps: Check kmaps to make code more
robust'), 'perf report' and 'perf annotate' will segfault if trace data
contains kernel module information like this:

 # perf report -D -i ./perf.data
 ...
 0 0 0x188 [0x50]: PERF_RECORD_MMAP -1/0: [0xffffffbff1018000(0xf068000) @ 0]: x [test_module]
 ...

 # perf report -i ./perf.data --objdump=/path/to/objdump --kallsyms=/path/to/kallsyms

 perf: Segmentation fault
 -------- backtrace --------
 /path/to/perf[0x503478]
 /lib64/libc.so.6(+0x3545f)[0x7fb201f3745f]
 /path/to/perf[0x499b56]
 /path/to/perf(dso__load_kallsyms+0x13c)[0x49b56c]
 /path/to/perf(dso__load+0x72e)[0x49c21e]
 /path/to/perf(map__load+0x6e)[0x4ae9ee]
 /path/to/perf(thread__find_addr_map+0x24c)[0x47deec]
 /path/to/perf(perf_event__preprocess_sample+0x88)[0x47e238]
 /path/to/perf[0x43ad02]
 /path/to/perf[0x4b55bc]
 /path/to/perf(ordered_events__flush+0xca)[0x4b57ea]
 /path/to/perf[0x4b1a01]
 /path/to/perf(perf_session__process_events+0x3be)[0x4b428e]
 /path/to/perf(cmd_report+0xf11)[0x43bfc1]
 /path/to/perf[0x474702]
 /path/to/perf(main+0x5f5)[0x42de95]
 /lib64/libc.so.6(__libc_start_main+0xf4)[0x7fb201f23bd4]
 /path/to/perf[0x42dfc4]

This is because __kmod_path__parse treats '[' leading names as kernel
name instead of names of kernel module.

If perf.data contains build information and the buildid of such modules
can be found, the dso->kernel of it will be set to DSO_TYPE_KERNEL by
__event_process_build_id(), not kernel module.

It will then be passed to dso__load() -> dso__load_kernel_sym() ->
dso__load_kcore() if --kallsyms is provided.

The refered patch adds NULL pointer checker to avoid segfault. However,
such kernel modules are still processed incorrectly.

This patch fixes __kmod_path__parse, makes it treat names like
'[test_module]' as kernel modules.

kmod-path.c is also update to reflect the above changes.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1433321541-170245-1-git-send-email-wangnan0@huawei.com
[ Fixed the merged with 0443f36b0de0 ("perf machine: Fix the search
  for the kernel DSO on the unified list" ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/kmod-path.c | 72 ++++++++++++++++++++++++++++++++++++
 tools/perf/util/dso.c        | 45 ++++++++++++++++++++--
 tools/perf/util/dso.h        |  2 +-
 tools/perf/util/header.c     |  8 ++--
 tools/perf/util/machine.c    | 22 ++++++++++-
 5 files changed, 139 insertions(+), 10 deletions(-)

diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c
index e8d7cbb9320c..08c433b4bf4f 100644
--- a/tools/perf/tests/kmod-path.c
+++ b/tools/perf/tests/kmod-path.c
@@ -34,9 +34,21 @@ static int test(const char *path, bool alloc_name, bool alloc_ext,
 	return 0;
 }
 
+static int test_is_kernel_module(const char *path, int cpumode, bool expect)
+{
+	TEST_ASSERT_VAL("is_kernel_module",
+			(!!is_kernel_module(path, cpumode)) == (!!expect));
+	pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n",
+			path, cpumode, expect ? "true" : "false");
+	return 0;
+}
+
 #define T(path, an, ae, k, c, n, e) \
 	TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e))
 
+#define M(path, c, e) \
+	TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e))
+
 int test__kmod_path__parse(void)
 {
 	/* path                alloc_name  alloc_ext   kmod  comp   name     ext */
@@ -44,30 +56,90 @@ int test__kmod_path__parse(void)
 	T("/xxxx/xxxx/x-x.ko", false     , true      , true, false, NULL   , NULL);
 	T("/xxxx/xxxx/x-x.ko", true      , false     , true, false, "[x_x]", NULL);
 	T("/xxxx/xxxx/x-x.ko", false     , false     , true, false, NULL   , NULL);
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true);
+	M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false);
 
 	/* path                alloc_name  alloc_ext   kmod  comp  name   ext */
 	T("/xxxx/xxxx/x.ko.gz", true     , true      , true, true, "[x]", "gz");
 	T("/xxxx/xxxx/x.ko.gz", false    , true      , true, true, NULL , "gz");
 	T("/xxxx/xxxx/x.ko.gz", true     , false     , true, true, "[x]", NULL);
 	T("/xxxx/xxxx/x.ko.gz", false    , false     , true, true, NULL , NULL);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+	M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false);
 
 	/* path              alloc_name  alloc_ext  kmod   comp  name    ext */
 	T("/xxxx/xxxx/x.gz", true      , true     , false, true, "x.gz" ,"gz");
 	T("/xxxx/xxxx/x.gz", false     , true     , false, true, NULL   ,"gz");
 	T("/xxxx/xxxx/x.gz", true      , false    , false, true, "x.gz" , NULL);
 	T("/xxxx/xxxx/x.gz", false     , false    , false, true, NULL   , NULL);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false);
+	M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false);
 
 	/* path   alloc_name  alloc_ext  kmod   comp  name     ext */
 	T("x.gz", true      , true     , false, true, "x.gz", "gz");
 	T("x.gz", false     , true     , false, true, NULL  , "gz");
 	T("x.gz", true      , false    , false, true, "x.gz", NULL);
 	T("x.gz", false     , false    , false, true, NULL  , NULL);
+	M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("x.gz", PERF_RECORD_MISC_KERNEL, false);
+	M("x.gz", PERF_RECORD_MISC_USER, false);
 
 	/* path      alloc_name  alloc_ext  kmod  comp  name  ext */
 	T("x.ko.gz", true      , true     , true, true, "[x]", "gz");
 	T("x.ko.gz", false     , true     , true, true, NULL , "gz");
 	T("x.ko.gz", true      , false    , true, true, "[x]", NULL);
 	T("x.ko.gz", false     , false    , true, true, NULL , NULL);
+	M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true);
+	M("x.ko.gz", PERF_RECORD_MISC_USER, false);
+
+	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+	T("[test_module]", true      , true     , true, false, "[test_module]", NULL);
+	T("[test_module]", false     , true     , true, false, NULL           , NULL);
+	T("[test_module]", true      , false    , true, false, "[test_module]", NULL);
+	T("[test_module]", false     , false    , true, false, NULL           , NULL);
+	M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("[test_module]", PERF_RECORD_MISC_KERNEL, true);
+	M("[test_module]", PERF_RECORD_MISC_USER, false);
+
+	/* path            alloc_name  alloc_ext  kmod  comp   name             ext */
+	T("[test.module]", true      , true     , true, false, "[test.module]", NULL);
+	T("[test.module]", false     , true     , true, false, NULL           , NULL);
+	T("[test.module]", true      , false    , true, false, "[test.module]", NULL);
+	T("[test.module]", false     , false    , true, false, NULL           , NULL);
+	M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true);
+	M("[test.module]", PERF_RECORD_MISC_KERNEL, true);
+	M("[test.module]", PERF_RECORD_MISC_USER, false);
+
+	/* path     alloc_name  alloc_ext  kmod   comp   name      ext */
+	T("[vdso]", true      , true     , false, false, "[vdso]", NULL);
+	T("[vdso]", false     , true     , false, false, NULL    , NULL);
+	T("[vdso]", true      , false    , false, false, "[vdso]", NULL);
+	T("[vdso]", false     , false    , false, false, NULL    , NULL);
+	M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vdso]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vdso]", PERF_RECORD_MISC_USER, false);
+
+	/* path         alloc_name  alloc_ext  kmod   comp   name          ext */
+	T("[vsyscall]", true      , true     , false, false, "[vsyscall]", NULL);
+	T("[vsyscall]", false     , true     , false, false, NULL        , NULL);
+	T("[vsyscall]", true      , false    , false, false, "[vsyscall]", NULL);
+	T("[vsyscall]", false     , false    , false, false, NULL        , NULL);
+	M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false);
+	M("[vsyscall]", PERF_RECORD_MISC_USER, false);
+
+	/* path                alloc_name  alloc_ext  kmod   comp   name      ext */
+	T("[kernel.kallsyms]", true      , true     , false, false, "[kernel.kallsyms]", NULL);
+	T("[kernel.kallsyms]", false     , true     , false, false, NULL               , NULL);
+	T("[kernel.kallsyms]", true      , false    , false, false, "[kernel.kallsyms]", NULL);
+	T("[kernel.kallsyms]", false     , false    , false, false, NULL               , NULL);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false);
+	M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false);
 
 	return 0;
 }
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index b335db3532a2..5ec9e892c89b 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -166,12 +166,28 @@ bool is_supported_compression(const char *ext)
 	return false;
 }
 
-bool is_kernel_module(const char *pathname)
+bool is_kernel_module(const char *pathname, int cpumode)
 {
 	struct kmod_path m;
+	int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	if (kmod_path__parse(&m, pathname))
-		return NULL;
+	WARN_ONCE(mode != cpumode,
+		  "Internal error: passing unmasked cpumode (%x) to is_kernel_module",
+		  cpumode);
+
+	switch (mode) {
+	case PERF_RECORD_MISC_USER:
+	case PERF_RECORD_MISC_HYPERVISOR:
+	case PERF_RECORD_MISC_GUEST_USER:
+		return false;
+	/* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */
+	default:
+		if (kmod_path__parse(&m, pathname)) {
+			pr_err("Failed to check whether %s is a kernel module or not. Assume it is.",
+					pathname);
+			return true;
+		}
+	}
 
 	return m.kmod;
 }
@@ -215,12 +231,33 @@ int __kmod_path__parse(struct kmod_path *m, const char *path,
 {
 	const char *name = strrchr(path, '/');
 	const char *ext  = strrchr(path, '.');
+	bool is_simple_name = false;
 
 	memset(m, 0x0, sizeof(*m));
 	name = name ? name + 1 : path;
 
+	/*
+	 * '.' is also a valid character for module name. For example:
+	 * [aaa.bbb] is a valid module name. '[' should have higher
+	 * priority than '.ko' suffix.
+	 *
+	 * The kernel names are from machine__mmap_name. Such
+	 * name should belong to kernel itself, not kernel module.
+	 */
+	if (name[0] == '[') {
+		is_simple_name = true;
+		if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) ||
+		    (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) ||
+		    (strncmp(name, "[vdso]", 6) == 0) ||
+		    (strncmp(name, "[vsyscall]", 10) == 0)) {
+			m->kmod = false;
+
+		} else
+			m->kmod = true;
+	}
+
 	/* No extension, just return name. */
-	if (ext == NULL) {
+	if ((ext == NULL) || is_simple_name) {
 		if (alloc_name) {
 			m->name = strdup(name);
 			return m->name ? 0 : -ENOMEM;
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index 24a507a54147..ba2d90ed881f 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -220,7 +220,7 @@ char dso__symtab_origin(const struct dso *dso);
 int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type,
 				   char *root_dir, char *filename, size_t size);
 bool is_supported_compression(const char *ext);
-bool is_kernel_module(const char *pathname);
+bool is_kernel_module(const char *pathname, int cpumode);
 bool decompress_to_file(const char *ext, const char *filename, int output_fd);
 bool dso__needs_decompress(struct dso *dso);
 
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index 851143a7988d..ac5aaaeed7ff 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1239,7 +1239,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 {
 	int err = -1;
 	struct machine *machine;
-	u16 misc;
+	u16 cpumode;
 	struct dso *dso;
 	enum dso_kernel_type dso_type;
 
@@ -1247,9 +1247,9 @@ static int __event_process_build_id(struct build_id_event *bev,
 	if (!machine)
 		goto out;
 
-	misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
 
-	switch (misc) {
+	switch (cpumode) {
 	case PERF_RECORD_MISC_KERNEL:
 		dso_type = DSO_TYPE_KERNEL;
 		break;
@@ -1270,7 +1270,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 
 		dso__set_build_id(dso, &bev->build_id);
 
-		if (!is_kernel_module(filename))
+		if (!is_kernel_module(filename, cpumode))
 			dso->kernel = dso_type;
 
 		build_id__sprintf(dso->build_id, sizeof(dso->build_id),
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 4e29e80932e5..9e02c86f39f5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1149,9 +1149,29 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		struct dso *dso;
 
 		list_for_each_entry(dso, &machine->dsos.head, node) {
-			if (!dso->kernel || is_kernel_module(dso->long_name))
+
+			/*
+			 * The cpumode passed to is_kernel_module is not the
+			 * cpumode of *this* event. If we insist on passing
+			 * correct cpumode to is_kernel_module, we should
+			 * record the cpumode when we adding this dso to the
+			 * linked list.
+			 *
+			 * However we don't really need passing correct
+			 * cpumode.  We know the correct cpumode must be kernel
+			 * mode (if not, we should not link it onto kernel_dsos
+			 * list).
+			 *
+			 * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN.
+			 * is_kernel_module() treats it as a kernel cpumode.
+			 */
+
+			if (!dso->kernel ||
+			    is_kernel_module(dso->long_name,
+					     PERF_RECORD_MISC_CPUMODE_UNKNOWN))
 				continue;
 
+
 			kernel = dso;
 			break;
 		}

From c9fdfa14c3792c0160849c484e83aa57afd80ccc Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 14 May 2015 23:09:58 +0200
Subject: [PATCH 243/305] perf: add new PERF_SAMPLE_BRANCH_IND_JUMP branch
 sample type

This patch adds a new branch_sample_type flag to enable
filtering branch sampling to indirect jumps. The support
is subject to hardware or kernel software support on each
architecture.

Filtering on indirect jump is useful to study the targets
of the jump.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: dsahern@gmail.com
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1431637800-31061-2-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 include/uapi/linux/perf_event.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 309211b3eb67..c4622f1ce046 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -167,6 +167,7 @@ enum perf_branch_sample_type_shift {
 	PERF_SAMPLE_BRANCH_COND_SHIFT		= 10, /* conditional branches */
 
 	PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT	= 11, /* call/ret stack */
+	PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT	= 12, /* indirect jumps */
 
 	PERF_SAMPLE_BRANCH_MAX_SHIFT		/* non-ABI */
 };
@@ -186,6 +187,7 @@ enum perf_branch_sample_type {
 	PERF_SAMPLE_BRANCH_COND		= 1U << PERF_SAMPLE_BRANCH_COND_SHIFT,
 
 	PERF_SAMPLE_BRANCH_CALL_STACK	= 1U << PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT,
+	PERF_SAMPLE_BRANCH_IND_JUMP	= 1U << PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT,
 
 	PERF_SAMPLE_BRANCH_MAX		= 1U << PERF_SAMPLE_BRANCH_MAX_SHIFT,
 };

From 7b74cfb2ecb4d56a25c89cdb561e4926db85feb1 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 14 May 2015 23:09:59 +0200
Subject: [PATCH 244/305] perf/x86/intel: add support for
 PERF_SAMPLE_BRANCH_IND_JUMP

This patch enables support for branch sampling filter
for indirect jumps (IND_JUMP). It enables LBR IND_JMP
filtering where available. There is also software filtering
support.

Signed-off-by: Stephane Eranian <eranian@google.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: dsahern@gmail.com
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1431637800-31061-3-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_lbr.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 94e5b506caa6..201e16f6655a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -96,6 +96,7 @@ enum {
 	X86_BR_NO_TX		= 1 << 14,/* not in transaction */
 	X86_BR_ZERO_CALL	= 1 << 15,/* zero length call */
 	X86_BR_CALL_STACK	= 1 << 16,/* call stack */
+	X86_BR_IND_JMP		= 1 << 17,/* indirect jump */
 };
 
 #define X86_BR_PLM (X86_BR_USER | X86_BR_KERNEL)
@@ -113,6 +114,7 @@ enum {
 	 X86_BR_IRQ	 |\
 	 X86_BR_ABORT	 |\
 	 X86_BR_IND_CALL |\
+	 X86_BR_IND_JMP  |\
 	 X86_BR_ZERO_CALL)
 
 #define X86_BR_ALL (X86_BR_PLM | X86_BR_ANY)
@@ -523,6 +525,9 @@ static int intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
 			X86_BR_CALL_STACK;
 	}
 
+	if (br_type & PERF_SAMPLE_BRANCH_IND_JUMP)
+		mask |= X86_BR_IND_JMP;
+
 	/*
 	 * stash actual user request into reg, it may
 	 * be used by fixup code for some CPU
@@ -736,7 +741,7 @@ static int branch_type(unsigned long from, unsigned long to, int abort)
 			break;
 		case 4:
 		case 5:
-			ret = X86_BR_JMP;
+			ret = X86_BR_IND_JMP;
 			break;
 		}
 		break;
@@ -844,6 +849,7 @@ static const int nhm_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	 */
 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT] = LBR_IND_CALL | LBR_IND_JMP,
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]     = LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT] = LBR_IND_JMP,
 };
 
 static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -856,6 +862,7 @@ static const int snb_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 						| LBR_FAR,
 	[PERF_SAMPLE_BRANCH_IND_CALL_SHIFT]	= LBR_IND_CALL,
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
 };
 
 static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
@@ -870,6 +877,7 @@ static const int hsw_lbr_sel_map[PERF_SAMPLE_BRANCH_MAX_SHIFT] = {
 	[PERF_SAMPLE_BRANCH_COND_SHIFT]		= LBR_JCC,
 	[PERF_SAMPLE_BRANCH_CALL_STACK_SHIFT]	= LBR_REL_CALL | LBR_IND_CALL
 						| LBR_RETURN | LBR_CALL_STACK,
+	[PERF_SAMPLE_BRANCH_IND_JUMP_SHIFT]	= LBR_IND_JMP,
 };
 
 /* core */

From 5b68164d6a1fdbe02b30bd777d1f686c6d901f28 Mon Sep 17 00:00:00 2001
From: Stephane Eranian <eranian@google.com>
Date: Thu, 14 May 2015 23:10:00 +0200
Subject: [PATCH 245/305] perf record: Add support for sampling indirect jumps

This patch adds a new branch sampling type support for indirect jumps:

  perf record -j ind_jmp .......

It enables analysis of indirect jumps targets. It requires kernel and
possibly hardware support to operate correctly.

Signed-off-by: Stephane Eranian <eranian@google.com>
[ Fixup against: f00898f4e20b (perf tools: Move branch option parsing to own file) ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Andi Kleen <ak@linux.intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@redhat.com
Cc: dsahern@gmail.com
Cc: jolsa@redhat.com
Cc: kan.liang@intel.com
Cc: namhyung@kernel.org
Link: http://lkml.kernel.org/r/1431637800-31061-4-git-send-email-eranian@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/util/parse-branch-options.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c
index 9d999436658f..a3b1e13a05c0 100644
--- a/tools/perf/util/parse-branch-options.c
+++ b/tools/perf/util/parse-branch-options.c
@@ -26,6 +26,7 @@ static const struct branch_mode branch_modes[] = {
 	BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX),
 	BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX),
 	BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND),
+	BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP),
 	BRANCH_END
 };
 

From 851559e35fd5ab637783ba395e55edd50f761229 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:47 -0400
Subject: [PATCH 246/305] perf/x86/intel: Use the PEBS auto reload mechanism
 when possible

When a fixed period is specified, this patch makes perf use the PEBS
auto reload mechanism. This makes normal profiling faster, because
it avoids one costly MSR write in the PMI handler.

However, the reset value will be loaded by hardware assist. There is a
small delay compared to the previous non-auto-reload mechanism. The
delay time is arbitrary, but very small. The assist cost is 400-800
cycles, assuming common cases with everything cached. The minimum period
the patch currently uses is 10000. In that extreme case it can be ~10%
if cycles are used.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-2-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.c          | 15 +++++++++------
 arch/x86/kernel/cpu/perf_event.h          |  1 +
 arch/x86/kernel/cpu/perf_event_intel.c    |  8 ++++++--
 arch/x86/kernel/cpu/perf_event_intel_ds.c |  7 +++++++
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index dbe3328f8ad7..9560d0fc6fa6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1094,13 +1094,16 @@ int x86_perf_event_set_period(struct perf_event *event)
 
 	per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
 
-	/*
-	 * The hw event starts counting from this event offset,
-	 * mark it to be able to extra future deltas:
-	 */
-	local64_set(&hwc->prev_count, (u64)-left);
+	if (!(hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) ||
+	    local64_read(&hwc->prev_count) != (u64)-left) {
+		/*
+		 * The hw event starts counting from this event offset,
+		 * mark it to be able to extra future deltas:
+		 */
+		local64_set(&hwc->prev_count, (u64)-left);
 
-	wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+		wrmsrl(hwc->event_base, (u64)(-left) & x86_pmu.cntval_mask);
+	}
 
 	/*
 	 * Due to erratum on certan cpu we need
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 89e6cd61e6ae..7a3f0fdd2fbd 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -75,6 +75,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_DYNAMIC		0x0080 /* dynamic alloc'd constraint */
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
 #define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
+#define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
 
 
 struct amd_nb {
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 74f19d9268bb..17628930a80e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2260,8 +2260,12 @@ static int intel_pmu_hw_config(struct perf_event *event)
 	if (ret)
 		return ret;
 
-	if (event->attr.precise_ip && x86_pmu.pebs_aliases)
-		x86_pmu.pebs_aliases(event);
+	if (event->attr.precise_ip) {
+		if (!event->attr.freq)
+			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+		if (x86_pmu.pebs_aliases)
+			x86_pmu.pebs_aliases(event);
+	}
 
 	if (needs_branch_stack(event)) {
 		ret = intel_pmu_setup_lbr_filter(event);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 7f73b3553e2e..4802d5dec222 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -688,6 +688,7 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
+	struct debug_store *ds = cpuc->ds;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
@@ -697,6 +698,12 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 		cpuc->pebs_enabled |= 1ULL << (hwc->idx + 32);
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled |= 1ULL << 63;
+
+	/* Use auto-reload if possible to save a MSR write in the PMI */
+	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
+		ds->pebs_event_reset[hwc->idx] =
+			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
+	}
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)

From 43cf76312faefed098c057082abac8a3d521e1dc Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:48 -0400
Subject: [PATCH 247/305] perf/x86/intel: Introduce setup_pebs_sample_data()

Move code that sets up the PEBS sample data to a separate function.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-3-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 93 +++++++++++++----------
 1 file changed, 51 insertions(+), 42 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4802d5dec222..a5fe561c4902 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -853,8 +853,10 @@ static inline u64 intel_hsw_transaction(struct pebs_record_hsw *pebs)
 	return txn;
 }
 
-static void __intel_pmu_pebs_event(struct perf_event *event,
-				   struct pt_regs *iregs, void *__pebs)
+static void setup_pebs_sample_data(struct perf_event *event,
+				   struct pt_regs *iregs, void *__pebs,
+				   struct perf_sample_data *data,
+				   struct pt_regs *regs)
 {
 #define PERF_X86_EVENT_PEBS_HSW_PREC \
 		(PERF_X86_EVENT_PEBS_ST_HSW | \
@@ -866,30 +868,25 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 */
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct pebs_record_hsw *pebs = __pebs;
-	struct perf_sample_data data;
-	struct pt_regs regs;
 	u64 sample_type;
 	int fll, fst, dsrc;
 	int fl = event->hw.flags;
 
-	if (!intel_pmu_save_and_restart(event))
-		return;
-
 	sample_type = event->attr.sample_type;
 	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 
 	fll = fl & PERF_X86_EVENT_PEBS_LDLAT;
 	fst = fl & (PERF_X86_EVENT_PEBS_ST | PERF_X86_EVENT_PEBS_HSW_PREC);
 
-	perf_sample_data_init(&data, 0, event->hw.last_period);
+	perf_sample_data_init(data, 0, event->hw.last_period);
 
-	data.period = event->hw.last_period;
+	data->period = event->hw.last_period;
 
 	/*
 	 * Use latency for weight (only avail with PEBS-LL)
 	 */
 	if (fll && (sample_type & PERF_SAMPLE_WEIGHT))
-		data.weight = pebs->lat;
+		data->weight = pebs->lat;
 
 	/*
 	 * data.data_src encodes the data source
@@ -902,7 +899,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 			val = precise_datala_hsw(event, pebs->dse);
 		else if (fst)
 			val = precise_store_data(pebs->dse);
-		data.data_src.val = val;
+		data->data_src.val = val;
 	}
 
 	/*
@@ -915,58 +912,70 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 	 * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly.
 	 * A possible PERF_SAMPLE_REGS will have to transfer all regs.
 	 */
-	regs = *iregs;
-	regs.flags = pebs->flags;
-	set_linear_ip(&regs, pebs->ip);
-	regs.bp = pebs->bp;
-	regs.sp = pebs->sp;
+	*regs = *iregs;
+	regs->flags = pebs->flags;
+	set_linear_ip(regs, pebs->ip);
+	regs->bp = pebs->bp;
+	regs->sp = pebs->sp;
 
 	if (sample_type & PERF_SAMPLE_REGS_INTR) {
-		regs.ax = pebs->ax;
-		regs.bx = pebs->bx;
-		regs.cx = pebs->cx;
-		regs.dx = pebs->dx;
-		regs.si = pebs->si;
-		regs.di = pebs->di;
-		regs.bp = pebs->bp;
-		regs.sp = pebs->sp;
+		regs->ax = pebs->ax;
+		regs->bx = pebs->bx;
+		regs->cx = pebs->cx;
+		regs->dx = pebs->dx;
+		regs->si = pebs->si;
+		regs->di = pebs->di;
+		regs->bp = pebs->bp;
+		regs->sp = pebs->sp;
 
-		regs.flags = pebs->flags;
+		regs->flags = pebs->flags;
 #ifndef CONFIG_X86_32
-		regs.r8 = pebs->r8;
-		regs.r9 = pebs->r9;
-		regs.r10 = pebs->r10;
-		regs.r11 = pebs->r11;
-		regs.r12 = pebs->r12;
-		regs.r13 = pebs->r13;
-		regs.r14 = pebs->r14;
-		regs.r15 = pebs->r15;
+		regs->r8 = pebs->r8;
+		regs->r9 = pebs->r9;
+		regs->r10 = pebs->r10;
+		regs->r11 = pebs->r11;
+		regs->r12 = pebs->r12;
+		regs->r13 = pebs->r13;
+		regs->r14 = pebs->r14;
+		regs->r15 = pebs->r15;
 #endif
 	}
 
 	if (event->attr.precise_ip > 1 && x86_pmu.intel_cap.pebs_format >= 2) {
-		regs.ip = pebs->real_ip;
-		regs.flags |= PERF_EFLAGS_EXACT;
-	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(&regs))
-		regs.flags |= PERF_EFLAGS_EXACT;
+		regs->ip = pebs->real_ip;
+		regs->flags |= PERF_EFLAGS_EXACT;
+	} else if (event->attr.precise_ip > 1 && intel_pmu_pebs_fixup_ip(regs))
+		regs->flags |= PERF_EFLAGS_EXACT;
 	else
-		regs.flags &= ~PERF_EFLAGS_EXACT;
+		regs->flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((sample_type & PERF_SAMPLE_ADDR) &&
 	    x86_pmu.intel_cap.pebs_format >= 1)
-		data.addr = pebs->dla;
+		data->addr = pebs->dla;
 
 	if (x86_pmu.intel_cap.pebs_format >= 2) {
 		/* Only set the TSX weight when no memory weight. */
 		if ((sample_type & PERF_SAMPLE_WEIGHT) && !fll)
-			data.weight = intel_hsw_weight(pebs);
+			data->weight = intel_hsw_weight(pebs);
 
 		if (sample_type & PERF_SAMPLE_TRANSACTION)
-			data.txn = intel_hsw_transaction(pebs);
+			data->txn = intel_hsw_transaction(pebs);
 	}
 
 	if (has_branch_stack(event))
-		data.br_stack = &cpuc->lbr_stack;
+		data->br_stack = &cpuc->lbr_stack;
+}
+
+static void __intel_pmu_pebs_event(struct perf_event *event,
+				   struct pt_regs *iregs, void *__pebs)
+{
+	struct perf_sample_data data;
+	struct pt_regs regs;
+
+	if (!intel_pmu_save_and_restart(event))
+		return;
+
+	setup_pebs_sample_data(event, iregs, __pebs, &data, &regs);
 
 	if (perf_event_overflow(event, &data, &regs))
 		x86_pmu_stop(event, 0);

From 21509084f999d7accd32e45961ef76853112e978 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:49 -0400
Subject: [PATCH 248/305] perf/x86/intel: Handle multiple records in the PEBS
 buffer

When the PEBS interrupt threshold is larger than one record and the
machine supports multiple PEBS events, the records of these events are
mixed up and we need to demultiplex them.

Demuxing the records is hard because the hardware is deficient. The
hardware has two issues that, when combined, create impossible
scenarios to demux.

The first issue is that the 'status' field of the PEBS record is a copy
of the GLOBAL_STATUS MSR at PEBS assist time. To see why this is a
problem let us first describe the regular PEBS cycle:

A) the CTRn value reaches 0:
  - the corresponding bit in GLOBAL_STATUS gets set
  - we start arming the hardware assist
  < some unspecified amount of time later -- this could cover multiple
    events of interest >

B) the hardware assist is armed, any next event will trigger it

C) a matching event happens:
  - the hardware assist triggers and generates a PEBS record
    this includes a copy of GLOBAL_STATUS at this moment
  - if we auto-reload we (re)set CTRn
  - we clear the relevant bit in GLOBAL_STATUS

Now consider the following chain of events:

  A0, B0, A1, C0

The event generated for counter 0 will include a status with counter 1
set, even though its not at all related to the record. A similar thing
can happen with a !PEBS event if it just happens to overflow at the
right moment.

The second issue is that the hardware will only emit one record for two
or more counters if the event that triggers the assist is 'close'. The
'close' can be several cycles. In some cases even the complete assist,
if the event is something that doesn't need retirement.

For instance, consider this chain of events:

  A0, B0, A1, B1, C01

Where C01 is an event that triggers both hardware assists, we will
generate but a single record, but again with both counters listed in the
status field.

This time the record pertains to both events.

Note that these two cases are different but undistinguishable with the
data as generated. Therefore demuxing records with multiple PEBS bits
(we can safely ignore status bits for !PEBS counters) is impossible.

Furthermore we cannot emit the record to both events because that might
cause a data leak -- the events might not have the same privileges -- so
what this patch does is discard such events.

The assumption/hope is that such discards will be rare.

Here lists some possible ways you may get high discard rate.

  - when you count the same thing multiple times. But it is not a useful
    configuration.
  - you can be unfortunate if you measure with a userspace only PEBS
    event along with either a kernel or unrestricted PEBS event. Imagine
    the event triggering and setting the overflow flag right before
    entering the kernel. Then all kernel side events will end up with
    multiple bits set.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
[ Changelog improvements. ]
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-4-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>

Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 150 +++++++++++++++-------
 include/linux/perf_event.h                |  13 ++
 kernel/events/core.c                      |   6 +-
 kernel/events/internal.h                  |   9 --
 4 files changed, 120 insertions(+), 58 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index a5fe561c4902..72529c237e6e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -872,6 +872,9 @@ static void setup_pebs_sample_data(struct perf_event *event,
 	int fll, fst, dsrc;
 	int fl = event->hw.flags;
 
+	if (pebs == NULL)
+		return;
+
 	sample_type = event->attr.sample_type;
 	dsrc = sample_type & PERF_SAMPLE_DATA_SRC;
 
@@ -966,19 +969,68 @@ static void setup_pebs_sample_data(struct perf_event *event,
 		data->br_stack = &cpuc->lbr_stack;
 }
 
+static inline void *
+get_next_pebs_record_by_bit(void *base, void *top, int bit)
+{
+	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
+	void *at;
+	u64 pebs_status;
+
+	if (base == NULL)
+		return NULL;
+
+	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
+		struct pebs_record_nhm *p = at;
+
+		if (test_bit(bit, (unsigned long *)&p->status)) {
+
+			if (p->status == (1 << bit))
+				return at;
+
+			/* clear non-PEBS bit and re-check */
+			pebs_status = p->status & cpuc->pebs_enabled;
+			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			if (pebs_status == (1 << bit))
+				return at;
+		}
+	}
+	return NULL;
+}
+
 static void __intel_pmu_pebs_event(struct perf_event *event,
-				   struct pt_regs *iregs, void *__pebs)
+				   struct pt_regs *iregs,
+				   void *base, void *top,
+				   int bit, int count)
 {
 	struct perf_sample_data data;
 	struct pt_regs regs;
+	int i;
+	void *at = get_next_pebs_record_by_bit(base, top, bit);
 
-	if (!intel_pmu_save_and_restart(event))
+	if (!intel_pmu_save_and_restart(event) &&
+	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
 		return;
 
-	setup_pebs_sample_data(event, iregs, __pebs, &data, &regs);
+	if (count > 1) {
+		for (i = 0; i < count - 1; i++) {
+			setup_pebs_sample_data(event, iregs, at, &data, &regs);
+			perf_event_output(event, &data, &regs);
+			at += x86_pmu.pebs_record_size;
+			at = get_next_pebs_record_by_bit(at, top, bit);
+		}
+	}
 
-	if (perf_event_overflow(event, &data, &regs))
+	setup_pebs_sample_data(event, iregs, at, &data, &regs);
+
+	/*
+	 * All but the last records are processed.
+	 * The last one is left to be able to call the overflow handler.
+	 */
+	if (perf_event_overflow(event, &data, &regs)) {
 		x86_pmu_stop(event, 0);
+		return;
+	}
+
 }
 
 static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
@@ -1008,72 +1060,78 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	if (!event->attr.precise_ip)
 		return;
 
-	n = top - at;
+	n = (top - at) / x86_pmu.pebs_record_size;
 	if (n <= 0)
 		return;
 
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
-	at += n - 1;
-
-	__intel_pmu_pebs_event(event, iregs, at);
+	__intel_pmu_pebs_event(event, iregs, at, top, 0, n);
 }
 
 static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
-	struct perf_event *event = NULL;
-	void *at, *top;
-	u64 status = 0;
+	struct perf_event *event;
+	void *base, *at, *top;
 	int bit;
+	short counts[MAX_PEBS_EVENTS] = {};
 
 	if (!x86_pmu.pebs_active)
 		return;
 
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
 	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	if (unlikely(at > top))
+	if (unlikely(base >= top))
 		return;
 
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
-		  "Unexpected number of pebs records %ld\n",
-		  (long)(top - at) / x86_pmu.pebs_record_size);
-
-	for (; at < top; at += x86_pmu.pebs_record_size) {
+	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
-		for_each_set_bit(bit, (unsigned long *)&p->status,
-				 x86_pmu.max_pebs_events) {
-			event = cpuc->events[bit];
-			if (!test_bit(bit, cpuc->active_mask))
-				continue;
-
-			WARN_ON_ONCE(!event);
-
-			if (!event->attr.precise_ip)
-				continue;
-
-			if (__test_and_set_bit(bit, (unsigned long *)&status))
-				continue;
-
-			break;
-		}
-
-		if (!event || bit >= x86_pmu.max_pebs_events)
+		bit = find_first_bit((unsigned long *)&p->status,
+					x86_pmu.max_pebs_events);
+		if (bit >= x86_pmu.max_pebs_events)
 			continue;
+		if (!test_bit(bit, cpuc->active_mask))
+			continue;
+		/*
+		 * The PEBS hardware does not deal well with the situation
+		 * when events happen near to each other and multiple bits
+		 * are set. But it should happen rarely.
+		 *
+		 * If these events include one PEBS and multiple non-PEBS
+		 * events, it doesn't impact PEBS record. The record will
+		 * be handled normally. (slow path)
+		 *
+		 * If these events include two or more PEBS events, the
+		 * records for the events can be collapsed into a single
+		 * one, and it's not possible to reconstruct all events
+		 * that caused the PEBS record. It's called collision.
+		 * If collision happened, the record will be dropped.
+		 *
+		 */
+		if (p->status != (1 << bit)) {
+			u64 pebs_status;
 
-		__intel_pmu_pebs_event(event, iregs, at);
+			/* slow path */
+			pebs_status = p->status & cpuc->pebs_enabled;
+			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
+			if (pebs_status != (1 << bit))
+				continue;
+		}
+		counts[bit]++;
+	}
+
+	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
+		if (counts[bit] == 0)
+			continue;
+		event = cpuc->events[bit];
+		WARN_ON_ONCE(!event);
+		WARN_ON_ONCE(!event->attr.precise_ip);
+
+		__intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]);
 	}
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 06580028cee6..5f192e1bc98e 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -730,6 +730,19 @@ extern int perf_event_overflow(struct perf_event *event,
 				 struct perf_sample_data *data,
 				 struct pt_regs *regs);
 
+extern void perf_event_output(struct perf_event *event,
+				struct perf_sample_data *data,
+				struct pt_regs *regs);
+
+extern void
+perf_event_header__init_id(struct perf_event_header *header,
+			   struct perf_sample_data *data,
+			   struct perf_event *event);
+extern void
+perf_event__output_id_sample(struct perf_event *event,
+			     struct perf_output_handle *handle,
+			     struct perf_sample_data *sample);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index eddf1ed4155e..e499b4e43aff 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5381,9 +5381,9 @@ void perf_prepare_sample(struct perf_event_header *header,
 	}
 }
 
-static void perf_event_output(struct perf_event *event,
-				struct perf_sample_data *data,
-				struct pt_regs *regs)
+void perf_event_output(struct perf_event *event,
+			struct perf_sample_data *data,
+			struct pt_regs *regs)
 {
 	struct perf_output_handle handle;
 	struct perf_event_header header;
diff --git a/kernel/events/internal.h b/kernel/events/internal.h
index 9f6ce9ba4a04..2deb24c7a40d 100644
--- a/kernel/events/internal.h
+++ b/kernel/events/internal.h
@@ -72,15 +72,6 @@ static inline bool rb_has_aux(struct ring_buffer *rb)
 void perf_event_aux_event(struct perf_event *event, unsigned long head,
 			  unsigned long size, u64 flags);
 
-extern void
-perf_event_header__init_id(struct perf_event_header *header,
-			   struct perf_sample_data *data,
-			   struct perf_event *event);
-extern void
-perf_event__output_id_sample(struct perf_event *event,
-			     struct perf_output_handle *handle,
-			     struct perf_sample_data *sample);
-
 extern struct page *
 perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff);
 

From 3569c0d7c5440d6fd06b10e1ef9614588a049bc7 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:50 -0400
Subject: [PATCH 249/305] perf/x86/intel: Implement batched PEBS interrupt
 handling (large PEBS interrupt threshold)

PEBS always had the capability to log samples to its buffers without
an interrupt. Traditionally perf has not used this but always set the
PEBS threshold to one.

For frequently occurring events (like cycles or branches or load/store)
this in term requires using a relatively high sampling period to avoid
overloading the system, by only processing PMIs. This in term increases
sampling error.

For the common cases we still need to use the PMI because the PEBS
hardware has various limitations. The biggest one is that it can not
supply a callgraph. It also requires setting a fixed period, as the
hardware does not support adaptive period. Another issue is that it
cannot supply a time stamp and some other options. To supply a TID it
requires flushing on context switch. It can however supply the IP, the
load/store address, TSX information, registers, and some other things.

So we can make PEBS work for some specific cases, basically as long as
you can do without a callgraph and can set the period you can use this
new PEBS mode.

The main benefit is the ability to support much lower sampling period
(down to -c 1000) without extensive overhead.

One use cases is for example to increase the resolution of the c2c tool.
Another is double checking when you suspect the standard sampling has
too much sampling error.

Some numbers on the overhead, using cycle soak, comparing the elapsed
time from "kernbench -M -H" between plain (threshold set to one) and
multi (large threshold).

The test command for plain:
  "perf record --time -e cycles:p -c $period -- kernbench -M -H"

The test command for multi:
  "perf record --no-time -e cycles:p -c $period -- kernbench -M -H"

( The only difference of test command between multi and plain is time
  stamp options. Since time stamp is not supported by large PEBS
  threshold, it can be used as a flag to indicate if large threshold is
  enabled during the test. )

	period    plain(Sec)  multi(Sec)  Delta
	10003     32.7        16.5        16.2
	20003     30.2        16.2        14.0
	40003     18.6        14.1        4.5
	80003     16.8        14.6        2.2
	100003    16.9        14.1        2.8
	800003    15.4        15.7        -0.3
	1000003   15.3        15.2        0.2
	2000003   15.3        15.1        0.1

With periods below 100003, plain (threshold one) cause much more
overhead. With 10003 sampling period, the Elapsed Time for multi is
even 2X faster than plain.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-5-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h          | 11 +++++++++
 arch/x86/kernel/cpu/perf_event_intel.c    |  5 ++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 27 +++++++++++++++++++----
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index 7a3f0fdd2fbd..a73dfc97226b 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -76,6 +76,7 @@ struct event_constraint {
 #define PERF_X86_EVENT_RDPMC_ALLOWED	0x0100 /* grant rdpmc permission */
 #define PERF_X86_EVENT_EXCL_ACCT	0x0200 /* accounted EXCL event */
 #define PERF_X86_EVENT_AUTO_RELOAD	0x0400 /* use PEBS auto-reload */
+#define PERF_X86_EVENT_FREERUNNING	0x0800 /* use freerunning PEBS */
 
 
 struct amd_nb {
@@ -88,6 +89,16 @@ struct amd_nb {
 /* The maximal number of PEBS events: */
 #define MAX_PEBS_EVENTS		8
 
+/*
+ * Flags PEBS can handle without an PMI.
+ *
+ */
+#define PEBS_FREERUNNING_FLAGS \
+	(PERF_SAMPLE_IP | PERF_SAMPLE_ADDR | \
+	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
+	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
+	PERF_SAMPLE_TRANSACTION)
+
 /*
  * A debug store configuration.
  *
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 17628930a80e..6985f43c5eb9 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2261,8 +2261,11 @@ static int intel_pmu_hw_config(struct perf_event *event)
 		return ret;
 
 	if (event->attr.precise_ip) {
-		if (!event->attr.freq)
+		if (!event->attr.freq) {
 			event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
+			if (!(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
+				event->hw.flags |= PERF_X86_EVENT_FREERUNNING;
+		}
 		if (x86_pmu.pebs_aliases)
 			x86_pmu.pebs_aliases(event);
 	}
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 72529c237e6e..0ce455d958b8 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -250,7 +250,7 @@ static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
 	int node = cpu_to_node(cpu);
-	int max, thresh = 1; /* always use a single PEBS record */
+	int max;
 	void *buffer, *ibuffer;
 
 	if (!x86_pmu.pebs)
@@ -280,9 +280,6 @@ static int alloc_pebs_buffer(int cpu)
 	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
 		max * x86_pmu.pebs_record_size;
 
-	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-		thresh * x86_pmu.pebs_record_size;
-
 	return 0;
 }
 
@@ -684,14 +681,22 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
 	return &emptyconstraint;
 }
 
+static inline bool pebs_is_enabled(struct cpu_hw_events *cpuc)
+{
+	return (cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
+}
+
 void intel_pmu_pebs_enable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
 	struct debug_store *ds = cpuc->ds;
+	bool first_pebs;
+	u64 threshold;
 
 	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
 
+	first_pebs = !pebs_is_enabled(cpuc);
 	cpuc->pebs_enabled |= 1ULL << hwc->idx;
 
 	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
@@ -699,11 +704,25 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled |= 1ULL << 63;
 
+	/*
+	 * When the event is constrained enough we can use a larger
+	 * threshold and run the event with less frequent PMI.
+	 */
+	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
+		threshold = ds->pebs_absolute_maximum -
+			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+	} else {
+		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+	}
+
 	/* Use auto-reload if possible to save a MSR write in the PMI */
 	if (hwc->flags & PERF_X86_EVENT_AUTO_RELOAD) {
 		ds->pebs_event_reset[hwc->idx] =
 			(u64)(-hwc->sample_period) & x86_pmu.cntval_mask;
 	}
+
+	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
+		ds->pebs_interrupt_threshold = threshold;
 }
 
 void intel_pmu_pebs_disable(struct perf_event *event)

From 9c964efa4330a58520783effe9847f15126fef1f Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:51 -0400
Subject: [PATCH 250/305] perf/x86/intel: Drain the PEBS buffer during context
 switches

Flush the PEBS buffer during context switches if PEBS interrupt threshold
is larger than one. This allows perf to supply TID for sample outputs.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-6-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event.h           |  6 +++-
 arch/x86/kernel/cpu/perf_event_intel.c     | 11 +++++++-
 arch/x86/kernel/cpu/perf_event_intel_ds.c  | 32 ++++++++++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel_lbr.c |  3 --
 4 files changed, 47 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index a73dfc97226b..74089bcb6d74 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -92,9 +92,11 @@ struct amd_nb {
 /*
  * Flags PEBS can handle without an PMI.
  *
+ * TID can only be handled by flushing at context switch.
+ *
  */
 #define PEBS_FREERUNNING_FLAGS \
-	(PERF_SAMPLE_IP | PERF_SAMPLE_ADDR | \
+	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
 	PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
 	PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
 	PERF_SAMPLE_TRANSACTION)
@@ -877,6 +879,8 @@ void intel_pmu_pebs_enable_all(void);
 
 void intel_pmu_pebs_disable_all(void);
 
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in);
+
 void intel_ds_init(void);
 
 void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in);
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 6985f43c5eb9..d455e2a61287 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2642,6 +2642,15 @@ static void intel_pmu_cpu_dying(int cpu)
 	fini_debug_store_on_cpu(cpu);
 }
 
+static void intel_pmu_sched_task(struct perf_event_context *ctx,
+				 bool sched_in)
+{
+	if (x86_pmu.pebs_active)
+		intel_pmu_pebs_sched_task(ctx, sched_in);
+	if (x86_pmu.lbr_nr)
+		intel_pmu_lbr_sched_task(ctx, sched_in);
+}
+
 PMU_FORMAT_ATTR(offcore_rsp, "config1:0-63");
 
 PMU_FORMAT_ATTR(ldlat, "config1:0-15");
@@ -2731,7 +2740,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
 	.guest_get_msrs		= intel_guest_get_msrs,
-	.sched_task		= intel_pmu_lbr_sched_task,
+	.sched_task		= intel_pmu_sched_task,
 };
 
 static __init void intel_clovertown_quirk(void)
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 0ce455d958b8..62852470ccbb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -546,6 +546,19 @@ int intel_pmu_drain_bts_buffer(void)
 	return 1;
 }
 
+static inline void intel_pmu_drain_pebs_buffer(void)
+{
+	struct pt_regs regs;
+
+	x86_pmu.drain_pebs(&regs);
+}
+
+void intel_pmu_pebs_sched_task(struct perf_event_context *ctx, bool sched_in)
+{
+	if (!sched_in)
+		intel_pmu_drain_pebs_buffer();
+}
+
 /*
  * PEBS
  */
@@ -711,8 +724,19 @@ void intel_pmu_pebs_enable(struct perf_event *event)
 	if (hwc->flags & PERF_X86_EVENT_FREERUNNING) {
 		threshold = ds->pebs_absolute_maximum -
 			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
+
+		if (first_pebs)
+			perf_sched_cb_inc(event->ctx->pmu);
 	} else {
 		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
+
+		/*
+		 * If not all events can use larger buffer,
+		 * roll back to threshold = 1
+		 */
+		if (!first_pebs &&
+		    (ds->pebs_interrupt_threshold > threshold))
+			perf_sched_cb_dec(event->ctx->pmu);
 	}
 
 	/* Use auto-reload if possible to save a MSR write in the PMI */
@@ -729,6 +753,7 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 {
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct hw_perf_event *hwc = &event->hw;
+	struct debug_store *ds = cpuc->ds;
 
 	cpuc->pebs_enabled &= ~(1ULL << hwc->idx);
 
@@ -737,6 +762,13 @@ void intel_pmu_pebs_disable(struct perf_event *event)
 	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
 		cpuc->pebs_enabled &= ~(1ULL << 63);
 
+	if (ds->pebs_interrupt_threshold >
+	    ds->pebs_buffer_base + x86_pmu.pebs_record_size) {
+		intel_pmu_drain_pebs_buffer();
+		if (!pebs_is_enabled(cpuc))
+			perf_sched_cb_dec(event->ctx->pmu);
+	}
+
 	if (cpuc->enabled)
 		wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled);
 
diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
index 201e16f6655a..452a7bd2dedb 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
@@ -264,9 +264,6 @@ void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
 	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
 	struct x86_perf_task_context *task_ctx;
 
-	if (!x86_pmu.lbr_nr)
-		return;
-
 	/*
 	 * If LBR callstack feature is enabled and the stack was saved when
 	 * the task was scheduled out, restore the stack. Otherwise flush

From 156174999dd1d0fe8732f5a05f4e9cef921ad487 Mon Sep 17 00:00:00 2001
From: "Yan, Zheng" <zheng.z.yan@intel.com>
Date: Wed, 6 May 2015 15:33:52 -0400
Subject: [PATCH 251/305] perf/intel/x86: Enlarge the PEBS buffer

Currently the PEBS buffer size is 4k, it can only hold about 21
PEBS records. This patch enlarges the PEBS buffer size to 64k
(the same as the BTS buffer).

64k memory can hold about 330 PEBS records. This will significantly
reduce the number of PMIs when batched PEBS interrupts are enabled.

Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1430940834-8964-7-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 62852470ccbb..266079a3a646 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -11,7 +11,7 @@
 #define BTS_RECORD_SIZE		24
 
 #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
-#define PEBS_BUFFER_SIZE	PAGE_SIZE
+#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)
 #define PEBS_FIXUP_SIZE		PAGE_SIZE
 
 /*

From f38b0dbb491a6987e198aa6b428db8692a6480f8 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Sun, 10 May 2015 15:13:14 -0400
Subject: [PATCH 252/305] perf/x86/intel: Introduce PERF_RECORD_LOST_SAMPLES

After enlarging the PEBS interrupt threshold, there may be some mixed up
PEBS samples which are discarded by the kernel.

This patch makes the kernel emit a PERF_RECORD_LOST_SAMPLES record with
the number of possible discarded records when it is impossible to demux
the samples.

It makes sure the user is not left in the dark about such discards.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-8-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 20 +++++++++++---
 include/linux/perf_event.h                |  3 +++
 include/uapi/linux/perf_event.h           | 12 +++++++++
 kernel/events/core.c                      | 33 +++++++++++++++++++++++
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 266079a3a646..34d0c4816141 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1126,6 +1126,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	void *base, *at, *top;
 	int bit;
 	short counts[MAX_PEBS_EVENTS] = {};
+	short error[MAX_PEBS_EVENTS] = {};
 
 	if (!x86_pmu.pebs_active)
 		return;
@@ -1169,20 +1170,33 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			/* slow path */
 			pebs_status = p->status & cpuc->pebs_enabled;
 			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
-			if (pebs_status != (1 << bit))
+			if (pebs_status != (1 << bit)) {
+				u8 i;
+
+				for_each_set_bit(i, (unsigned long *)&pebs_status,
+						 MAX_PEBS_EVENTS)
+					error[i]++;
 				continue;
+			}
 		}
 		counts[bit]++;
 	}
 
 	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
-		if (counts[bit] == 0)
+		if ((counts[bit] == 0) && (error[bit] == 0))
 			continue;
 		event = cpuc->events[bit];
 		WARN_ON_ONCE(!event);
 		WARN_ON_ONCE(!event->attr.precise_ip);
 
-		__intel_pmu_pebs_event(event, iregs, base, top, bit, counts[bit]);
+		/* log dropped samples number */
+		if (error[bit])
+			perf_log_lost_samples(event, error[bit]);
+
+		if (counts[bit]) {
+			__intel_pmu_pebs_event(event, iregs, base,
+					       top, bit, counts[bit]);
+		}
 	}
 }
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 5f192e1bc98e..a204d5266f5f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -743,6 +743,9 @@ perf_event__output_id_sample(struct perf_event *event,
 			     struct perf_output_handle *handle,
 			     struct perf_sample_data *sample);
 
+extern void
+perf_log_lost_samples(struct perf_event *event, u64 lost);
+
 static inline bool is_sampling_event(struct perf_event *event)
 {
 	return event->attr.sample_period != 0;
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index c4622f1ce046..613ed9ad588f 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -802,6 +802,18 @@ enum perf_event_type {
 	 */
 	PERF_RECORD_ITRACE_START		= 12,
 
+	/*
+	 * Records the dropped/lost sample number.
+	 *
+	 * struct {
+	 *	struct perf_event_header	header;
+	 *
+	 *	u64				lost;
+	 *	struct sample_id		sample_id;
+	 * };
+	 */
+	PERF_RECORD_LOST_SAMPLES		= 13,
+
 	PERF_RECORD_MAX,			/* non-ABI */
 };
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index e499b4e43aff..9e0773d5d110 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -5974,6 +5974,39 @@ void perf_event_aux_event(struct perf_event *event, unsigned long head,
 	perf_output_end(&handle);
 }
 
+/*
+ * Lost/dropped samples logging
+ */
+void perf_log_lost_samples(struct perf_event *event, u64 lost)
+{
+	struct perf_output_handle handle;
+	struct perf_sample_data sample;
+	int ret;
+
+	struct {
+		struct perf_event_header	header;
+		u64				lost;
+	} lost_samples_event = {
+		.header = {
+			.type = PERF_RECORD_LOST_SAMPLES,
+			.misc = 0,
+			.size = sizeof(lost_samples_event),
+		},
+		.lost		= lost,
+	};
+
+	perf_event_header__init_id(&lost_samples_event.header, &sample, event);
+
+	ret = perf_output_begin(&handle, event,
+				lost_samples_event.header.size);
+	if (ret)
+		return;
+
+	perf_output_put(&handle, lost_samples_event);
+	perf_event__output_id_sample(event, &handle, &sample);
+	perf_output_end(&handle);
+}
+
 /*
  * IRQ throttle logging
  */

From c4937a91ea56b546234b0608a413ebad90536d26 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Sun, 10 May 2015 15:13:15 -0400
Subject: [PATCH 253/305] perf tools: handle PERF_RECORD_LOST_SAMPLES

This patch modifies the perf tool to handle the new RECORD type,
PERF_RECORD_LOST_SAMPLES.

The number of lost-sample events is stored in
.nr_events[PERF_RECORD_LOST_SAMPLES]. The exact number of samples
which the kernel dropped is stored in total_lost_samples.

When the percentage of dropped samples is greater than 5%, a warning
is printed.

Here are some examples:

Eg 1, Recording different frequently-occurring events is safe with the
      patch. Only a very low drop rate is associated with such actions.

$ perf record -e '{cycles:p,instructions:p}' -c 20003 --no-time ~/tchain ~/tchain

$ perf report -D | tail
          SAMPLE events:     120243
           MMAP2 events:          5
    LOST_SAMPLES events:         24
  FINISHED_ROUND events:         15
cycles:p stats:
           TOTAL events:      59348
          SAMPLE events:      59348
instructions:p stats:
           TOTAL events:      60895
          SAMPLE events:      60895

$ perf report --stdio --group
 # To display the perf.data header info, please use --header/--header-only options.
 #
 #
 # Total Lost Samples: 24
 #
 # Samples: 120K of event 'anon group { cycles:p, instructions:p }'
 # Event count (approx.): 24048600000
 #
 #         Overhead  Command      Shared Object     Symbol
 # ................  ...........  ................
 ..................................
 #
    99.74%  99.86%  tchain_edit  tchain_edit       [.] f3
     0.09%   0.02%  tchain_edit  tchain_edit       [.] f2
     0.04%   0.00%  tchain_edit  [kernel.vmlinux]  [k] ixgbe_read_reg

Eg 2, Recording the same thing multiple times can lead to high drop
      rate, but it is not a useful configuration.

$ perf record -e '{cycles:p,cycles:p}' -c 20003 --no-time ~/tchain
Warning: Processed 600592 samples and lost 99.73% samples!
[perf record: Woken up 148 times to write data]
[perf record: Captured and wrote 36.922 MB perf.data (1206322 samples)]
[perf record: Woken up 1 times to write data]
[perf record: Captured and wrote 0.121 MB perf.data (1629 samples)]

Signed-off-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: acme@infradead.org
Cc: eranian@google.com
Link: http://lkml.kernel.org/r/1431285195-14269-9-git-send-email-kan.liang@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 tools/perf/builtin-report.c |  1 +
 tools/perf/util/event.c     |  9 +++++++++
 tools/perf/util/event.h     | 17 +++++++++++++++++
 tools/perf/util/machine.c   | 10 ++++++++++
 tools/perf/util/machine.h   |  2 ++
 tools/perf/util/session.c   | 19 +++++++++++++++++++
 tools/perf/util/tool.h      |  1 +
 7 files changed, 59 insertions(+)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 56025d90622f..628090b478ab 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -320,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 {
 	struct perf_evsel *pos;
 
+	fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples);
 	evlist__for_each(evlist, pos) {
 		struct hists *hists = evsel__hists(pos);
 		const char *evname = perf_evsel__name(pos);
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index c1925968a8af..793b1503d437 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -25,6 +25,7 @@ static const char *perf_event__names[] = {
 	[PERF_RECORD_SAMPLE]			= "SAMPLE",
 	[PERF_RECORD_AUX]			= "AUX",
 	[PERF_RECORD_ITRACE_START]		= "ITRACE_START",
+	[PERF_RECORD_LOST_SAMPLES]		= "LOST_SAMPLES",
 	[PERF_RECORD_HEADER_ATTR]		= "ATTR",
 	[PERF_RECORD_HEADER_EVENT_TYPE]		= "EVENT_TYPE",
 	[PERF_RECORD_HEADER_TRACING_DATA]	= "TRACING_DATA",
@@ -712,6 +713,14 @@ int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused,
 	return machine__process_itrace_start_event(machine, event);
 }
 
+int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine)
+{
+	return machine__process_lost_samples_event(machine, event, sample);
+}
+
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp)
 {
 	return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n",
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 97179abc80a1..5dc51ada05df 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -52,6 +52,11 @@ struct lost_event {
 	u64 lost;
 };
 
+struct lost_samples_event {
+	struct perf_event_header header;
+	u64 lost;
+};
+
 /*
  * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID
  */
@@ -235,6 +240,12 @@ enum auxtrace_error_type {
  * total_lost tells exactly how many events the kernel in fact lost, i.e. it is
  * the sum of all struct lost_event.lost fields reported.
  *
+ * The kernel discards mixed up samples and sends the number in a
+ * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored
+ * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells
+ * exactly how many samples the kernel in fact dropped, i.e. it is the sum of
+ * all struct lost_samples_event.lost fields reported.
+ *
  * The total_period is needed because by default auto-freq is used, so
  * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get
  * the total number of low level events, it is necessary to to sum all struct
@@ -244,6 +255,7 @@ struct events_stats {
 	u64 total_period;
 	u64 total_non_filtered_period;
 	u64 total_lost;
+	u64 total_lost_samples;
 	u64 total_invalid_chains;
 	u32 nr_events[PERF_RECORD_HEADER_MAX];
 	u32 nr_non_filtered_samples;
@@ -342,6 +354,7 @@ union perf_event {
 	struct comm_event		comm;
 	struct fork_event		fork;
 	struct lost_event		lost;
+	struct lost_samples_event	lost_samples;
 	struct read_event		read;
 	struct throttle_event		throttle;
 	struct sample_event		sample;
@@ -390,6 +403,10 @@ int perf_event__process_lost(struct perf_tool *tool,
 			     union perf_event *event,
 			     struct perf_sample *sample,
 			     struct machine *machine);
+int perf_event__process_lost_samples(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct machine *machine);
 int perf_event__process_aux(struct perf_tool *tool,
 			    union perf_event *event,
 			    struct perf_sample *sample,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 9e02c86f39f5..f15ed24a22ac 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -482,6 +482,14 @@ int machine__process_lost_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
+int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
+					union perf_event *event, struct perf_sample *sample)
+{
+	dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n",
+		    sample->id, event->lost_samples.lost);
+	return 0;
+}
+
 static struct dso*
 machine__module_dso(struct machine *machine, struct kmod_path *m,
 		    const char *filename)
@@ -1419,6 +1427,8 @@ int machine__process_event(struct machine *machine, union perf_event *event,
 		ret = machine__process_aux_event(machine, event); break;
 	case PERF_RECORD_ITRACE_START:
 		ret = machine__process_itrace_start_event(machine, event);
+	case PERF_RECORD_LOST_SAMPLES:
+		ret = machine__process_lost_samples_event(machine, event, sample); break;
 		break;
 	default:
 		ret = -1;
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 39a0ca06cbd8..8e1f796fd137 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -81,6 +81,8 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
 				struct perf_sample *sample);
 int machine__process_lost_event(struct machine *machine, union perf_event *event,
 				struct perf_sample *sample);
+int machine__process_lost_samples_event(struct machine *machine, union perf_event *event,
+					struct perf_sample *sample);
 int machine__process_aux_event(struct machine *machine,
 			       union perf_event *event);
 int machine__process_itrace_start_event(struct machine *machine,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 39fe09d5a87e..88d87bf3049f 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -325,6 +325,8 @@ void perf_tool__fill_defaults(struct perf_tool *tool)
 		tool->exit = process_event_stub;
 	if (tool->lost == NULL)
 		tool->lost = perf_event__process_lost;
+	if (tool->lost_samples == NULL)
+		tool->lost_samples = perf_event__process_lost_samples;
 	if (tool->aux == NULL)
 		tool->aux = perf_event__process_aux;
 	if (tool->itrace_start == NULL)
@@ -606,6 +608,7 @@ static perf_event__swap_op perf_event__swap_ops[] = {
 	[PERF_RECORD_SAMPLE]		  = perf_event__all64_swap,
 	[PERF_RECORD_AUX]		  = perf_event__aux_swap,
 	[PERF_RECORD_ITRACE_START]	  = perf_event__itrace_start_swap,
+	[PERF_RECORD_LOST_SAMPLES]	  = perf_event__all64_swap,
 	[PERF_RECORD_HEADER_ATTR]	  = perf_event__hdr_attr_swap,
 	[PERF_RECORD_HEADER_EVENT_TYPE]	  = perf_event__event_type_swap,
 	[PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap,
@@ -1049,6 +1052,10 @@ static int machines__deliver_event(struct machines *machines,
 		if (tool->lost == perf_event__process_lost)
 			evlist->stats.total_lost += event->lost.lost;
 		return tool->lost(tool, event, sample, machine);
+	case PERF_RECORD_LOST_SAMPLES:
+		if (tool->lost_samples == perf_event__process_lost_samples)
+			evlist->stats.total_lost_samples += event->lost_samples.lost;
+		return tool->lost_samples(tool, event, sample, machine);
 	case PERF_RECORD_READ:
 		return tool->read(tool, event, sample, evsel, machine);
 	case PERF_RECORD_THROTTLE:
@@ -1286,6 +1293,18 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 			    stats->nr_events[PERF_RECORD_LOST]);
 	}
 
+	if (session->tool->lost_samples == perf_event__process_lost_samples) {
+		double drop_rate;
+
+		drop_rate = (double)stats->total_lost_samples /
+			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
+		if (drop_rate > 0.05) {
+			ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n",
+				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
+				    drop_rate * 100.0);
+		}
+	}
+
 	if (stats->nr_unknown_events != 0) {
 		ui__warning("Found %u unknown events!\n\n"
 			    "Is this an older tool processing a perf.data "
diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h
index 7f282ad1d2bd..c307dd438286 100644
--- a/tools/perf/util/tool.h
+++ b/tools/perf/util/tool.h
@@ -43,6 +43,7 @@ struct perf_tool {
 			fork,
 			exit,
 			lost,
+			lost_samples,
 			aux,
 			itrace_start,
 			throttle,

From a3d86542de8850be52e8589da22b24002941dfb7 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 12 May 2015 15:18:18 +0200
Subject: [PATCH 254/305] perf/x86/intel/pebs: Add PEBSv3 decoding

PEBSv3 as present on Skylake fixed the long standing issue of the
status bits. They now really reflect the events that generated the
record.

Tested-by: Andi Kleen <ak@linux.intel.com>
Tested-by: Kan Liang <kan.liang@intel.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 30 ++++++++++++++---------
 1 file changed, 19 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 34d0c4816141..71fc40238843 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -1034,6 +1034,9 @@ get_next_pebs_record_by_bit(void *base, void *top, int bit)
 		struct pebs_record_nhm *p = at;
 
 		if (test_bit(bit, (unsigned long *)&p->status)) {
+			/* PEBS v3 has accurate status bits */
+			if (x86_pmu.intel_cap.pebs_format >= 3)
+				return at;
 
 			if (p->status == (1 << bit))
 				return at;
@@ -1055,20 +1058,18 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 {
 	struct perf_sample_data data;
 	struct pt_regs regs;
-	int i;
 	void *at = get_next_pebs_record_by_bit(base, top, bit);
 
 	if (!intel_pmu_save_and_restart(event) &&
 	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
 		return;
 
-	if (count > 1) {
-		for (i = 0; i < count - 1; i++) {
-			setup_pebs_sample_data(event, iregs, at, &data, &regs);
-			perf_event_output(event, &data, &regs);
-			at += x86_pmu.pebs_record_size;
-			at = get_next_pebs_record_by_bit(at, top, bit);
-		}
+	while (count > 1) {
+		setup_pebs_sample_data(event, iregs, at, &data, &regs);
+		perf_event_output(event, &data, &regs);
+		at += x86_pmu.pebs_record_size;
+		at = get_next_pebs_record_by_bit(at, top, bit);
+		count--;
 	}
 
 	setup_pebs_sample_data(event, iregs, at, &data, &regs);
@@ -1124,9 +1125,9 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event;
 	void *base, *at, *top;
-	int bit;
 	short counts[MAX_PEBS_EVENTS] = {};
 	short error[MAX_PEBS_EVENTS] = {};
+	int bit, i;
 
 	if (!x86_pmu.pebs_active)
 		return;
@@ -1142,6 +1143,15 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
+		/* PEBS v3 has accurate status bits */
+		if (x86_pmu.intel_cap.pebs_format >= 3) {
+			for_each_set_bit(bit, (unsigned long *)&p->status,
+					 MAX_PEBS_EVENTS)
+				counts[bit]++;
+
+			continue;
+		}
+
 		bit = find_first_bit((unsigned long *)&p->status,
 					x86_pmu.max_pebs_events);
 		if (bit >= x86_pmu.max_pebs_events)
@@ -1171,8 +1181,6 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 			pebs_status = p->status & cpuc->pebs_enabled;
 			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
 			if (pebs_status != (1 << bit)) {
-				u8 i;
-
 				for_each_set_bit(i, (unsigned long *)&pebs_status,
 						 MAX_PEBS_EVENTS)
 					error[i]++;

From e2f56da1d6670070f6f55d43007cb7b03ee04c2f Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@redhat.com>
Date: Thu, 4 Jun 2015 15:50:55 +0200
Subject: [PATCH 255/305] perf stat: Add id into perf_stat struct

We need fast way to identify evsel as transaction event for shadow
counters computation. Currently we are using possition (in evlist) based
way.

Adding 'id' into 'struct perf_stat' so it can carry transaction event ID
and we can use it for shadow counters computations.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/20150604135055.GB23625@krava.redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c |  6 ++----
 tools/perf/util/stat.c    | 31 ++++++++++++++++++++++++++++++-
 tools/perf/util/stat.h    | 20 ++++++++++++++++++++
 3 files changed, 52 insertions(+), 5 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index fd577f725d23..a6ae1007f1f9 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -147,10 +147,6 @@ static int			(*aggr_get_id)(struct cpu_map *m, int cpu);
 
 static volatile int done = 0;
 
-struct perf_stat {
-	struct stats	  res_stats[3];
-};
-
 static inline void diff_timespec(struct timespec *r, struct timespec *a,
 				 struct timespec *b)
 {
@@ -180,6 +176,8 @@ static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel)
 
 	for (i = 0; i < 3; i++)
 		init_stats(&ps->res_stats[i]);
+
+	perf_stat_evsel_id_init(evsel);
 }
 
 static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 6506b3dfb605..8e9f6bb7581b 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -1,6 +1,6 @@
 #include <math.h>
-
 #include "stat.h"
+#include "evsel.h"
 
 void update_stats(struct stats *stats, u64 val)
 {
@@ -61,3 +61,32 @@ double rel_stddev_stats(double stddev, double avg)
 
 	return pct;
 }
+
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id)
+{
+	struct perf_stat *ps = evsel->priv;
+
+	return ps->id == id;
+}
+
+#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
+static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
+	ID(NONE, x),
+};
+#undef ID
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel)
+{
+	struct perf_stat *ps = evsel->priv;
+	int i;
+
+	/* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */
+
+	for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) {
+		if (!strcmp(perf_evsel__name(evsel), id_str[i])) {
+			ps->id = i;
+			break;
+		}
+	}
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 5667fc3e39cf..f4136cfd3cc9 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -9,6 +9,16 @@ struct stats
 	u64 max, min;
 };
 
+enum perf_stat_evsel_id {
+	PERF_STAT_EVSEL_ID__NONE = 0,
+	PERF_STAT_EVSEL_ID__MAX,
+};
+
+struct perf_stat {
+	struct stats		res_stats[3];
+	enum perf_stat_evsel_id	id;
+};
+
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
@@ -22,4 +32,14 @@ static inline void init_stats(struct stats *stats)
 	stats->min  = (u64) -1;
 	stats->max  = 0;
 }
+
+struct perf_evsel;
+bool __perf_evsel_stat__is(struct perf_evsel *evsel,
+			   enum perf_stat_evsel_id id);
+
+#define perf_stat_evsel__is(evsel, id) \
+	__perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id)
+
+void perf_stat_evsel_id_init(struct perf_evsel *evsel);
+
 #endif

From 4c358d5cf36192f22b8d331779cb92e3ede9cddf Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:52 +0200
Subject: [PATCH 256/305] perf stat: Replace transaction event possition check
 with id check

Using perf_stat::id to check for transaction events, instead of current
position based way.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 55 +++++----------------------------------
 tools/perf/util/stat.c    |  6 ++++-
 tools/perf/util/stat.h    |  4 +++
 3 files changed, 16 insertions(+), 49 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index a6ae1007f1f9..514493d703da 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -96,17 +96,6 @@ static const char * const transaction_limited_attrs[] = {
 	"}"
 };
 
-/* must match transaction_attrs and the beginning limited_attrs */
-enum {
-	T_TASK_CLOCK,
-	T_INSTRUCTIONS,
-	T_CYCLES,
-	T_CYCLES_IN_TX,
-	T_TRANSACTION_START,
-	T_ELISION_START,
-	T_CYCLES_IN_TX_CP,
-};
-
 static struct perf_evlist	*evsel_list;
 
 static struct target target = {
@@ -352,29 +341,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
-static struct perf_evsel *nth_evsel(int n)
-{
-	static struct perf_evsel **array;
-	static int array_len;
-	struct perf_evsel *ev;
-	int j;
-
-	/* Assumes this only called when evsel_list does not change anymore. */
-	if (!array) {
-		evlist__for_each(evsel_list, ev)
-			array_len++;
-		array = malloc(array_len * sizeof(void *));
-		if (!array)
-			exit(ENOMEM);
-		j = 0;
-		evlist__for_each(evsel_list, ev)
-			array[j++] = ev;
-	}
-	if (n < array_len)
-		return array[n];
-	return NULL;
-}
-
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
@@ -389,14 +355,11 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
+	else if (transaction_run && perf_stat_evsel__is(counter, CYCLES_IN_TX))
 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
+	else if (transaction_run && perf_stat_evsel__is(counter, TRANSACTION_START))
 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run &&
-		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
+	else if (transaction_run && perf_stat_evsel__is(counter, ELISION_START))
 		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
@@ -1207,15 +1170,13 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		} else {
 			fprintf(output, "                                   ");
 		}
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
+	} else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
 			fprintf(output,
 				" #   %5.2f%% transactional cycles   ",
 				100.0 * (avg / total));
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
+	} else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
@@ -1224,8 +1185,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 			fprintf(output,
 				" #   %5.2f%% aborted cycles         ",
 				100.0 * ((total2-avg) / total));
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
+	} else if (transaction_run && perf_stat_evsel__is(evsel, TRANSACTION_START) &&
 		   avg > 0 &&
 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
@@ -1234,8 +1194,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 			ratio = total / avg;
 
 		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
-	} else if (transaction_run &&
-		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
+	} else if (transaction_run && perf_stat_evsel__is(evsel, ELISION_START) &&
 		   avg > 0 &&
 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 8e9f6bb7581b..60b92822f655 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -72,7 +72,11 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,
 
 #define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name
 static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = {
-	ID(NONE, x),
+	ID(NONE,		x),
+	ID(CYCLES_IN_TX,	cpu/cycles-t/),
+	ID(TRANSACTION_START,	cpu/tx-start/),
+	ID(ELISION_START,	cpu/el-start/),
+	ID(CYCLES_IN_TX_CP,	cpu/cycles-ct/),
 };
 #undef ID
 
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index f4136cfd3cc9..3df529bd0774 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -11,6 +11,10 @@ struct stats
 
 enum perf_stat_evsel_id {
 	PERF_STAT_EVSEL_ID__NONE = 0,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX,
+	PERF_STAT_EVSEL_ID__TRANSACTION_START,
+	PERF_STAT_EVSEL_ID__ELISION_START,
+	PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP,
 	PERF_STAT_EVSEL_ID__MAX,
 };
 

From a454742c1252d6242e00b5f4f6f9e5fbce3859d7 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:53 +0200
Subject: [PATCH 257/305] perf stat: Remove setup_events function

We can use already existing parse_events interface.

Both transaction_attrs and transaction_limited_attrs are changed to be
single strings.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-4-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 27 +++++++--------------------
 1 file changed, 7 insertions(+), 20 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 514493d703da..0c0071cf4fba 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -73,8 +73,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix);
 static void print_aggr(char *prefix);
 
 /* Default events used for perf stat -T */
-static const char * const transaction_attrs[] = {
-	"task-clock",
+static const char *transaction_attrs = {
+	"task-clock,"
 	"{"
 	"instructions,"
 	"cycles,"
@@ -86,8 +86,8 @@ static const char * const transaction_attrs[] = {
 };
 
 /* More limited version when the CPU does not have all events. */
-static const char * const transaction_limited_attrs[] = {
-	"task-clock",
+static const char * transaction_limited_attrs = {
+	"task-clock,"
 	"{"
 	"instructions,"
 	"cycles,"
@@ -1533,17 +1533,6 @@ static int perf_stat_init_aggr_mode(void)
 	return 0;
 }
 
-static int setup_events(const char * const *attrs, unsigned len)
-{
-	unsigned i;
-
-	for (i = 0; i < len; i++) {
-		if (parse_events(evsel_list, attrs[i], NULL))
-			return -1;
-	}
-	return 0;
-}
-
 /*
  * Add default attributes, if there were no attributes specified or
  * if -d/--detailed, -d -d or -d -d -d is used:
@@ -1665,12 +1654,10 @@ static int add_default_attributes(void)
 		int err;
 		if (pmu_have_event("cpu", "cycles-ct") &&
 		    pmu_have_event("cpu", "el-start"))
-			err = setup_events(transaction_attrs,
-					ARRAY_SIZE(transaction_attrs));
+			err = parse_events(evsel_list, transaction_attrs, NULL);
 		else
-			err = setup_events(transaction_limited_attrs,
-				 ARRAY_SIZE(transaction_limited_attrs));
-		if (err < 0) {
+			err = parse_events(evsel_list, transaction_limited_attrs, NULL);
+		if (err) {
 			fprintf(stderr, "Cannot set up transaction events\n");
 			return -1;
 		}

From 3e99e2f5e78e601591dbcf777c67a84aa9ea2ae5 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:54 +0200
Subject: [PATCH 258/305] perf stat: Remove transaction_run from shadow
 update/print code

It's no longer needed, because we use nameid to recognize transaction
events.

Keeping it only in stat code to initialize transaction events.

I.e. struct perf_stat::id, accessible via evsel->priv, will be only set
for transaction related events.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-5-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0c0071cf4fba..b3e08ce2c564 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -355,11 +355,11 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
-	else if (transaction_run && perf_stat_evsel__is(counter, CYCLES_IN_TX))
+	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run && perf_stat_evsel__is(counter, TRANSACTION_START))
+	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
 		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (transaction_run && perf_stat_evsel__is(counter, ELISION_START))
+	else if (perf_stat_evsel__is(counter, ELISION_START))
 		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
 		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
@@ -1170,13 +1170,13 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		} else {
 			fprintf(output, "                                   ");
 		}
-	} else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
 			fprintf(output,
 				" #   %5.2f%% transactional cycles   ",
 				100.0 * (avg / total));
-	} else if (transaction_run && perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
@@ -1185,7 +1185,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 			fprintf(output,
 				" #   %5.2f%% aborted cycles         ",
 				100.0 * ((total2-avg) / total));
-	} else if (transaction_run && perf_stat_evsel__is(evsel, TRANSACTION_START) &&
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
 		   avg > 0 &&
 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
@@ -1194,7 +1194,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 			ratio = total / avg;
 
 		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
-	} else if (transaction_run && perf_stat_evsel__is(evsel, ELISION_START) &&
+	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
 		   avg > 0 &&
 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
 		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);

From 1eda3b2144391e1ec9e1870bb32d5216ac7b384c Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:55 +0200
Subject: [PATCH 259/305] perf stat: Introduce reset_shadow_stats function

Move shadow counters reset code into separate function
as preparation for moving it into its own object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-6-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b3e08ce2c564..fc85e6b9bd13 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -279,15 +279,8 @@ static int evsel_context(struct perf_evsel *evsel)
 	return ctx;
 }
 
-static void perf_stat__reset_stats(struct perf_evlist *evlist)
+static void reset_shadow_stats(void)
 {
-	struct perf_evsel *evsel;
-
-	evlist__for_each(evlist, evsel) {
-		perf_evsel__reset_stat_priv(evsel);
-		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
-	}
-
 	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
 	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
 	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
@@ -307,6 +300,18 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
 	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
 }
 
+static void perf_stat__reset_stats(struct perf_evlist *evlist)
+{
+	struct perf_evsel *evsel;
+
+	evlist__for_each(evlist, evsel) {
+		perf_evsel__reset_stat_priv(evsel);
+		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
+	}
+
+	reset_shadow_stats();
+}
+
 static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;

From 556b1fb7f9c1a9fd43ea4dacd5d14ec39ac6296a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:56 +0200
Subject: [PATCH 260/305] perf stat: Introduce print_shadow_stats function

Move shadow counters display code into separate function as preparation
for moving it into its own object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-7-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 72 +++++++++++++++++++++------------------
 1 file changed, 39 insertions(+), 33 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index fc85e6b9bd13..2ff2e22aa140 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1059,43 +1059,11 @@ static void print_ll_cache_misses(int cpu,
 	fprintf(output, " of all LL-cache hits   ");
 }
 
-static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
+static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 {
 	double total, ratio = 0.0, total2;
-	double sc =  evsel->scale;
-	const char *fmt;
-	int cpu = cpu_map__id_to_cpu(id);
 	int ctx = evsel_context(evsel);
 
-	if (csv_output) {
-		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
-	} else {
-		if (big_num)
-			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
-		else
-			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
-	}
-
-	aggr_printout(evsel, id, nr);
-
-	if (aggr_mode == AGGR_GLOBAL)
-		cpu = 0;
-
-	fprintf(output, fmt, avg, csv_sep);
-
-	if (evsel->unit)
-		fprintf(output, "%-*s%s",
-			csv_output ? 0 : unit_width,
-			evsel->unit, csv_sep);
-
-	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
-
-	if (evsel->cgrp)
-		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
-
-	if (csv_output || interval)
-		return;
-
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
@@ -1226,6 +1194,44 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	}
 }
 
+static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
+{
+	double sc =  evsel->scale;
+	const char *fmt;
+	int cpu = cpu_map__id_to_cpu(id);
+
+	if (csv_output) {
+		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
+	} else {
+		if (big_num)
+			fmt = sc != 1.0 ? "%'18.2f%s" : "%'18.0f%s";
+		else
+			fmt = sc != 1.0 ? "%18.2f%s" : "%18.0f%s";
+	}
+
+	aggr_printout(evsel, id, nr);
+
+	if (aggr_mode == AGGR_GLOBAL)
+		cpu = 0;
+
+	fprintf(output, fmt, avg, csv_sep);
+
+	if (evsel->unit)
+		fprintf(output, "%-*s%s",
+			csv_output ? 0 : unit_width,
+			evsel->unit, csv_sep);
+
+	fprintf(output, "%-*s", csv_output ? 0 : 25, perf_evsel__name(evsel));
+
+	if (evsel->cgrp)
+		fprintf(output, "%s%s", csv_sep, evsel->cgrp->name);
+
+	if (csv_output || interval)
+		return;
+
+	print_shadow_stats(evsel, avg, cpu);
+}
+
 static void print_aggr(char *prefix)
 {
 	struct perf_evsel *counter;

From 4d982740cd598bdd876d9a396cc919724af32bc9 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:57 +0200
Subject: [PATCH 261/305] perf stat: Add output file argument to
 print_shadow_stats function

As preparation for moving shadow counters code into its own object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-8-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 112 +++++++++++++++++++-------------------
 1 file changed, 56 insertions(+), 56 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2ff2e22aa140..14a75ddb60c7 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -899,7 +899,7 @@ static const char *get_ratio_color(enum grc_type type, double ratio)
 	return color;
 }
 
-static void print_stalled_cycles_frontend(int cpu,
+static void print_stalled_cycles_frontend(FILE *out, int cpu,
 					  struct perf_evsel *evsel
 					  __maybe_unused, double avg)
 {
@@ -914,12 +914,12 @@ static void print_stalled_cycles_frontend(int cpu,
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " frontend cycles idle   ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " frontend cycles idle   ");
 }
 
-static void print_stalled_cycles_backend(int cpu,
+static void print_stalled_cycles_backend(FILE *out, int cpu,
 					 struct perf_evsel *evsel
 					 __maybe_unused, double avg)
 {
@@ -934,12 +934,12 @@ static void print_stalled_cycles_backend(int cpu,
 
 	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " backend  cycles idle   ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " backend  cycles idle   ");
 }
 
-static void print_branch_misses(int cpu,
+static void print_branch_misses(FILE *out, int cpu,
 				struct perf_evsel *evsel __maybe_unused,
 				double avg)
 {
@@ -954,12 +954,12 @@ static void print_branch_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all branches        ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all branches        ");
 }
 
-static void print_l1_dcache_misses(int cpu,
+static void print_l1_dcache_misses(FILE *out, int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
 				   double avg)
 {
@@ -974,12 +974,12 @@ static void print_l1_dcache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all L1-dcache hits  ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-dcache hits  ");
 }
 
-static void print_l1_icache_misses(int cpu,
+static void print_l1_icache_misses(FILE *out, int cpu,
 				   struct perf_evsel *evsel __maybe_unused,
 				   double avg)
 {
@@ -994,12 +994,12 @@ static void print_l1_icache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all L1-icache hits  ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-icache hits  ");
 }
 
-static void print_dtlb_cache_misses(int cpu,
+static void print_dtlb_cache_misses(FILE *out, int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
 				    double avg)
 {
@@ -1014,12 +1014,12 @@ static void print_dtlb_cache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all dTLB cache hits ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all dTLB cache hits ");
 }
 
-static void print_itlb_cache_misses(int cpu,
+static void print_itlb_cache_misses(FILE *out, int cpu,
 				    struct perf_evsel *evsel __maybe_unused,
 				    double avg)
 {
@@ -1034,12 +1034,12 @@ static void print_itlb_cache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all iTLB cache hits ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all iTLB cache hits ");
 }
 
-static void print_ll_cache_misses(int cpu,
+static void print_ll_cache_misses(FILE *out, int cpu,
 				  struct perf_evsel *evsel __maybe_unused,
 				  double avg)
 {
@@ -1054,12 +1054,12 @@ static void print_ll_cache_misses(int cpu,
 
 	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
 
-	fprintf(output, " #  ");
-	color_fprintf(output, color, "%6.2f%%", ratio);
-	fprintf(output, " of all LL-cache hits   ");
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all LL-cache hits   ");
 }
 
-static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
+static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu)
 {
 	double total, ratio = 0.0, total2;
 	int ctx = evsel_context(evsel);
@@ -1068,59 +1068,59 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
 			ratio = avg / total;
-			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
+			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
 		} else {
-			fprintf(output, "                                   ");
+			fprintf(out, "                                   ");
 		}
 		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
 		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 		if (total && avg) {
 			ratio = total / avg;
-			fprintf(output, "\n");
+			fprintf(out, "\n");
 			if (aggr_mode == AGGR_NONE)
-				fprintf(output, "        ");
-			fprintf(output, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+				fprintf(out, "        ");
+			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
 		}
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 			runtime_branches_stats[ctx][cpu].n != 0) {
-		print_branch_misses(cpu, evsel, avg);
+		print_branch_misses(out, cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-		print_l1_dcache_misses(cpu, evsel, avg);
+		print_l1_dcache_misses(out, cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_l1_icache_stats[ctx][cpu].n != 0) {
-		print_l1_icache_misses(cpu, evsel, avg);
+		print_l1_icache_misses(out, cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-		print_dtlb_cache_misses(cpu, evsel, avg);
+		print_dtlb_cache_misses(out, cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-		print_itlb_cache_misses(cpu, evsel, avg);
+		print_itlb_cache_misses(out, cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
 			runtime_ll_cache_stats[ctx][cpu].n != 0) {
-		print_ll_cache_misses(cpu, evsel, avg);
+		print_ll_cache_misses(out, cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
 			runtime_cacherefs_stats[ctx][cpu].n != 0) {
 		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
@@ -1128,25 +1128,25 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(output, " # %8.3f %% of all cache refs    ", ratio);
+		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(cpu, evsel, avg);
+		print_stalled_cycles_frontend(out, cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(cpu, evsel, avg);
+		print_stalled_cycles_backend(out, cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
 
 		if (total) {
 			ratio = avg / total;
-			fprintf(output, " # %8.3f GHz                    ", ratio);
+			fprintf(out, " # %8.3f GHz                    ", ratio);
 		} else {
-			fprintf(output, "                                   ");
+			fprintf(out, "                                   ");
 		}
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
 		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
-			fprintf(output,
+			fprintf(out,
 				" #   %5.2f%% transactional cycles   ",
 				100.0 * (avg / total));
 	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
@@ -1155,7 +1155,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
-			fprintf(output,
+			fprintf(out,
 				" #   %5.2f%% aborted cycles         ",
 				100.0 * ((total2-avg) / total));
 	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
@@ -1166,7 +1166,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 		if (total)
 			ratio = total / avg;
 
-		fprintf(output, " # %8.0f cycles / transaction   ", ratio);
+		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
 	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
 		   avg > 0 &&
 		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
@@ -1175,7 +1175,7 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 		if (total)
 			ratio = total / avg;
 
-		fprintf(output, " # %8.0f cycles / elision       ", ratio);
+		fprintf(out, " # %8.0f cycles / elision       ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
 
@@ -1188,9 +1188,9 @@ static void print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu)
 			unit = 'K';
 		}
 
-		fprintf(output, " # %8.3f %c/sec                  ", ratio, unit);
+		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
 	} else {
-		fprintf(output, "                                   ");
+		fprintf(out, "                                   ");
 	}
 }
 
@@ -1229,7 +1229,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (csv_output || interval)
 		return;
 
-	print_shadow_stats(evsel, avg, cpu);
+	print_shadow_stats(output, evsel, avg, cpu);
 }
 
 static void print_aggr(char *prefix)

From 7a23f57c89cec0e6d3189d420d992902d4465ff4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:58 +0200
Subject: [PATCH 262/305] perf stat: Add aggr_mode argument to
 print_shadow_stats function

As preparation for moving shadow counters code into its own object.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-9-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 14a75ddb60c7..50918dc9fb31 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -1059,7 +1059,8 @@ static void print_ll_cache_misses(FILE *out, int cpu,
 	fprintf(out, " of all LL-cache hits   ");
 }
 
-static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg, int cpu)
+static void print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+			       double avg, int cpu, enum aggr_mode aggr)
 {
 	double total, ratio = 0.0, total2;
 	int ctx = evsel_context(evsel);
@@ -1078,7 +1079,7 @@ static void print_shadow_stats(FILE *out, struct perf_evsel *evsel, double avg,
 		if (total && avg) {
 			ratio = total / avg;
 			fprintf(out, "\n");
-			if (aggr_mode == AGGR_NONE)
+			if (aggr == AGGR_NONE)
 				fprintf(out, "        ");
 			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
 		}
@@ -1229,7 +1230,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (csv_output || interval)
 		return;
 
-	print_shadow_stats(output, evsel, avg, cpu);
+	print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
 }
 
 static void print_aggr(char *prefix)

From f87027b9689d591ec22720944563a2d43ec835c4 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Wed, 3 Jun 2015 16:25:59 +0200
Subject: [PATCH 263/305] perf stat: Move shadow stat counters into separate
 object

Separating shadow counters code into separate object as a cleanup, but
mainly for upcomming changes, so could use it from script command
context.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1433341559-31848-10-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c     | 444 +---------------------------------
 tools/perf/util/Build         |   1 +
 tools/perf/util/stat-shadow.c | 434 +++++++++++++++++++++++++++++++++
 tools/perf/util/stat.h        |  16 ++
 4 files changed, 455 insertions(+), 440 deletions(-)
 create mode 100644 tools/perf/util/stat-shadow.c

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 50918dc9fb31..ff3d25803400 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -102,13 +102,6 @@ static struct target target = {
 	.uid	= UINT_MAX,
 };
 
-enum aggr_mode {
-	AGGR_NONE,
-	AGGR_GLOBAL,
-	AGGR_SOCKET,
-	AGGR_CORE,
-};
-
 static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
@@ -234,72 +227,6 @@ out_free:
 	return -1;
 }
 
-enum {
-	CTX_BIT_USER	= 1 << 0,
-	CTX_BIT_KERNEL	= 1 << 1,
-	CTX_BIT_HV	= 1 << 2,
-	CTX_BIT_HOST	= 1 << 3,
-	CTX_BIT_IDLE	= 1 << 4,
-	CTX_BIT_MAX	= 1 << 5,
-};
-
-#define NUM_CTX CTX_BIT_MAX
-
-static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
-static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
-
-static int evsel_context(struct perf_evsel *evsel)
-{
-	int ctx = 0;
-
-	if (evsel->attr.exclude_kernel)
-		ctx |= CTX_BIT_KERNEL;
-	if (evsel->attr.exclude_user)
-		ctx |= CTX_BIT_USER;
-	if (evsel->attr.exclude_hv)
-		ctx |= CTX_BIT_HV;
-	if (evsel->attr.exclude_host)
-		ctx |= CTX_BIT_HOST;
-	if (evsel->attr.exclude_idle)
-		ctx |= CTX_BIT_IDLE;
-
-	return ctx;
-}
-
-static void reset_shadow_stats(void)
-{
-	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
-	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
-	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
-	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
-	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
-	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
-	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
-	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
-	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
-	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
-	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
-	memset(runtime_cycles_in_tx_stats, 0,
-			sizeof(runtime_cycles_in_tx_stats));
-	memset(runtime_transaction_stats, 0,
-		sizeof(runtime_transaction_stats));
-	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
-	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
-}
-
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
 	struct perf_evsel *evsel;
@@ -309,7 +236,7 @@ static void perf_stat__reset_stats(struct perf_evlist *evlist)
 		perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel));
 	}
 
-	reset_shadow_stats();
+	perf_stat__reset_shadow_stats();
 }
 
 static int create_perf_stat_counter(struct perf_evsel *evsel)
@@ -346,46 +273,6 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
-/*
- * Update various tracking values we maintain to print
- * more semantic information such as miss/hit ratios,
- * instruction rates, etc:
- */
-static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
-				int cpu)
-{
-	int ctx = evsel_context(counter);
-
-	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
-		update_stats(&runtime_nsecs_stats[cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
-	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
-		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
-	else if (perf_stat_evsel__is(counter, ELISION_START))
-		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
-	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
-}
-
 static void zero_per_pkg(struct perf_evsel *counter)
 {
 	if (counter->per_pkg_mask)
@@ -446,7 +333,7 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused,
 		perf_counts_values__scale(count, scale, NULL);
 		evsel->counts->cpu[cpu] = *count;
 		if (aggr_mode == AGGR_NONE)
-			update_shadow_stats(evsel, count->values, cpu);
+			perf_stat__update_shadow_stats(evsel, count->values, cpu);
 		break;
 	case AGGR_GLOBAL:
 		aggr->val += count->val;
@@ -494,7 +381,7 @@ static int read_counter_aggr(struct perf_evsel *counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	update_shadow_stats(counter, count, 0);
+	perf_stat__update_shadow_stats(counter, count, 0);
 
 	return 0;
 }
@@ -872,329 +759,6 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		fprintf(output, "                                   ");
 }
 
-/* used for get_ratio_color() */
-enum grc_type {
-	GRC_STALLED_CYCLES_FE,
-	GRC_STALLED_CYCLES_BE,
-	GRC_CACHE_MISSES,
-	GRC_MAX_NR
-};
-
-static const char *get_ratio_color(enum grc_type type, double ratio)
-{
-	static const double grc_table[GRC_MAX_NR][3] = {
-		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
-		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
-		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
-	};
-	const char *color = PERF_COLOR_NORMAL;
-
-	if (ratio > grc_table[type][0])
-		color = PERF_COLOR_RED;
-	else if (ratio > grc_table[type][1])
-		color = PERF_COLOR_MAGENTA;
-	else if (ratio > grc_table[type][2])
-		color = PERF_COLOR_YELLOW;
-
-	return color;
-}
-
-static void print_stalled_cycles_frontend(FILE *out, int cpu,
-					  struct perf_evsel *evsel
-					  __maybe_unused, double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " frontend cycles idle   ");
-}
-
-static void print_stalled_cycles_backend(FILE *out, int cpu,
-					 struct perf_evsel *evsel
-					 __maybe_unused, double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " backend  cycles idle   ");
-}
-
-static void print_branch_misses(FILE *out, int cpu,
-				struct perf_evsel *evsel __maybe_unused,
-				double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all branches        ");
-}
-
-static void print_l1_dcache_misses(FILE *out, int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-dcache hits  ");
-}
-
-static void print_l1_icache_misses(FILE *out, int cpu,
-				   struct perf_evsel *evsel __maybe_unused,
-				   double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all L1-icache hits  ");
-}
-
-static void print_dtlb_cache_misses(FILE *out, int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all dTLB cache hits ");
-}
-
-static void print_itlb_cache_misses(FILE *out, int cpu,
-				    struct perf_evsel *evsel __maybe_unused,
-				    double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all iTLB cache hits ");
-}
-
-static void print_ll_cache_misses(FILE *out, int cpu,
-				  struct perf_evsel *evsel __maybe_unused,
-				  double avg)
-{
-	double total, ratio = 0.0;
-	const char *color;
-	int ctx = evsel_context(evsel);
-
-	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
-
-	if (total)
-		ratio = avg / total * 100.0;
-
-	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
-
-	fprintf(out, " #  ");
-	color_fprintf(out, color, "%6.2f%%", ratio);
-	fprintf(out, " of all LL-cache hits   ");
-}
-
-static void print_shadow_stats(FILE *out, struct perf_evsel *evsel,
-			       double avg, int cpu, enum aggr_mode aggr)
-{
-	double total, ratio = 0.0, total2;
-	int ctx = evsel_context(evsel);
-
-	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		if (total) {
-			ratio = avg / total;
-			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
-		} else {
-			fprintf(out, "                                   ");
-		}
-		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
-		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
-
-		if (total && avg) {
-			ratio = total / avg;
-			fprintf(out, "\n");
-			if (aggr == AGGR_NONE)
-				fprintf(out, "        ");
-			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
-		}
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[ctx][cpu].n != 0) {
-		print_branch_misses(out, cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
-		print_l1_dcache_misses(out, cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[ctx][cpu].n != 0) {
-		print_l1_icache_misses(out, cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
-		print_dtlb_cache_misses(out, cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
-		print_itlb_cache_misses(out, cpu, evsel, avg);
-	} else if (
-		evsel->attr.type == PERF_TYPE_HW_CACHE &&
-		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
-					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
-					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[ctx][cpu].n != 0) {
-		print_ll_cache_misses(out, cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
-
-		if (total)
-			ratio = avg * 100 / total;
-
-		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
-
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
-		print_stalled_cycles_frontend(out, cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
-		print_stalled_cycles_backend(out, cpu, evsel, avg);
-	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-		if (total) {
-			ratio = avg / total;
-			fprintf(out, " # %8.3f GHz                    ", ratio);
-		} else {
-			fprintf(out, "                                   ");
-		}
-	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		if (total)
-			fprintf(out,
-				" #   %5.2f%% transactional cycles   ",
-				100.0 * (avg / total));
-	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
-		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
-		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-		if (total2 < avg)
-			total2 = avg;
-		if (total)
-			fprintf(out,
-				" #   %5.2f%% aborted cycles         ",
-				100.0 * ((total2-avg) / total));
-	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
-		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-
-		if (total)
-			ratio = total / avg;
-
-		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
-	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
-		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
-
-		if (total)
-			ratio = total / avg;
-
-		fprintf(out, " # %8.0f cycles / elision       ", ratio);
-	} else if (runtime_nsecs_stats[cpu].n != 0) {
-		char unit = 'M';
-
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
-
-		if (total)
-			ratio = 1000.0 * avg / total;
-		if (ratio < 0.001) {
-			ratio *= 1000;
-			unit = 'K';
-		}
-
-		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
-	} else {
-		fprintf(out, "                                   ");
-	}
-}
-
 static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 {
 	double sc =  evsel->scale;
@@ -1230,7 +794,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	if (csv_output || interval)
 		return;
 
-	print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
+	perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode);
 }
 
 static void print_aggr(char *prefix)
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index e4b676de2f64..586a59d46022 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -68,6 +68,7 @@ libperf-y += rblist.o
 libperf-y += intlist.o
 libperf-y += vdso.o
 libperf-y += stat.o
+libperf-y += stat-shadow.o
 libperf-y += record.o
 libperf-y += srcline.o
 libperf-y += data.o
diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c
new file mode 100644
index 000000000000..53e8bb7bc852
--- /dev/null
+++ b/tools/perf/util/stat-shadow.c
@@ -0,0 +1,434 @@
+#include <stdio.h>
+#include "evsel.h"
+#include "stat.h"
+#include "color.h"
+
+enum {
+	CTX_BIT_USER	= 1 << 0,
+	CTX_BIT_KERNEL	= 1 << 1,
+	CTX_BIT_HV	= 1 << 2,
+	CTX_BIT_HOST	= 1 << 3,
+	CTX_BIT_IDLE	= 1 << 4,
+	CTX_BIT_MAX	= 1 << 5,
+};
+
+#define NUM_CTX CTX_BIT_MAX
+
+static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+struct stats walltime_nsecs_stats;
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+	int ctx = 0;
+
+	if (evsel->attr.exclude_kernel)
+		ctx |= CTX_BIT_KERNEL;
+	if (evsel->attr.exclude_user)
+		ctx |= CTX_BIT_USER;
+	if (evsel->attr.exclude_hv)
+		ctx |= CTX_BIT_HV;
+	if (evsel->attr.exclude_host)
+		ctx |= CTX_BIT_HOST;
+	if (evsel->attr.exclude_idle)
+		ctx |= CTX_BIT_IDLE;
+
+	return ctx;
+}
+
+void perf_stat__reset_shadow_stats(void)
+{
+	memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats));
+	memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats));
+	memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats));
+	memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats));
+	memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats));
+	memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats));
+	memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats));
+	memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats));
+	memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats));
+	memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats));
+	memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats));
+	memset(runtime_cycles_in_tx_stats, 0,
+			sizeof(runtime_cycles_in_tx_stats));
+	memset(runtime_transaction_stats, 0,
+		sizeof(runtime_transaction_stats));
+	memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats));
+	memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats));
+}
+
+/*
+ * Update various tracking values we maintain to print
+ * more semantic information such as miss/hit ratios,
+ * instruction rates, etc:
+ */
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+				    int cpu)
+{
+	int ctx = evsel_context(counter);
+
+	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
+		update_stats(&runtime_nsecs_stats[cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
+		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, CYCLES_IN_TX))
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, TRANSACTION_START))
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
+	else if (perf_stat_evsel__is(counter, ELISION_START))
+		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
+		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
+		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
+		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
+		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
+		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
+		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
+	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
+		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
+}
+
+/* used for get_ratio_color() */
+enum grc_type {
+	GRC_STALLED_CYCLES_FE,
+	GRC_STALLED_CYCLES_BE,
+	GRC_CACHE_MISSES,
+	GRC_MAX_NR
+};
+
+static const char *get_ratio_color(enum grc_type type, double ratio)
+{
+	static const double grc_table[GRC_MAX_NR][3] = {
+		[GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 },
+		[GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 },
+		[GRC_CACHE_MISSES] 	= { 20.0, 10.0, 5.0 },
+	};
+	const char *color = PERF_COLOR_NORMAL;
+
+	if (ratio > grc_table[type][0])
+		color = PERF_COLOR_RED;
+	else if (ratio > grc_table[type][1])
+		color = PERF_COLOR_MAGENTA;
+	else if (ratio > grc_table[type][2])
+		color = PERF_COLOR_YELLOW;
+
+	return color;
+}
+
+static void print_stalled_cycles_frontend(FILE *out, int cpu,
+					  struct perf_evsel *evsel
+					  __maybe_unused, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " frontend cycles idle   ");
+}
+
+static void print_stalled_cycles_backend(FILE *out, int cpu,
+					 struct perf_evsel *evsel
+					 __maybe_unused, double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " backend  cycles idle   ");
+}
+
+static void print_branch_misses(FILE *out, int cpu,
+				struct perf_evsel *evsel __maybe_unused,
+				double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all branches        ");
+}
+
+static void print_l1_dcache_misses(FILE *out, int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-dcache hits  ");
+}
+
+static void print_l1_icache_misses(FILE *out, int cpu,
+				   struct perf_evsel *evsel __maybe_unused,
+				   double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all L1-icache hits  ");
+}
+
+static void print_dtlb_cache_misses(FILE *out, int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all dTLB cache hits ");
+}
+
+static void print_itlb_cache_misses(FILE *out, int cpu,
+				    struct perf_evsel *evsel __maybe_unused,
+				    double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all iTLB cache hits ");
+}
+
+static void print_ll_cache_misses(FILE *out, int cpu,
+				  struct perf_evsel *evsel __maybe_unused,
+				  double avg)
+{
+	double total, ratio = 0.0;
+	const char *color;
+	int ctx = evsel_context(evsel);
+
+	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
+
+	if (total)
+		ratio = avg / total * 100.0;
+
+	color = get_ratio_color(GRC_CACHE_MISSES, ratio);
+
+	fprintf(out, " #  ");
+	color_fprintf(out, color, "%6.2f%%", ratio);
+	fprintf(out, " of all LL-cache hits   ");
+}
+
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+				   double avg, int cpu, enum aggr_mode aggr)
+{
+	double total, ratio = 0.0, total2;
+	int ctx = evsel_context(evsel);
+
+	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		if (total) {
+			ratio = avg / total;
+			fprintf(out, " #   %5.2f  insns per cycle        ", ratio);
+		} else {
+			fprintf(out, "                                   ");
+		}
+		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
+
+		if (total && avg) {
+			ratio = total / avg;
+			fprintf(out, "\n");
+			if (aggr == AGGR_NONE)
+				fprintf(out, "        ");
+			fprintf(out, "                                                  #   %5.2f  stalled cycles per insn", ratio);
+		}
+
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
+			runtime_branches_stats[ctx][cpu].n != 0) {
+		print_branch_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
+		print_l1_dcache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_l1_icache_stats[ctx][cpu].n != 0) {
+		print_l1_icache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
+		print_dtlb_cache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
+		print_itlb_cache_misses(out, cpu, evsel, avg);
+	} else if (
+		evsel->attr.type == PERF_TYPE_HW_CACHE &&
+		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
+					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
+					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
+			runtime_ll_cache_stats[ctx][cpu].n != 0) {
+		print_ll_cache_misses(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
+			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
+
+		if (total)
+			ratio = avg * 100 / total;
+
+		fprintf(out, " # %8.3f %% of all cache refs    ", ratio);
+
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) {
+		print_stalled_cycles_frontend(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
+		print_stalled_cycles_backend(out, cpu, evsel, avg);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+		if (total) {
+			ratio = avg / total;
+			fprintf(out, " # %8.3f GHz                    ", ratio);
+		} else {
+			fprintf(out, "                                   ");
+		}
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		if (total)
+			fprintf(out,
+				" #   %5.2f%% transactional cycles   ",
+				100.0 * (avg / total));
+	} else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) {
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+		if (total2 < avg)
+			total2 = avg;
+		if (total)
+			fprintf(out,
+				" #   %5.2f%% aborted cycles         ",
+				100.0 * ((total2-avg) / total));
+	} else if (perf_stat_evsel__is(evsel, TRANSACTION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(out, " # %8.0f cycles / transaction   ", ratio);
+	} else if (perf_stat_evsel__is(evsel, ELISION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(out, " # %8.0f cycles / elision       ", ratio);
+	} else if (runtime_nsecs_stats[cpu].n != 0) {
+		char unit = 'M';
+
+		total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+		if (total)
+			ratio = 1000.0 * avg / total;
+		if (ratio < 0.001) {
+			ratio *= 1000;
+			unit = 'K';
+		}
+
+		fprintf(out, " # %8.3f %c/sec                  ", ratio, unit);
+	} else {
+		fprintf(out, "                                   ");
+	}
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 3df529bd0774..615c779eb42a 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -2,6 +2,7 @@
 #define __PERF_STATS_H
 
 #include <linux/types.h>
+#include <stdio.h>
 
 struct stats
 {
@@ -23,6 +24,13 @@ struct perf_stat {
 	enum perf_stat_evsel_id	id;
 };
 
+enum aggr_mode {
+	AGGR_NONE,
+	AGGR_GLOBAL,
+	AGGR_SOCKET,
+	AGGR_CORE,
+};
+
 void update_stats(struct stats *stats, u64 val);
 double avg_stats(struct stats *stats);
 double stddev_stats(struct stats *stats);
@@ -46,4 +54,12 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel,
 
 void perf_stat_evsel_id_init(struct perf_evsel *evsel);
 
+extern struct stats walltime_nsecs_stats;
+
+void perf_stat__reset_shadow_stats(void);
+void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
+				    int cpu);
+void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
+				   double avg, int cpu, enum aggr_mode aggr);
+
 #endif

From e3d09ec8126fe2c9a3ade661e2126e215ca27a80 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Thu, 28 May 2015 13:28:54 +0000
Subject: [PATCH 264/305] tools lib traceevent: Export dynamic symbols used by
 traceevent plugins

Traceevent plugins need dynamic symbols exported from libtraceevent.a,
otherwise a dlopen error will occur during plugins loading.

This patch uses dynamic-list-file to export dynamic symbols which will
be used in plugins to perf executable.

The problem is covered up if feature-libpython is enabled, because
PYTHON_EMBED_LDOPTS contains '-Xlinker --export-dynamic' which adds all
symbols to the dynamic symbol table. So we should reproduce the problem
by setting NO_LIBPYTHON=1.

Before this patch:

  (Prepare plugins)
  $ ls /root/.traceevent/plugins/
  plugin_sched_switch.so
  plugin_function.so
  ...

  $ perf record -e 'ftrace:function' ls

  $ perf script
    Warning: could not load plugin '/mnt/data/root/.traceevent/plugins/plugin_sched_switch.so'
    /root/.traceevent/plugins/plugin_sched_switch.so: undefined symbol: pevent_unregister_event_handler

    Warning: could not load plugin '/root/.traceevent/plugins/plugin_function.so'
    /root/.traceevent/plugins/plugin_function.so: undefined symbol: warning
    ...
           :1049  1049 [000]  9666.754487: ftrace:function:  ffffffff8118bc50 <-- ffffffff8118c5b3
           :1049  1049 [000]  9666.754487: ftrace:function:  ffffffff818e2440 <-- ffffffff8118bc75
           :1049  1049 [000]  9666.754487: ftrace:function:  ffffffff8106eee0 <-- ffffffff811212e2

After this patch:

  $ perf record -e 'ftrace:function' ls
  $ perf script
           :1049  1049 [000]  9666.754487: ftrace:function: __set_task_comm
           :1049  1049 [000]  9666.754487: ftrace:function:    _raw_spin_lock
           :1049  1049 [000]  9666.754487: ftrace:function: task_tgid_nr_ns
           ...

Signed-off-by: He Kuang <hekuang@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1432819735-35040-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/Makefile | 14 +++++++++++++-
 tools/perf/Makefile.perf      | 14 ++++++++++++--
 2 files changed, 25 insertions(+), 3 deletions(-)

diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile
index 84640394ebf9..6daaff652aff 100644
--- a/tools/lib/traceevent/Makefile
+++ b/tools/lib/traceevent/Makefile
@@ -23,6 +23,7 @@ endef
 # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix.
 $(call allow-override,CC,$(CROSS_COMPILE)gcc)
 $(call allow-override,AR,$(CROSS_COMPILE)ar)
+$(call allow-override,NM,$(CROSS_COMPILE)nm)
 
 EXT = -std=gnu99
 INSTALL = install
@@ -157,8 +158,9 @@ PLUGINS_IN := $(PLUGINS:.so=-in.o)
 
 TE_IN    := $(OUTPUT)libtraceevent-in.o
 LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE))
+DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list
 
-CMD_TARGETS = $(LIB_FILE) $(PLUGINS)
+CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE)
 
 TARGETS = $(CMD_TARGETS)
 
@@ -175,6 +177,9 @@ $(OUTPUT)libtraceevent.so: $(TE_IN)
 $(OUTPUT)libtraceevent.a: $(TE_IN)
 	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^
 
+$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS)
+	$(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@)
+
 plugins: $(PLUGINS)
 
 __plugin_obj = $(notdir $@)
@@ -244,6 +249,13 @@ define do_install_plugins
 	done
 endef
 
+define do_generate_dynamic_list_file
+	(echo '{';							\
+	$(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u;	\
+	echo '};';							\
+	) > $2
+endef
+
 install_lib: all_cmd install_plugins
 	$(call QUIET_INSTALL, $(LIB_FILE)) \
 		$(call do_install,$(LIB_FILE),$(libdir_SQ))
diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 5816a3bb7e9f..b1dfcd8e93e3 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -173,6 +173,9 @@ endif
 LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
+LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
+LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
 
@@ -278,7 +281,7 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. obj
 $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
 	$(Q)$(MAKE) $(build)=perf
 
-$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN)
+$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE
@@ -373,7 +376,13 @@ $(LIB_FILE): $(LIBPERF_IN)
 LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ)
 
 $(LIBTRACEEVENT): FORCE
-	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a
+
+libtraceevent_plugins: FORCE
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins
+
+$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins
+	$(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list
 
 $(LIBTRACEEVENT)-clean:
 	$(call QUIET_CLEAN, libtraceevent)
@@ -555,4 +564,5 @@ FORCE:
 .PHONY: all install clean config-clean strip install-gtk
 .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell
 .PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep
+.PHONY: libtraceevent_plugins
 

From 38e096249b4fca1a26ca8908ea2018a5faf366e2 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Thu, 28 May 2015 13:28:55 +0000
Subject: [PATCH 265/305] tools lib traceevent: Ignore libtrace-dynamic-list
 file

The libtrace-dynamic-list file is used to export symbols used by
traceevent plugins.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1432819735-35040-2-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/lib/traceevent/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore
index 35f56be5a4cd..3c60335fe7be 100644
--- a/tools/lib/traceevent/.gitignore
+++ b/tools/lib/traceevent/.gitignore
@@ -1 +1,2 @@
 TRACEEVENT-CFLAGS
+libtraceevent-dynamic-list

From 457ae94ae047330e75c13f28ead6de31eab245ed Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Thu, 28 May 2015 13:17:30 +0000
Subject: [PATCH 266/305] perf record: Fix perf.data size in no-buildid mode

The size of perf.data is missing update in no-buildid mode, which gives
wrong output result.

Before this patch:

  $ perf.perf record -B -e syscalls:sys_enter_open uname
  Linux
  [ perf record: Woken up 1 times to  write data ]
  [ perf record: Captured and wrote 0.000 MB perf.data ]

After this patch:

  $ perf.perf record -B -e syscalls:sys_enter_open uname
  Linux
  [ perf record: Woken up 1 times to write data ]
  [ perf record: Captured and wrote 0.001 MB perf.data ]

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1432819050-30511-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-record.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 91aa2a3dcf19..d3731cce7c1c 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -345,12 +345,9 @@ static int process_buildids(struct record *rec)
 	struct perf_data_file *file  = &rec->file;
 	struct perf_session *session = rec->session;
 
-	u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
-	if (size == 0)
+	if (file->size == 0)
 		return 0;
 
-	file->size = size;
-
 	/*
 	 * During this process, it'll load kernel map and replace the
 	 * dso->long_name to a real pathname it found.  In this case
@@ -719,6 +716,7 @@ out_child:
 
 	if (!err && !file->is_pipe) {
 		rec->session->header.data_size += rec->bytes_written;
+		file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR);
 
 		if (!rec->no_buildid) {
 			process_buildids(rec);

From 9f2de31542f1ac38a15117f90ee6b8449951d86e Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 1 Jun 2015 12:01:02 -0300
Subject: [PATCH 267/305] perf machine: Fix up some more method names

Calling the function 'machine__new_module' implies a new 'module' will
be allocated, when in fact what is returned is a 'struct map' instance,
that not necessarily will be instantiated, as if one already exists with
the given module name, it will be returned instead.

So be consistent with other "find and if not there, create" like
functions, like machine__findnew_thread, machine__findnew_dso, etc, and
rename it to machine__findnew_module_map(), that in turn will call
machine__findnew_module_dso().

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-acv830vd3hwww2ih5vjtbmu3@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c     | 18 +++++++++---------
 tools/perf/util/machine.h     |  4 ++--
 tools/perf/util/probe-event.c |  2 +-
 tools/perf/util/symbol.c      |  2 +-
 4 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index f15ed24a22ac..dfd419797e6e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -490,9 +490,9 @@ int machine__process_lost_samples_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-static struct dso*
-machine__module_dso(struct machine *machine, struct kmod_path *m,
-		    const char *filename)
+static struct dso *machine__findnew_module_dso(struct machine *machine,
+					       struct kmod_path *m,
+					       const char *filename)
 {
 	struct dso *dso;
 
@@ -534,8 +534,8 @@ int machine__process_itrace_start_event(struct machine *machine __maybe_unused,
 	return 0;
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename)
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename)
 {
 	struct map *map = NULL;
 	struct dso *dso;
@@ -549,7 +549,7 @@ struct map *machine__new_module(struct machine *machine, u64 start,
 	if (map)
 		goto out;
 
-	dso = machine__module_dso(machine, &m, filename);
+	dso = machine__findnew_module_dso(machine, &m, filename);
 	if (dso == NULL)
 		goto out;
 
@@ -1017,7 +1017,7 @@ static int machine__create_module(void *arg, const char *name, u64 start)
 	struct machine *machine = arg;
 	struct map *map;
 
-	map = machine__new_module(machine, start, name);
+	map = machine__findnew_module_map(machine, start, name);
 	if (map == NULL)
 		return -1;
 
@@ -1140,8 +1140,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 				strlen(kmmap_prefix) - 1) == 0;
 	if (event->mmap.filename[0] == '/' ||
 	    (!is_kernel_mmap && event->mmap.filename[0] == '[')) {
-		map = machine__new_module(machine, event->mmap.start,
-					  event->mmap.filename);
+		map = machine__findnew_module_map(machine, event->mmap.start,
+						  event->mmap.filename);
 		if (map == NULL)
 			goto out_problem;
 
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index 8e1f796fd137..ca267c41f28d 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -189,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine,
 						 filter);
 }
 
-struct map *machine__new_module(struct machine *machine, u64 start,
-				const char *filename);
+struct map *machine__findnew_module_map(struct machine *machine, u64 start,
+					const char *filename);
 
 int machine__load_kallsyms(struct machine *machine, const char *filename,
 			   enum map_type type, symbol_filter_t filter);
diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index e6f215b7a052..d4cf50b91839 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -168,7 +168,7 @@ static struct map *kernel_get_module_map(const char *module)
 
 	/* A file path -- this is an offline module */
 	if (module && strchr(module, '/'))
-		return machine__new_module(host_machine, 0, module);
+		return machine__findnew_module_map(host_machine, 0, module);
 
 	if (!module)
 		module = "kernel";
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index a3e80d6ad70a..eaee5d32d39d 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1364,7 +1364,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod,
 	case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP:
 		/*
 		 * kernel modules know their symtab type - it's set when
-		 * creating a module dso in machine__new_module().
+		 * creating a module dso in machine__findnew_module_map().
 		 */
 		return kmod && dso->symtab_type == type;
 

From e88078442232f3bbcb4ff1d24b3f9ab3dca472b9 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 1 Jun 2015 15:40:01 -0300
Subject: [PATCH 268/305] perf tools: Protect accesses the dso rbtrees/lists
 with a rw lock

To allow concurrent access, next step: refcount struct dso instances, so
that we can ditch unused them when the last map pointing to it goes
away.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-yk1k08etpd2aoe3tnrf0oizn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/dso.c     | 52 +++++++++++++++++++++++++++-----------
 tools/perf/util/dso.h     | 10 +++++---
 tools/perf/util/machine.c | 27 +++++++++++++++-----
 tools/perf/util/vdso.c    | 53 +++++++++++++++++++++------------------
 4 files changed, 95 insertions(+), 47 deletions(-)

diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index 5ec9e892c89b..ff0204ac4321 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -889,8 +889,8 @@ struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
  * Either one of the dso or name parameter must be non-NULL or the
  * function will not work.
  */
-static struct dso *dso__findlink_by_longname(struct rb_root *root,
-					     struct dso *dso, const char *name)
+static struct dso *__dso__findlink_by_longname(struct rb_root *root,
+					       struct dso *dso, const char *name)
 {
 	struct rb_node **p = &root->rb_node;
 	struct rb_node  *parent = NULL;
@@ -937,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root,
 	return NULL;
 }
 
-static inline struct dso *
-dso__find_by_longname(const struct rb_root *root, const char *name)
+static inline struct dso *__dso__find_by_longname(struct rb_root *root,
+						  const char *name)
 {
-	return dso__findlink_by_longname((struct rb_root *)root, NULL, name);
+	return __dso__findlink_by_longname(root, NULL, name);
 }
 
 void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated)
@@ -1149,14 +1149,20 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits)
 	return have_build_id;
 }
 
-void dsos__add(struct dsos *dsos, struct dso *dso)
+void __dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
-	dso__findlink_by_longname(&dsos->root, dso, NULL);
+	__dso__findlink_by_longname(&dsos->root, dso, NULL);
 }
 
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short)
+void dsos__add(struct dsos *dsos, struct dso *dso)
+{
+	pthread_rwlock_wrlock(&dsos->lock);
+	__dsos__add(dsos, dso);
+	pthread_rwlock_unlock(&dsos->lock);
+}
+
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
 {
 	struct dso *pos;
 
@@ -1166,15 +1172,24 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name,
 				return pos;
 		return NULL;
 	}
-	return dso__find_by_longname(&dsos->root, name);
+	return __dso__find_by_longname(&dsos->root, name);
 }
 
-struct dso *dsos__addnew(struct dsos *dsos, const char *name)
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short)
+{
+	struct dso *dso;
+	pthread_rwlock_rdlock(&dsos->lock);
+	dso = __dsos__find(dsos, name, cmp_short);
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
+}
+
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name)
 {
 	struct dso *dso = dso__new(name);
 
 	if (dso != NULL) {
-		dsos__add(dsos, dso);
+		__dsos__add(dsos, dso);
 		dso__set_basename(dso);
 	}
 	return dso;
@@ -1182,9 +1197,18 @@ struct dso *dsos__addnew(struct dsos *dsos, const char *name)
 
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name)
 {
-	struct dso *dso = dsos__find(dsos, name, false);
+	struct dso *dso = __dsos__find(dsos, name, false);
 
-	return dso ? dso : dsos__addnew(dsos, name);
+	return dso ? dso : __dsos__addnew(dsos, name);
+}
+
+struct dso *dsos__findnew(struct dsos *dsos, const char *name)
+{
+	struct dso *dso;
+	pthread_rwlock_wrlock(&dsos->lock);
+	dso = __dsos__findnew(dsos, name);
+	pthread_rwlock_unlock(&dsos->lock);
+	return dso;
 }
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index ba2d90ed881f..c16ab5d849c3 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -4,6 +4,7 @@
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <stdbool.h>
+#include <pthread.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
 #include "map.h"
@@ -124,6 +125,7 @@ struct dso_cache {
 struct dsos {
 	struct list_head head;
 	struct rb_root	 root;	/* rbtree root sorted by long name */
+	pthread_rwlock_t lock;
 };
 
 struct auxtrace_cache;
@@ -297,11 +299,13 @@ struct map *dso__new_map(const char *name);
 struct dso *machine__findnew_kernel(struct machine *machine, const char *name,
 				    const char *short_name, int dso_type);
 
+void __dsos__add(struct dsos *dsos, struct dso *dso);
 void dsos__add(struct dsos *dsos, struct dso *dso);
-struct dso *dsos__addnew(struct dsos *dsos, const char *name);
-struct dso *dsos__find(const struct dsos *dsos, const char *name,
-		       bool cmp_short);
+struct dso *__dsos__addnew(struct dsos *dsos, const char *name);
+struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
+struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short);
 struct dso *__dsos__findnew(struct dsos *dsos, const char *name);
+struct dso *dsos__findnew(struct dsos *dsos, const char *name);
 bool __dsos__read_build_ids(struct list_head *head, bool with_hits);
 
 size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp,
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index dfd419797e6e..0cf56d6f073a 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -20,6 +20,7 @@ static void dsos__init(struct dsos *dsos)
 {
 	INIT_LIST_HEAD(&dsos->head);
 	dsos->root = RB_ROOT;
+	pthread_rwlock_init(&dsos->lock, NULL);
 }
 
 int machine__init(struct machine *machine, const char *root_dir, pid_t pid)
@@ -81,15 +82,21 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__delete(struct dsos *dsos)
+static void dsos__exit(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
+	pthread_rwlock_wrlock(&dsos->lock);
+
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		RB_CLEAR_NODE(&pos->rb_node);
 		list_del(&pos->node);
 		dso__delete(pos);
 	}
+
+	pthread_rwlock_unlock(&dsos->lock);
+
+	pthread_rwlock_destroy(&dsos->lock);
 }
 
 void machine__delete_threads(struct machine *machine)
@@ -110,7 +117,7 @@ void machine__delete_threads(struct machine *machine)
 void machine__exit(struct machine *machine)
 {
 	map_groups__exit(&machine->kmaps);
-	dsos__delete(&machine->dsos);
+	dsos__exit(&machine->dsos);
 	machine__exit_vdso(machine);
 	zfree(&machine->root_dir);
 	zfree(&machine->current_tid);
@@ -496,11 +503,13 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 {
 	struct dso *dso;
 
-	dso = dsos__find(&machine->dsos, m->name, true);
+	pthread_rwlock_wrlock(&machine->dsos.lock);
+
+	dso = __dsos__find(&machine->dsos, m->name, true);
 	if (!dso) {
-		dso = dsos__addnew(&machine->dsos, m->name);
+		dso = __dsos__addnew(&machine->dsos, m->name);
 		if (dso == NULL)
-			return NULL;
+			goto out_unlock;
 
 		if (machine__is_host(machine))
 			dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE;
@@ -515,6 +524,8 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
+out_unlock:
+	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
 }
 
@@ -1156,6 +1167,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 		struct dso *kernel = NULL;
 		struct dso *dso;
 
+		pthread_rwlock_rdlock(&machine->dsos.lock);
+
 		list_for_each_entry(dso, &machine->dsos.head, node) {
 
 			/*
@@ -1184,6 +1197,8 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 			break;
 		}
 
+		pthread_rwlock_unlock(&machine->dsos.lock);
+
 		if (kernel == NULL)
 			kernel = machine__findnew_dso(machine, kmmap_prefix);
 		if (kernel == NULL)
@@ -1948,5 +1963,5 @@ int machine__get_kernel_start(struct machine *machine)
 
 struct dso *machine__findnew_dso(struct machine *machine, const char *filename)
 {
-	return __dsos__findnew(&machine->dsos, filename);
+	return dsos__findnew(&machine->dsos, filename);
 }
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index 2e8f6886ca72..c646c74c34f8 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -120,14 +120,14 @@ void machine__exit_vdso(struct machine *machine)
 	zfree(&machine->vdso_info);
 }
 
-static struct dso *machine__addnew_vdso(struct machine *machine, const char *short_name,
-					const char *long_name)
+static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name,
+					  const char *long_name)
 {
 	struct dso *dso;
 
 	dso = dso__new(short_name);
 	if (dso != NULL) {
-		dsos__add(&machine->dsos, dso);
+		__dsos__add(&machine->dsos, dso);
 		dso__set_long_name(dso, long_name, false);
 	}
 
@@ -230,27 +230,31 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file)
 	return vdso_file->temp_file_name;
 }
 
-static struct dso *vdso__findnew_compat(struct machine *machine,
-					struct vdso_file *vdso_file)
+static struct dso *__machine__findnew_compat(struct machine *machine,
+					     struct vdso_file *vdso_file)
 {
 	const char *file_name;
 	struct dso *dso;
 
-	dso = dsos__find(&machine->dsos, vdso_file->dso_name, true);
+	pthread_rwlock_wrlock(&machine->dsos.lock);
+	dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true);
 	if (dso)
-		return dso;
+		goto out_unlock;
 
 	file_name = vdso__get_compat_file(vdso_file);
 	if (!file_name)
-		return NULL;
+		goto out_unlock;
 
-	return machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+	dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name);
+out_unlock:
+	pthread_rwlock_unlock(&machine->dsos.lock);
+	return dso;
 }
 
-static int machine__findnew_vdso_compat(struct machine *machine,
-				    struct thread *thread,
-				    struct vdso_info *vdso_info,
-				    struct dso **dso)
+static int __machine__findnew_vdso_compat(struct machine *machine,
+					  struct thread *thread,
+					  struct vdso_info *vdso_info,
+					  struct dso **dso)
 {
 	enum dso_type dso_type;
 
@@ -267,10 +271,10 @@ static int machine__findnew_vdso_compat(struct machine *machine,
 
 	switch (dso_type) {
 	case DSO__TYPE_32BIT:
-		*dso = vdso__findnew_compat(machine, &vdso_info->vdso32);
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdso32);
 		return 1;
 	case DSO__TYPE_X32BIT:
-		*dso = vdso__findnew_compat(machine, &vdso_info->vdsox32);
+		*dso = __machine__findnew_compat(machine, &vdso_info->vdsox32);
 		return 1;
 	case DSO__TYPE_UNKNOWN:
 	case DSO__TYPE_64BIT:
@@ -285,31 +289,32 @@ struct dso *machine__findnew_vdso(struct machine *machine,
 				  struct thread *thread __maybe_unused)
 {
 	struct vdso_info *vdso_info;
-	struct dso *dso;
+	struct dso *dso = NULL;
 
+	pthread_rwlock_wrlock(&machine->dsos.lock);
 	if (!machine->vdso_info)
 		machine->vdso_info = vdso_info__new();
 
 	vdso_info = machine->vdso_info;
 	if (!vdso_info)
-		return NULL;
+		goto out_unlock;
 
 #if BITS_PER_LONG == 64
-	if (machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
-		return dso;
+	if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso))
+		goto out_unlock;
 #endif
 
-	dso = dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
+	dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true);
 	if (!dso) {
 		char *file;
 
 		file = get_file(&vdso_info->vdso);
-		if (!file)
-			return NULL;
-
-		dso = machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
+		if (file)
+			dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file);
 	}
 
+out_unlock:
+	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
 }
 

From d3a7c489c7fd2463e3b2c3a2179c7be879dd9cb4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Tue, 2 Jun 2015 11:53:26 -0300
Subject: [PATCH 269/305] perf tools: Reference count struct dso

This has a different model than the 'thread' and 'map' struct lifetimes:
there is not a definitive "don't use this DSO anymore" event, i.e. we may
get many 'struct map' holding references to the '/usr/lib64/libc-2.20.so'
DSO but then at some point some DSO may have no references but we still
don't want to straight away release its resources, because "soon" we may
get a new 'struct map' that needs it and we want to reuse its symtab or
other resources.

So we need some way to garbage collect it when crossing some memory
usage threshold, which is left for anoter patch, for now it is
sufficient to release it when calling dsos__exit(), i.e. when deleting
the whole list as part of deleting the 'struct machine' containing it,
which will leave only referenced objects being used.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-majzgz07cm90t2tejrjy4clf@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/dso-data.c     |  4 ++--
 tools/perf/tests/hists_common.c |  6 +++++-
 tools/perf/util/dso.c           | 37 ++++++++++++++++++++++++++++++++-
 tools/perf/util/dso.h           | 14 ++++++++++++-
 tools/perf/util/header.c        |  1 +
 tools/perf/util/machine.c       | 15 +++++++++----
 tools/perf/util/map.c           | 11 ++++++++--
 tools/perf/util/probe-finder.c  |  2 +-
 tools/perf/util/symbol-elf.c    |  2 +-
 tools/perf/util/symbol.c        |  2 +-
 tools/perf/util/vdso.c          |  1 +
 11 files changed, 81 insertions(+), 14 deletions(-)

diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c
index 3e41c61bd861..a218aeaf56a0 100644
--- a/tools/perf/tests/dso-data.c
+++ b/tools/perf/tests/dso-data.c
@@ -166,7 +166,7 @@ int test__dso_data(void)
 		free(buf);
 	}
 
-	dso__delete(dso);
+	dso__put(dso);
 	unlink(file);
 	return 0;
 }
@@ -226,7 +226,7 @@ static void dsos__delete(int cnt)
 		struct dso *dso = dsos[i];
 
 		unlink(dso->name);
-		dso__delete(dso);
+		dso__put(dso);
 	}
 
 	free(dsos);
diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c
index 915f60af6a0e..ce80b274b097 100644
--- a/tools/perf/tests/hists_common.c
+++ b/tools/perf/tests/hists_common.c
@@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines)
 
 			sym = symbol__new(fsym->start, fsym->length,
 					  STB_GLOBAL, fsym->name);
-			if (sym == NULL)
+			if (sym == NULL) {
+				dso__put(dso);
 				goto out;
+			}
 
 			symbols__insert(&dso->symbols[MAP__FUNCTION], sym);
 		}
+
+		dso__put(dso);
 	}
 
 	return machine;
diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c
index ff0204ac4321..7c0c08386a1d 100644
--- a/tools/perf/util/dso.c
+++ b/tools/perf/util/dso.c
@@ -1049,6 +1049,7 @@ struct dso *dso__new(const char *name)
 		INIT_LIST_HEAD(&dso->node);
 		INIT_LIST_HEAD(&dso->data.open_entry);
 		pthread_mutex_init(&dso->lock, NULL);
+		atomic_set(&dso->refcnt, 1);
 	}
 
 	return dso;
@@ -1083,6 +1084,19 @@ void dso__delete(struct dso *dso)
 	free(dso);
 }
 
+struct dso *dso__get(struct dso *dso)
+{
+	if (dso)
+		atomic_inc(&dso->refcnt);
+	return dso;
+}
+
+void dso__put(struct dso *dso)
+{
+	if (dso && atomic_dec_and_test(&dso->refcnt))
+		dso__delete(dso);
+}
+
 void dso__set_build_id(struct dso *dso, void *build_id)
 {
 	memcpy(dso->build_id, build_id, sizeof(dso->build_id));
@@ -1153,6 +1167,27 @@ void __dsos__add(struct dsos *dsos, struct dso *dso)
 {
 	list_add_tail(&dso->node, &dsos->head);
 	__dso__findlink_by_longname(&dsos->root, dso, NULL);
+	/*
+	 * It is now in the linked list, grab a reference, then garbage collect
+	 * this when needing memory, by looking at LRU dso instances in the
+	 * list with atomic_read(&dso->refcnt) == 1, i.e. no references
+	 * anywhere besides the one for the list, do, under a lock for the
+	 * list: remove it from the list, then a dso__put(), that probably will
+	 * be the last and will then call dso__delete(), end of life.
+	 *
+	 * That, or at the end of the 'struct machine' lifetime, when all
+	 * 'struct dso' instances will be removed from the list, in
+	 * dsos__exit(), if they have no other reference from some other data
+	 * structure.
+	 *
+	 * E.g.: after processing a 'perf.data' file and storing references
+	 * to objects instantiated while processing events, we will have
+	 * references to the 'thread', 'map', 'dso' structs all from 'struct
+	 * hist_entry' instances, but we may not need anything not referenced,
+	 * so we might as well call machines__exit()/machines__delete() and
+	 * garbage collect it.
+	 */
+	dso__get(dso);
 }
 
 void dsos__add(struct dsos *dsos, struct dso *dso)
@@ -1206,7 +1241,7 @@ struct dso *dsos__findnew(struct dsos *dsos, const char *name)
 {
 	struct dso *dso;
 	pthread_rwlock_wrlock(&dsos->lock);
-	dso = __dsos__findnew(dsos, name);
+	dso = dso__get(__dsos__findnew(dsos, name));
 	pthread_rwlock_unlock(&dsos->lock);
 	return dso;
 }
diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
index c16ab5d849c3..2fe98bb0e95b 100644
--- a/tools/perf/util/dso.h
+++ b/tools/perf/util/dso.h
@@ -1,6 +1,7 @@
 #ifndef __PERF_DSO
 #define __PERF_DSO
 
+#include <linux/atomic.h>
 #include <linux/types.h>
 #include <linux/rbtree.h>
 #include <stdbool.h>
@@ -179,7 +180,7 @@ struct dso {
 		void	 *priv;
 		u64	 db_id;
 	};
-
+	atomic_t	 refcnt;
 	char		 name[0];
 };
 
@@ -206,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated);
 
 int dso__name_len(const struct dso *dso);
 
+struct dso *dso__get(struct dso *dso);
+void dso__put(struct dso *dso);
+
+static inline void __dso__zput(struct dso **dso)
+{
+	dso__put(*dso);
+	*dso = NULL;
+}
+
+#define dso__zput(dso) __dso__zput(&dso)
+
 bool dso__loaded(const struct dso *dso, enum map_type type);
 
 bool dso__sorted_by_name(const struct dso *dso, enum map_type type);
diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
index ac5aaaeed7ff..21a77e7a171e 100644
--- a/tools/perf/util/header.c
+++ b/tools/perf/util/header.c
@@ -1277,6 +1277,7 @@ static int __event_process_build_id(struct build_id_event *bev,
 				  sbuild_id);
 		pr_debug("build id event received for %s: %s\n",
 			 dso->long_name, sbuild_id);
+		dso__put(dso);
 	}
 
 	err = 0;
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 0cf56d6f073a..132e35765101 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -82,7 +82,7 @@ out_delete:
 	return NULL;
 }
 
-static void dsos__exit(struct dsos *dsos)
+static void dsos__purge(struct dsos *dsos)
 {
 	struct dso *pos, *n;
 
@@ -90,12 +90,16 @@ static void dsos__exit(struct dsos *dsos)
 
 	list_for_each_entry_safe(pos, n, &dsos->head, node) {
 		RB_CLEAR_NODE(&pos->rb_node);
-		list_del(&pos->node);
-		dso__delete(pos);
+		list_del_init(&pos->node);
+		dso__put(pos);
 	}
 
 	pthread_rwlock_unlock(&dsos->lock);
+}
 
+static void dsos__exit(struct dsos *dsos)
+{
+	dsos__purge(dsos);
 	pthread_rwlock_destroy(&dsos->lock);
 }
 
@@ -524,6 +528,7 @@ static struct dso *machine__findnew_module_dso(struct machine *machine,
 		dso__set_long_name(dso, strdup(filename), true);
 	}
 
+	dso__get(dso);
 out_unlock:
 	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
@@ -1205,8 +1210,10 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
 			goto out_problem;
 
 		kernel->kernel = kernel_type;
-		if (__machine__create_kernel_maps(machine, kernel) < 0)
+		if (__machine__create_kernel_maps(machine, kernel) < 0) {
+			dso__put(kernel);
 			goto out_problem;
+		}
 
 		if (strstr(kernel->long_name, "vmlinux"))
 			dso__set_short_name(kernel, "[kernel.vmlinux]", false);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 365011c233a6..1241ab989cf5 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -132,7 +132,7 @@ void map__init(struct map *map, enum map_type type,
 	map->end      = end;
 	map->pgoff    = pgoff;
 	map->reloc    = 0;
-	map->dso      = dso;
+	map->dso      = dso__get(dso);
 	map->map_ip   = map__map_ip;
 	map->unmap_ip = map__unmap_ip;
 	RB_CLEAR_NODE(&map->rb_node);
@@ -198,6 +198,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len,
 			if (type != MAP__FUNCTION)
 				dso__set_loaded(dso, map->type);
 		}
+		dso__put(dso);
 	}
 	return map;
 out_delete:
@@ -224,9 +225,15 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type)
 	return map;
 }
 
-void map__delete(struct map *map)
+static void map__exit(struct map *map)
 {
 	BUG_ON(!RB_EMPTY_NODE(&map->rb_node));
+	dso__zput(map->dso);
+}
+
+void map__delete(struct map *map)
+{
+	map__exit(map);
 	free(map);
 }
 
diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c
index c50da392e256..2da65a710893 100644
--- a/tools/perf/util/probe-finder.c
+++ b/tools/perf/util/probe-finder.c
@@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path)
 			continue;
 		dinfo = __debuginfo__new(buf);
 	}
-	dso__delete(dso);
+	dso__put(dso);
 
 out:
 	/* if failed to open all distro debuginfo, open given binary */
diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c
index a93ba85509b2..65f7e389ae09 100644
--- a/tools/perf/util/symbol-elf.c
+++ b/tools/perf/util/symbol-elf.c
@@ -1016,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map,
 				curr_map = map__new2(start, curr_dso,
 						     map->type);
 				if (curr_map == NULL) {
-					dso__delete(curr_dso);
+					dso__put(curr_dso);
 					goto out_elf_end;
 				}
 				if (adjust_kernel_syms) {
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index eaee5d32d39d..504f2d73b7ee 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -786,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta,
 
 			curr_map = map__new2(pos->start, ndso, map->type);
 			if (curr_map == NULL) {
-				dso__delete(ndso);
+				dso__put(ndso);
 				return -1;
 			}
 
diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c
index c646c74c34f8..4b89118f158d 100644
--- a/tools/perf/util/vdso.c
+++ b/tools/perf/util/vdso.c
@@ -314,6 +314,7 @@ struct dso *machine__findnew_vdso(struct machine *machine,
 	}
 
 out_unlock:
+	dso__get(dso);
 	pthread_rwlock_unlock(&machine->dsos.lock);
 	return dso;
 }

From d7c72606d97e6f462a99b79e55b39808147d4c8b Mon Sep 17 00:00:00 2001
From: Milos Vyletel <milos@redhat.com>
Date: Mon, 8 Jun 2015 16:50:16 +0200
Subject: [PATCH 270/305] perf tools: Avoid possible race condition in
 copyfile()

Use unique temporary files when copying to buildid dir to prevent races
in case multiple instances are trying to copy same file. This is done by

- creating template in form <path>/.<filename>.XXXXXX where the suffix is
  used by mkstemp() to create unique file
- change file mode
- copy content
- if successful link temp file to target file
- unlink temp file

At this point the only file left at target path should be the desired
one either created by us or other instance if we raced. This should also
prevent not yet fully copied files to be visible to to other perf
instances that could try to parse them.

On top of that slow_copyfile no longer needs to deal with file mode when
creating file since temporary file is already created and mode is set.

Succesfully tested by myself by running perf record, archive and reading
the data on other system and by running perf buildid-cache on perf
binary itself. I also did revert fix from 0635b0f that to exposes
previously fixed race with EEXIST and recreator test passed sucessfully.

Signed-off-by: Milos Vyletel <milos@redhat.com>
Acked-by: Ingo Molnar <mingo@kernel.org>
Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Steven Rostedt <rostedt@goodmis.org>
Link: http://lkml.kernel.org/r/1433775018-19868-1-git-send-email-milos@redhat.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/util.c | 46 ++++++++++++++++++++++++++++--------------
 1 file changed, 31 insertions(+), 15 deletions(-)

diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c
index 0c264bc685ac..edc2d633b332 100644
--- a/tools/perf/util/util.c
+++ b/tools/perf/util/util.c
@@ -115,20 +115,17 @@ int rm_rf(char *path)
 	return rmdir(path);
 }
 
-static int slow_copyfile(const char *from, const char *to, mode_t mode)
+static int slow_copyfile(const char *from, const char *to)
 {
 	int err = -1;
 	char *line = NULL;
 	size_t n;
 	FILE *from_fp = fopen(from, "r"), *to_fp;
-	mode_t old_umask;
 
 	if (from_fp == NULL)
 		goto out;
 
-	old_umask = umask(mode ^ 0777);
 	to_fp = fopen(to, "w");
-	umask(old_umask);
 	if (to_fp == NULL)
 		goto out_fclose_from;
 
@@ -178,29 +175,48 @@ int copyfile_mode(const char *from, const char *to, mode_t mode)
 	int fromfd, tofd;
 	struct stat st;
 	int err = -1;
+	char *tmp = NULL, *ptr = NULL;
 
 	if (stat(from, &st))
 		goto out;
 
-	if (st.st_size == 0) /* /proc? do it slowly... */
-		return slow_copyfile(from, to, mode);
+	/* extra 'x' at the end is to reserve space for '.' */
+	if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) {
+		tmp = NULL;
+		goto out;
+	}
+	ptr = strrchr(tmp, '/');
+	if (!ptr)
+		goto out;
+	ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1);
+	*ptr = '.';
+
+	tofd = mkstemp(tmp);
+	if (tofd < 0)
+		goto out;
+
+	if (fchmod(tofd, mode))
+		goto out_close_to;
+
+	if (st.st_size == 0) { /* /proc? do it slowly... */
+		err = slow_copyfile(from, tmp);
+		goto out_close_to;
+	}
 
 	fromfd = open(from, O_RDONLY);
 	if (fromfd < 0)
-		goto out;
-
-	tofd = creat(to, mode);
-	if (tofd < 0)
-		goto out_close_from;
+		goto out_close_to;
 
 	err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size);
 
-	close(tofd);
-	if (err)
-		unlink(to);
-out_close_from:
 	close(fromfd);
+out_close_to:
+	close(tofd);
+	if (!err)
+		err = link(tmp, to);
+	unlink(tmp);
 out:
+	free(tmp);
 	return err;
 }
 

From 5610032135c05e7bf9cba231826577a01719d010 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Wed, 10 Jun 2015 16:48:50 +0200
Subject: [PATCH 271/305] perf record: Amend option summaries

Because there's too many options and I cannot read, I frequently get
confused between -c and -P, and try to do things like:

  perf record -P 50000 -- foo

Which does not work; try and make the option description slightly longer
and hopefully less confusing.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150610144850.GP19282@twins.programming.kicks-ass.net
[ Do those changes on the man page as well ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-record.txt | 10 +++++++---
 tools/perf/builtin-record.c              |  7 +++----
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 280533ebf9df..6fdf78625c51 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -152,12 +152,16 @@ OPTIONS
 
 -d::
 --data::
-	Sample addresses.
+	Record the sample addresses.
 
 -T::
 --timestamp::
-	Sample timestamps. Use it with 'perf report -D' to see the timestamps,
-	for instance.
+	Record the sample timestamps. Use it with 'perf report -D' to see the
+	timestamps, for instance.
+
+-P::
+--period::
+	Record the sample period.
 
 -n::
 --no-samples::
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index d3731cce7c1c..4d6cdeb94fe1 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -1027,10 +1027,9 @@ struct option __record_options[] = {
 	OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"),
 	OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat,
 		    "per thread counts"),
-	OPT_BOOLEAN('d', "data", &record.opts.sample_address,
-		    "Sample addresses"),
-	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"),
-	OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"),
+	OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"),
+	OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"),
+	OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"),
 	OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples,
 		    "don't sample"),
 	OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache,

From 7310aed77ef2928bcfb8ee5ad71e2b091166b85e Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Thu, 11 Jun 2015 15:51:04 +0300
Subject: [PATCH 272/305] perf evsel: Display 0x for hex values when printing
 the attribute

Need to display '0x' prefix for hex values otherwise it is not obvious
they are hex.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1434027064-7554-1-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evsel.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index a3e36fc634dc..d4f9994ae47f 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -1058,7 +1058,7 @@ static void __p_read_format(char *buf, size_t size, u64 value)
 
 #define BUF_SIZE		1024
 
-#define p_hex(val)		snprintf(buf, BUF_SIZE, "%"PRIx64, (uint64_t)(val))
+#define p_hex(val)		snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val))
 #define p_unsigned(val)		snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val))
 #define p_signed(val)		snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val))
 #define p_sample_type(val)	__p_sample_type(buf, BUF_SIZE, val)

From cb5ef60067c11cc8887122f6f168c21941c5d624 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Thu, 11 Jun 2015 02:32:40 -0400
Subject: [PATCH 273/305] perf stat: Error out unsupported group leader
 immediately

perf stat ignores the unsupported event and continue to count supported
event. But if the unsupported event is group leader, perf tool will
crash. After applying this patch, the unsupported group leader will
error out immediately.

Without this patch:

  $ perf stat -x, -e '{node-prefetch-refs,cycles}' --  sleep 1
  perf: util/evsel.c:1009: get_group_fd: Assertion `!(fd == -1)' failed.
  Aborted (core dumped)

With this patch:

  $ perf stat -x, -e '{node-prefetch-refs,cycles}' --  sleep 1
  Error:
  The node-prefetch-refs event is not supported.

Commiter note: Here I got a different output, but no core dump:

  [acme@zoo linux]$ perf stat -x, -e '{node-prefetch-refs,cycles}' -- sleep 1
  Error:
  The sys_perf_event_open() syscall returned with 22 (Invalid argument)
  for event (node-prefetch-refs).
  /bin/dmesg may provide additional information.
  No CONFIG_PERF_EVENTS=y kernel support configured?

Signed-off-by: Kan Liang <kan.liang@intel.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <andi@firstfloor.org>
Link: http://lkml.kernel.org/r/1434004360-8570-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index ff3d25803400..b24ecee95fec 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -549,7 +549,10 @@ static int __run_perf_stat(int argc, const char **argv)
 					ui__warning("%s event is not supported by the kernel.\n",
 						    perf_evsel__name(counter));
 				counter->supported = false;
-				continue;
+
+				if ((counter->leader != counter) ||
+				    !(counter->leader->nr_members > 1))
+					continue;
 			}
 
 			perf_evsel__open_strerror(counter, &target,

From 6ba29c2fa5adcc33b201faec99057b6a72bd5029 Mon Sep 17 00:00:00 2001
From: He Kuang <hekuang@huawei.com>
Date: Thu, 11 Jun 2015 12:44:24 +0000
Subject: [PATCH 274/305] perf tools: Fix build failure on 32-bit arch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Failed in 32bit arch build like this:

    CC       /opt/h00206996/output/perf/arm32/builtin-record.o
  util/session.c: In function ‘perf_session__warn_about_errors’:
  util/session.c:1304:9: error: format ‘%lu’ expects argument of type ‘long unsigned int’,
                         but argument 2 has type ‘long long unsigned int’ [-Werror=format=]

  builtin-report.c: In function ‘perf_evlist__tty_browse_hists’:
  builtin-report.c:323:2: error: format ‘%lu’ expects argument of type ‘long unsigned int’,
                          but argument 3 has type ‘u64’ [-Werror=format=]

Replace %lu format strings in warning message with PRIu64 for u64
'total_lost_samples' to fix this problem.

Signed-off-by: He Kuang <hekuang@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Kan Liang <kan.liang@intel.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Wang Nan <wangnan0@huawei.com>
Link: http://lkml.kernel.org/r/1434026664-71642-1-git-send-email-hekuang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-report.c | 2 +-
 tools/perf/util/session.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index 628090b478ab..32626ea3e227 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -320,7 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist,
 {
 	struct perf_evsel *pos;
 
-	fprintf(stdout, "#\n# Total Lost Samples: %lu\n#\n", evlist->stats.total_lost_samples);
+	fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples);
 	evlist__for_each(evlist, pos) {
 		struct hists *hists = evsel__hists(pos);
 		const char *evname = perf_evsel__name(pos);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 88d87bf3049f..f31e024ddf7d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1299,7 +1299,7 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		drop_rate = (double)stats->total_lost_samples /
 			    (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples);
 		if (drop_rate > 0.05) {
-			ui__warning("Processed %lu samples and lost %3.2f%% samples!\n\n",
+			ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n",
 				    stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples,
 				    drop_rate * 100.0);
 		}

From a1c2552dba788c2c5e71c5bf5bcf3971caf3dfa1 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Thu, 11 Jun 2015 22:47:54 -0300
Subject: [PATCH 275/305] trace: Beautify perf_event_open syscall

Syswide tracing and then running 'stat' and 'trace':

 $ perf trace -e perf_event_open
 1034.649 (0.019 ms): perf/6133 perf_event_open(attr_uptr: 0x36f0360, pid: 16134, cpu: -1, group_fd: -1, flags: FD_CLOEXEC) = -1 EINVAL Invalid argument
 1034.670 (0.008 ms): perf/6133 perf_event_open(attr_uptr: 0x36f0360, pid: 16134, cpu: -1, group_fd: -1) = -1 EINVAL Invalid argument
 1034.681 (0.007 ms): perf/6133 perf_event_open(attr_uptr: 0x36f0360, pid: 16134, cpu: -1, group_fd: -1) = -1 EINVAL Invalid argument
 1034.692 (0.007 ms): perf/6133 perf_event_open(attr_uptr: 0x36f0360, pid: 16134, cpu: -1, group_fd: -1) = -1 EINVAL Invalid argument
 9986.983 (0.014 ms): trace/6139 perf_event_open(attr_uptr: 0x7ffd9c629320, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
 9987.026 (0.016 ms): trace/6139 perf_event_open(attr_uptr: 0x37c7e70, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
 9987.041 (0.008 ms): trace/6139 perf_event_open(attr_uptr: 0x37c7e70, pid: -1, group_fd: -1, flags: FD_CLOEXEC) = 3
 9987.489 (0.092 ms): trace/6139 perf_event_open(attr_uptr: 0x3795ee0, pid: 16140, group_fd: -1, flags: FD_CLOEXEC) = 3
 9987.536 (0.044 ms): trace/6139 perf_event_open(attr_uptr: 0x3795ee0, pid: 16140, cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 4
 9987.580 (0.041 ms): trace/6139 perf_event_open(attr_uptr: 0x3795ee0, pid: 16140, cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 5
 9987.620 (0.037 ms): trace/6139 perf_event_open(attr_uptr: 0x3795ee0, pid: 16140, cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 7
 9987.659 (0.035 ms): trace/6139 perf_event_open(attr_uptr: 0x37975d0, pid: 16140, group_fd: -1, flags: FD_CLOEXEC) = 8
 9987.692 (0.031 ms): trace/6139 perf_event_open(attr_uptr: 0x37975d0, pid: 16140, cpu: 1, group_fd: -1, flags: FD_CLOEXEC) = 9
 9987.727 (0.032 ms): trace/6139 perf_event_open(attr_uptr: 0x37975d0, pid: 16140, cpu: 2, group_fd: -1, flags: FD_CLOEXEC) = 10
 9987.761 (0.031 ms): trace/6139 perf_event_open(attr_uptr: 0x37975d0, pid: 16140, cpu: 3, group_fd: -1, flags: FD_CLOEXEC) = 11

Need to intercept perf_copy_attr() with a kprobe or with eBPF...

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Link: http://lkml.kernel.org/n/tip-njb105hab2i3t5dexym9lskl@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-trace.c | 58 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index a05490d06374..4bf805b2fbf6 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -68,6 +68,23 @@
 # define MSG_CMSG_CLOEXEC	0x40000000
 #endif
 
+#ifndef PERF_FLAG_FD_NO_GROUP
+# define PERF_FLAG_FD_NO_GROUP		(1UL << 0)
+#endif
+
+#ifndef PERF_FLAG_FD_OUTPUT
+# define PERF_FLAG_FD_OUTPUT		(1UL << 1)
+#endif
+
+#ifndef PERF_FLAG_PID_CGROUP
+# define PERF_FLAG_PID_CGROUP		(1UL << 2) /* pid=cgroup id, per-cpu mode only */
+#endif
+
+#ifndef PERF_FLAG_FD_CLOEXEC
+# define PERF_FLAG_FD_CLOEXEC		(1UL << 3) /* O_CLOEXEC */
+#endif
+
+
 struct tp_field {
 	int offset;
 	union {
@@ -358,6 +375,14 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size,
 
 #define SCA_HEX syscall_arg__scnprintf_hex
 
+static size_t syscall_arg__scnprintf_int(char *bf, size_t size,
+					 struct syscall_arg *arg)
+{
+	return scnprintf(bf, size, "%d", arg->val);
+}
+
+#define SCA_INT syscall_arg__scnprintf_int
+
 static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size,
 					       struct syscall_arg *arg)
 {
@@ -810,6 +835,34 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size,
 
 #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags
 
+static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size,
+						struct syscall_arg *arg)
+{
+	int printed = 0, flags = arg->val;
+
+	if (flags == 0)
+		return 0;
+
+#define	P_FLAG(n) \
+	if (flags & PERF_FLAG_##n) { \
+		printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \
+		flags &= ~PERF_FLAG_##n; \
+	}
+
+	P_FLAG(FD_NO_GROUP);
+	P_FLAG(FD_OUTPUT);
+	P_FLAG(PID_CGROUP);
+	P_FLAG(FD_CLOEXEC);
+#undef P_FLAG
+
+	if (flags)
+		printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? "|" : "", flags);
+
+	return printed;
+}
+
+#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags
+
 static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size,
 						   struct syscall_arg *arg)
 {
@@ -1077,6 +1130,11 @@ static struct syscall_fmt {
 	{ .name	    = "openat",	    .errmsg = true,
 	  .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */
 			     [2] = SCA_OPEN_FLAGS, /* flags */ }, },
+	{ .name	    = "perf_event_open", .errmsg = true,
+	  .arg_scnprintf = { [1] = SCA_INT, /* pid */
+			     [2] = SCA_INT, /* cpu */
+			     [3] = SCA_FD,  /* group_fd */
+			     [4] = SCA_PERF_FLAGS,  /* flags */ }, },
 	{ .name	    = "pipe2",	    .errmsg = true,
 	  .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, },
 	{ .name	    = "poll",	    .errmsg = true, .timeout = true, },

From c8ad7063626406181a7ebab10cb31b4f741b13d4 Mon Sep 17 00:00:00 2001
From: David Ahern <david.ahern@oracle.com>
Date: Fri, 5 Jun 2015 13:42:53 -0400
Subject: [PATCH 276/305] perf tools: Update MANIFEST per files removed from
 kernel

Building perf out of kernel tree is currently broken because the
MANIFEST file refers to kernel files that have been removed. With this
patch make perf-targz-src-pkg succeeds as does building perf using the
generated tarfile.

Signed-off-by: David Ahern <david.ahern@oracle.com>
Link: http://lkml.kernel.org/r/1433526173-172332-1-git-send-email-david.ahern@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/MANIFEST | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST
index a0bdd6124583..fe50a1b34aa0 100644
--- a/tools/perf/MANIFEST
+++ b/tools/perf/MANIFEST
@@ -50,24 +50,20 @@ include/asm-generic/bitops/const_hweight.h
 include/asm-generic/bitops/fls64.h
 include/asm-generic/bitops/__fls.h
 include/asm-generic/bitops/fls.h
-include/linux/const.h
 include/linux/perf_event.h
 include/linux/rbtree.h
 include/linux/list.h
 include/linux/hash.h
 include/linux/stringify.h
-lib/find_next_bit.c
 lib/hweight.c
 lib/rbtree.c
 include/linux/swab.h
 arch/*/include/asm/unistd*.h
-arch/*/include/asm/perf_regs.h
 arch/*/include/uapi/asm/unistd*.h
 arch/*/include/uapi/asm/perf_regs.h
 arch/*/lib/memcpy*.S
 arch/*/lib/memset*.S
 include/linux/poison.h
-include/linux/magic.h
 include/linux/hw_breakpoint.h
 include/linux/rbtree_augmented.h
 include/uapi/linux/perf_event.h

From 35a23ff928b066b00a826d0a9ed9411b8ab479ef Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Fri, 12 Jun 2015 14:08:20 +0900
Subject: [PATCH 277/305] perf probe: Cut off the gcc optimization postfixes
 from function name

Cut off the postfixes which gcc added for optimized routines from the
event name automatically generated from symbol name, since *probe-events
doesn't accept it.  Those symbols will be used if we don't use debuginfo
to find target functions.

E.g. without this fix;
  -----
  # perf probe -va alloc_buf.isra.23
  probe-definition(0): alloc_buf.isra.23
  symbol:alloc_buf.isra.23 file:(null) line:0 offset:0 return:0 lazy:(null)
  [...]
  Opening /sys/kernel/debug/tracing/kprobe_events write=1
  Added new event:
  Writing event: p:probe/alloc_buf.isra.23 _text+4869328
  Failed to write event: Invalid argument
    Error: Failed to add events. Reason: Invalid argument (Code: -22)
  -----
With this fix;
  -----
  perf probe -va alloc_buf.isra.23
  probe-definition(0): alloc_buf.isra.23
  symbol:alloc_buf.isra.23 file:(null) line:0 offset:0 return:0 lazy:(null)
  [...]
  Opening /sys/kernel/debug/tracing/kprobe_events write=1
  Added new event:
  Writing event: p:probe/alloc_buf _text+4869328
    probe:alloc_buf      (on alloc_buf.isra.23)

  You can now use it in all perf tools, such as:

  	perf record -e probe:alloc_buf -aR sleep 1

  -----

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naohiro Aota <naota@elisp.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150612050820.20548.41625.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index d4cf50b91839..daa24a249e05 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2316,6 +2316,7 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 			      struct strlist *namelist, bool allow_suffix)
 {
 	int i, ret;
+	char *p;
 
 	if (*base == '.')
 		base++;
@@ -2326,6 +2327,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base,
 		pr_debug("snprintf() failed: %d\n", ret);
 		return ret;
 	}
+	/* Cut off the postfixes (e.g. .const, .isra)*/
+	p = strchr(buf, '.');
+	if (p && p != buf)
+		*p = '\0';
 	if (!strlist__has_entry(namelist, buf))
 		return 0;
 

From 5c24b67aae72f54c5d2a6898be4bae9726df63bb Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Mon, 15 Jun 2015 23:29:51 -0300
Subject: [PATCH 278/305] perf tools: Replace map->referenced &
 maps->removed_maps with map->refcnt

Use just reference counts, so that when no more hist_entry instances
references a map and the thread instance goes away by processing a
PERF_RECORD_EXIT, we can delete the maps.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-oym7lfhcc7ss6xpz44h7nbxs@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/hist.c | 37 ++++++++++++++++-----------
 tools/perf/util/map.c  | 58 ++----------------------------------------
 tools/perf/util/map.h  | 10 ++++++--
 3 files changed, 32 insertions(+), 73 deletions(-)

diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index f53d017c7c22..6f28d53d4e46 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -313,8 +313,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 				memset(&he->stat, 0, sizeof(he->stat));
 		}
 
-		if (he->ms.map)
-			he->ms.map->referenced = true;
+		map__get(he->ms.map);
 
 		if (he->branch_info) {
 			/*
@@ -324,6 +323,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 			 */
 			he->branch_info = malloc(sizeof(*he->branch_info));
 			if (he->branch_info == NULL) {
+				map__zput(he->ms.map);
 				free(he->stat_acc);
 				free(he);
 				return NULL;
@@ -332,17 +332,13 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template,
 			memcpy(he->branch_info, template->branch_info,
 			       sizeof(*he->branch_info));
 
-			if (he->branch_info->from.map)
-				he->branch_info->from.map->referenced = true;
-			if (he->branch_info->to.map)
-				he->branch_info->to.map->referenced = true;
+			map__get(he->branch_info->from.map);
+			map__get(he->branch_info->to.map);
 		}
 
 		if (he->mem_info) {
-			if (he->mem_info->iaddr.map)
-				he->mem_info->iaddr.map->referenced = true;
-			if (he->mem_info->daddr.map)
-				he->mem_info->daddr.map->referenced = true;
+			map__get(he->mem_info->iaddr.map);
+			map__get(he->mem_info->daddr.map);
 		}
 
 		if (symbol_conf.use_callchain)
@@ -407,9 +403,8 @@ static struct hist_entry *hists__findnew_entry(struct hists *hists,
 			 * the history counter to increment.
 			 */
 			if (he->ms.map != entry->ms.map) {
-				he->ms.map = entry->ms.map;
-				if (he->ms.map)
-					he->ms.map->referenced = true;
+				map__put(he->ms.map);
+				he->ms.map = map__get(entry->ms.map);
 			}
 			goto out;
 		}
@@ -933,8 +928,20 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right)
 void hist_entry__delete(struct hist_entry *he)
 {
 	thread__zput(he->thread);
-	zfree(&he->branch_info);
-	zfree(&he->mem_info);
+	map__zput(he->ms.map);
+
+	if (he->branch_info) {
+		map__zput(he->branch_info->from.map);
+		map__zput(he->branch_info->to.map);
+		zfree(&he->branch_info);
+	}
+
+	if (he->mem_info) {
+		map__zput(he->mem_info->iaddr.map);
+		map__zput(he->mem_info->daddr.map);
+		zfree(&he->mem_info);
+	}
+
 	zfree(&he->stat_acc);
 	free_srcline(he->srcline);
 	free_callchain(he->callchain);
diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 1241ab989cf5..b5a5e9c02437 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -137,7 +137,6 @@ void map__init(struct map *map, enum map_type type,
 	map->unmap_ip = map__unmap_ip;
 	RB_CLEAR_NODE(&map->rb_node);
 	map->groups   = NULL;
-	map->referenced = false;
 	map->erange_warned = false;
 	atomic_set(&map->refcnt, 1);
 }
@@ -439,7 +438,6 @@ static void maps__init(struct maps *maps)
 {
 	maps->entries = RB_ROOT;
 	pthread_rwlock_init(&maps->lock, NULL);
-	INIT_LIST_HEAD(&maps->removed_maps);
 }
 
 void map_groups__init(struct map_groups *mg, struct machine *machine)
@@ -466,21 +464,10 @@ static void __maps__purge(struct maps *maps)
 	}
 }
 
-static void __maps__purge_removed_maps(struct maps *maps)
-{
-	struct map *pos, *n;
-
-	list_for_each_entry_safe(pos, n, &maps->removed_maps, node) {
-		list_del_init(&pos->node);
-		map__put(pos);
-	}
-}
-
 static void maps__exit(struct maps *maps)
 {
 	pthread_rwlock_wrlock(&maps->lock);
 	__maps__purge(maps);
-	__maps__purge_removed_maps(maps);
 	pthread_rwlock_unlock(&maps->lock);
 }
 
@@ -499,8 +486,6 @@ bool map_groups__empty(struct map_groups *mg)
 	for (i = 0; i < MAP__NR_TYPES; ++i) {
 		if (maps__first(&mg->maps[i]))
 			return false;
-		if (!list_empty(&mg->maps[i].removed_maps))
-			return false;
 	}
 
 	return true;
@@ -621,7 +606,7 @@ size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type,
 	return printed += maps__fprintf(&mg->maps[type], fp);
 }
 
-static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
+size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 {
 	size_t printed = 0, i;
 	for (i = 0; i < MAP__NR_TYPES; ++i)
@@ -629,39 +614,6 @@ static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp)
 	return printed;
 }
 
-static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg,
-						 enum map_type type, FILE *fp)
-{
-	struct map *pos;
-	size_t printed = 0;
-
-	list_for_each_entry(pos, &mg->maps[type].removed_maps, node) {
-		printed += fprintf(fp, "Map:");
-		printed += map__fprintf(pos, fp);
-		if (verbose > 1) {
-			printed += dso__fprintf(pos->dso, type, fp);
-			printed += fprintf(fp, "--\n");
-		}
-	}
-	return printed;
-}
-
-static size_t map_groups__fprintf_removed_maps(struct map_groups *mg,
-					       FILE *fp)
-{
-	size_t printed = 0, i;
-	for (i = 0; i < MAP__NR_TYPES; ++i)
-		printed += __map_groups__fprintf_removed_maps(mg, i, fp);
-	return printed;
-}
-
-size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
-{
-	size_t printed = map_groups__fprintf_maps(mg, fp);
-	printed += fprintf(fp, "Removed maps:\n");
-	return printed + map_groups__fprintf_removed_maps(mg, fp);
-}
-
 static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp)
 {
 	struct rb_root *root;
@@ -719,13 +671,7 @@ static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp
 				map__fprintf(after, fp);
 		}
 put_map:
-		/*
-		 * If we have references, just move them to a separate list.
-		 */
-		if (pos->referenced)
-			list_add_tail(&pos->node, &maps->removed_maps);
-		else
-			map__put(pos);
+		map__put(pos);
 
 		if (err)
 			goto out;
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index b8df09d94aca..d73e687b224e 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -34,7 +34,6 @@ struct map {
 	u64			start;
 	u64			end;
 	u8 /* enum map_type */	type;
-	bool			referenced;
 	bool			erange_warned;
 	u32			priv;
 	u32			prot;
@@ -63,7 +62,6 @@ struct kmap {
 struct maps {
 	struct rb_root	 entries;
 	pthread_rwlock_t lock;
-	struct list_head removed_maps;
 };
 
 struct map_groups {
@@ -161,6 +159,14 @@ static inline struct map *map__get(struct map *map)
 
 void map__put(struct map *map);
 
+static inline void __map__zput(struct map **map)
+{
+	map__put(*map);
+	*map = NULL;
+}
+
+#define map__zput(map) __map__zput(&map)
+
 int map__overlap(struct map *l, struct map *r);
 size_t map__fprintf(struct map *map, FILE *fp);
 size_t map__fprintf_dsoname(struct map *map, FILE *fp);

From a35489a6a217f0396c979009b0c875d871434737 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Fri, 12 Jun 2015 03:17:11 +0000
Subject: [PATCH 279/305] tools lib traceevent: Fix python/perf.so compiling
 error

'make build-test' finds an error that make_python_perf_so fails due to
missing of libtraceevent-dynamic-list:

 '.../python2' util/setup.py \
   --quiet build_ext; \
   mkdir -p python && \
   cp python_ext_build/lib/perf.so python/
   /path/to/ld: cannot open linker script file /path/to/kernel/tools/lib/traceevent/libtraceevent-dynamic-list: No such file or directory
   collect2: error: ld returned 1 exit status
   error: command 'x86_64-linux-gcc' failed with exit status 1
   cp: cannot stat 'python_ext_build/lib/perf.so': No such file or directory
   make[3]: *** [python/perf.so] Error 1
   make[2]: *** [python/perf.so] Error 2
     test: test -f ./python/perf.so
     make[1]: *** [make_python_perf_so] Error 1
     make: *** [build-test] Error 2
     make: Leaving directory `/path/to/kernel/tools/perf'

This is caused by commit e3d09ec8126fe2c9a3ade661e2126e215ca27a80
("tools lib traceevent: Export dynamic symbols used by traceevent
plugins") that, it adds the list file to LDFLAGS but forgot to add it to
dependency list of python/perf.so.

This patch fixes this problem.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1434079031-123162-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index b1dfcd8e93e3..374378322db9 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -190,7 +190,7 @@ python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT
 PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
 
-$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS)
+$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
 	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	mkdir -p $(OUTPUT)python && \

From ba7ecb02e7b89b09d8cdf4c1514a386af8916c4b Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Sat, 13 Jun 2015 10:31:16 +0900
Subject: [PATCH 280/305] perf probe: List probes in stdout

Since commit 5e17b28f1e24 ("perf probe: Add --quiet option to
suppress output result message") have replaced printf with pr_info,
perf probe -l outputs its result in stderr. However, that is not
what the commit expected.

E.g.:

  # perf probe -l > /dev/null
    probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)

With this fix:

  # perf probe -l > list
  # cat list
    probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)

Of course, --quiet(-q) still works on --add/--del.

  # perf probe -q vfs_write
  # perf probe -l
    probe:vfs_read       (on vfs_read@ksrc/linux-3/fs/read_write.c)
    probe:vfs_write      (on vfs_write@ksrc/linux-3/fs/read_write.c)
  -----

Reported-by: Naohiro Aota <naota@elisp.net>
Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naohiro Aota <naota@elisp.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150613013116.24402.2923.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 49 +++++++++++++++++++++++++----------
 1 file changed, 35 insertions(+), 14 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index daa24a249e05..c4ab58870fcc 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2126,9 +2126,9 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist,
 	return NULL;
 }
 
-/* Show an event */
-static int show_perf_probe_event(struct perf_probe_event *pev,
-				 const char *module)
+static int perf_probe_event__sprintf(struct perf_probe_event *pev,
+				     const char *module,
+				     struct strbuf *result)
 {
 	int i, ret;
 	char buf[128];
@@ -2141,27 +2141,47 @@ static int show_perf_probe_event(struct perf_probe_event *pev,
 
 	ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
 	if (ret < 0)
-		return ret;
+		goto out;
 
-	pr_info("  %-20s (on %s", buf, place);
+	strbuf_addf(result, "  %-20s (on %s", buf, place);
 	if (module)
-		pr_info(" in %s", module);
+		strbuf_addf(result, " in %s", module);
 
 	if (pev->nargs > 0) {
-		pr_info(" with");
+		strbuf_addstr(result, " with");
 		for (i = 0; i < pev->nargs; i++) {
 			ret = synthesize_perf_probe_arg(&pev->args[i],
 							buf, 128);
 			if (ret < 0)
-				break;
-			pr_info(" %s", buf);
+				goto out;
+			strbuf_addf(result, " %s", buf);
 		}
 	}
-	pr_info(")\n");
+	strbuf_addch(result, ')');
+out:
 	free(place);
 	return ret;
 }
 
+/* Show an event */
+static int show_perf_probe_event(struct perf_probe_event *pev,
+				 const char *module, bool use_stdout)
+{
+	struct strbuf buf = STRBUF_INIT;
+	int ret;
+
+	ret = perf_probe_event__sprintf(pev, module, &buf);
+	if (ret >= 0) {
+		if (use_stdout)
+			printf("%s\n", buf.buf);
+		else
+			pr_info("%s\n", buf.buf);
+	}
+	strbuf_release(&buf);
+
+	return ret;
+}
+
 static bool filter_probe_trace_event(struct probe_trace_event *tev,
 				     struct strfilter *filter)
 {
@@ -2200,9 +2220,10 @@ static int __show_perf_probe_events(int fd, bool is_kprobe,
 				goto next;
 			ret = convert_to_perf_probe_event(&tev, &pev,
 								is_kprobe);
-			if (ret >= 0)
-				ret = show_perf_probe_event(&pev,
-							    tev.point.module);
+			if (ret < 0)
+				goto next;
+			ret = show_perf_probe_event(&pev, tev.point.module,
+						    true);
 		}
 next:
 		clear_perf_probe_event(&pev);
@@ -2468,7 +2489,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 		group = pev->group;
 		pev->event = tev->event;
 		pev->group = tev->group;
-		show_perf_probe_event(pev, tev->point.module);
+		show_perf_probe_event(pev, tev->point.module, false);
 		/* Trick here - restore current event/group */
 		pev->event = (char *)event;
 		pev->group = (char *)group;

From b45f65e8fddc89ac6b46388908d3f6ac728be372 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 14 Jun 2015 10:19:16 +0200
Subject: [PATCH 281/305] perf tools: Introduce xyarray__reset function

To zero all the xyarray contents. It will be used in following patches.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1434269985-521-2-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/xyarray.c | 8 ++++++++
 tools/perf/util/xyarray.h | 2 ++
 2 files changed, 10 insertions(+)

diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c
index 22afbf6c536a..c10ba41ef3f6 100644
--- a/tools/perf/util/xyarray.c
+++ b/tools/perf/util/xyarray.c
@@ -9,11 +9,19 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size)
 	if (xy != NULL) {
 		xy->entry_size = entry_size;
 		xy->row_size   = row_size;
+		xy->entries    = xlen * ylen;
 	}
 
 	return xy;
 }
 
+void xyarray__reset(struct xyarray *xy)
+{
+	size_t n = xy->entries * xy->entry_size;
+
+	memset(xy->contents, 0, n);
+}
+
 void xyarray__delete(struct xyarray *xy)
 {
 	free(xy);
diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h
index c488a07275dd..7f30af371b7e 100644
--- a/tools/perf/util/xyarray.h
+++ b/tools/perf/util/xyarray.h
@@ -6,11 +6,13 @@
 struct xyarray {
 	size_t row_size;
 	size_t entry_size;
+	size_t entries;
 	char contents[];
 };
 
 struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size);
 void xyarray__delete(struct xyarray *xy);
+void xyarray__reset(struct xyarray *xy);
 
 static inline void *xyarray__entry(struct xyarray *xy, int x, int y)
 {

From 9d7e8c3a96e5a903a4e0951e08f7fa6957170bef Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 14 Jun 2015 10:19:17 +0200
Subject: [PATCH 282/305] perf tools: Add thread_map__(alloc|realloc) helpers

In order to have 'struct thread_map' allocation on single place and can
change it easily in following patch.

Using alloc|realloc for static helpers, because thread_map__new is
already used in public interface.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1434269985-521-3-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/thread_map.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c
index f93b9734735b..f4822bd03709 100644
--- a/tools/perf/util/thread_map.c
+++ b/tools/perf/util/thread_map.c
@@ -20,6 +20,15 @@ static int filter(const struct dirent *dir)
 		return 1;
 }
 
+static struct thread_map *thread_map__realloc(struct thread_map *map, int nr)
+{
+	size_t size = sizeof(*map) + sizeof(pid_t) * nr;
+
+	return realloc(map, size);
+}
+
+#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr)
+
 struct thread_map *thread_map__new_by_pid(pid_t pid)
 {
 	struct thread_map *threads;
@@ -33,7 +42,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
 	if (items <= 0)
 		return NULL;
 
-	threads = malloc(sizeof(*threads) + sizeof(pid_t) * items);
+	threads = thread_map__alloc(items);
 	if (threads != NULL) {
 		for (i = 0; i < items; i++)
 			threads->map[i] = atoi(namelist[i]->d_name);
@@ -49,7 +58,7 @@ struct thread_map *thread_map__new_by_pid(pid_t pid)
 
 struct thread_map *thread_map__new_by_tid(pid_t tid)
 {
-	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(1);
 
 	if (threads != NULL) {
 		threads->map[0] = tid;
@@ -65,8 +74,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid)
 	int max_threads = 32, items, i;
 	char path[256];
 	struct dirent dirent, *next, **namelist = NULL;
-	struct thread_map *threads = malloc(sizeof(*threads) +
-					    max_threads * sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(max_threads);
+
 	if (threads == NULL)
 		goto out;
 
@@ -185,8 +194,7 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str)
 			goto out_free_threads;
 
 		total_tasks += items;
-		nt = realloc(threads, (sizeof(*threads) +
-				       sizeof(pid_t) * total_tasks));
+		nt = thread_map__realloc(threads, total_tasks);
 		if (nt == NULL)
 			goto out_free_namelist;
 
@@ -216,7 +224,7 @@ out_free_threads:
 
 struct thread_map *thread_map__new_dummy(void)
 {
-	struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t));
+	struct thread_map *threads = thread_map__alloc(1);
 
 	if (threads != NULL) {
 		threads->map[0]	= -1;
@@ -253,7 +261,7 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str)
 			continue;
 
 		ntasks++;
-		nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks);
+		nt = thread_map__realloc(threads, ntasks);
 
 		if (nt == NULL)
 			goto out_free_threads;

From a9a3a4d92d8f2fb68f4b99d98505bebc70518599 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 14 Jun 2015 10:19:26 +0200
Subject: [PATCH 283/305] perf tools: Move
 perf_evsel__(alloc|free|reset)_counts into stat object

It's stat specific. Updating python build objects with stat.c.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1434269985-521-12-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/tests/openat-syscall-all-cpus.c |  1 +
 tools/perf/util/evsel.c                    | 19 +------------------
 tools/perf/util/evsel.h                    |  3 ---
 tools/perf/util/python-ext-sources         |  1 +
 tools/perf/util/stat.c                     | 18 ++++++++++++++++++
 tools/perf/util/stat.h                     |  3 +++
 6 files changed, 24 insertions(+), 21 deletions(-)

diff --git a/tools/perf/tests/openat-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c
index e34dfdf96b5a..9a7a116e09b8 100644
--- a/tools/perf/tests/openat-syscall-all-cpus.c
+++ b/tools/perf/tests/openat-syscall-all-cpus.c
@@ -3,6 +3,7 @@
 #include "thread_map.h"
 #include "cpumap.h"
 #include "debug.h"
+#include "stat.h"
 
 int test__openat_syscall_event_on_all_cpus(void)
 {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index d4f9994ae47f..33449decf7bd 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -26,6 +26,7 @@
 #include "perf_regs.h"
 #include "debug.h"
 #include "trace-event.h"
+#include "stat.h"
 
 static struct {
 	bool sample_id_all;
@@ -851,19 +852,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads)
 	return 0;
 }
 
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
-{
-	memset(evsel->counts, 0, (sizeof(*evsel->counts) +
-				 (ncpus * sizeof(struct perf_counts_values))));
-}
-
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
-{
-	evsel->counts = zalloc((sizeof(*evsel->counts) +
-				(ncpus * sizeof(struct perf_counts_values))));
-	return evsel->counts != NULL ? 0 : -ENOMEM;
-}
-
 static void perf_evsel__free_fd(struct perf_evsel *evsel)
 {
 	xyarray__delete(evsel->fd);
@@ -891,11 +879,6 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads)
 		}
 }
 
-void perf_evsel__free_counts(struct perf_evsel *evsel)
-{
-	zfree(&evsel->counts);
-}
-
 void perf_evsel__exit(struct perf_evsel *evsel)
 {
 	assert(list_empty(&evsel->node));
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 21ec08247d47..bb0579e8a10a 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -170,9 +170,6 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel);
 int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size);
 
 int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads);
-int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
-void perf_evsel__free_counts(struct perf_evsel *evsel);
 void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads);
 
 void __perf_evsel__set_sample_bit(struct perf_evsel *evsel,
diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources
index 4d28624a1eca..5925fec90562 100644
--- a/tools/perf/util/python-ext-sources
+++ b/tools/perf/util/python-ext-sources
@@ -16,6 +16,7 @@ util/util.c
 util/xyarray.c
 util/cgroup.c
 util/rblist.c
+util/stat.c
 util/strlist.c
 util/trace-event.c
 ../../lib/rbtree.c
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index 60b92822f655..ac589b6b8bce 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -94,3 +94,21 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
 		}
 	}
 }
+
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
+{
+	memset(evsel->counts, 0, (sizeof(*evsel->counts) +
+				 (ncpus * sizeof(struct perf_counts_values))));
+}
+
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
+{
+	evsel->counts = zalloc((sizeof(*evsel->counts) +
+				(ncpus * sizeof(struct perf_counts_values))));
+	return evsel->counts != NULL ? 0 : -ENOMEM;
+}
+
+void perf_evsel__free_counts(struct perf_evsel *evsel)
+{
+	zfree(&evsel->counts);
+}
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 615c779eb42a..6a782601c1c7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -62,4 +62,7 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
 				   double avg, int cpu, enum aggr_mode aggr);
 
+void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
+int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
+void perf_evsel__free_counts(struct perf_evsel *evsel);
 #endif

From 9df38e82e2a103cf42177c164a4de9d58052ac3a Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 14 Jun 2015 10:19:27 +0200
Subject: [PATCH 284/305] perf stat: Introduce perf_counts__(new|delete|reset)
 functions

Move 'struct perf_counts' allocation|free|reset code into separate
functions.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/r/1434269985-521-13-git-send-email-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 19 +++++++------------
 tools/perf/util/stat.c    | 28 +++++++++++++++++++++++-----
 tools/perf/util/stat.h    |  3 +++
 3 files changed, 33 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index b24ecee95fec..fcf99bdeb19e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -178,24 +178,19 @@ static void perf_evsel__free_stat_priv(struct perf_evsel *evsel)
 
 static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel)
 {
-	void *addr;
-	size_t sz;
+	struct perf_counts *counts;
 
-	sz = sizeof(*evsel->counts) +
-	     (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values));
+	counts = perf_counts__new(perf_evsel__nr_cpus(evsel));
+	if (counts)
+		evsel->prev_raw_counts = counts;
 
-	addr = zalloc(sz);
-	if (!addr)
-		return -ENOMEM;
-
-	evsel->prev_raw_counts =  addr;
-
-	return 0;
+	return counts ? 0 : -ENOMEM;
 }
 
 static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel)
 {
-	zfree(&evsel->prev_raw_counts);
+	perf_counts__delete(evsel->prev_raw_counts);
+	evsel->prev_raw_counts = NULL;
 }
 
 static void perf_evlist__free_stats(struct perf_evlist *evlist)
diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c
index ac589b6b8bce..4014b709f956 100644
--- a/tools/perf/util/stat.c
+++ b/tools/perf/util/stat.c
@@ -95,20 +95,38 @@ void perf_stat_evsel_id_init(struct perf_evsel *evsel)
 	}
 }
 
+struct perf_counts *perf_counts__new(int ncpus)
+{
+	int size = sizeof(struct perf_counts) +
+		   ncpus * sizeof(struct perf_counts_values);
+
+	return zalloc(size);
+}
+
+void perf_counts__delete(struct perf_counts *counts)
+{
+	free(counts);
+}
+
+static void perf_counts__reset(struct perf_counts *counts, int ncpus)
+{
+	memset(counts, 0, (sizeof(*counts) +
+	       (ncpus * sizeof(struct perf_counts_values))));
+}
+
 void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus)
 {
-	memset(evsel->counts, 0, (sizeof(*evsel->counts) +
-				 (ncpus * sizeof(struct perf_counts_values))));
+	perf_counts__reset(evsel->counts, ncpus);
 }
 
 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus)
 {
-	evsel->counts = zalloc((sizeof(*evsel->counts) +
-				(ncpus * sizeof(struct perf_counts_values))));
+	evsel->counts = perf_counts__new(ncpus);
 	return evsel->counts != NULL ? 0 : -ENOMEM;
 }
 
 void perf_evsel__free_counts(struct perf_evsel *evsel)
 {
-	zfree(&evsel->counts);
+	perf_counts__delete(evsel->counts);
+	evsel->counts = NULL;
 }
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 6a782601c1c7..093dc3cb28dd 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -62,6 +62,9 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count,
 void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel,
 				   double avg, int cpu, enum aggr_mode aggr);
 
+struct perf_counts *perf_counts__new(int ncpus);
+void perf_counts__delete(struct perf_counts *counts);
+
 void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus);
 int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus);
 void perf_evsel__free_counts(struct perf_evsel *evsel);

From f005813afb89bae92faf254130c544dc68984c6b Mon Sep 17 00:00:00 2001
From: Hou Pengyang <houpengyang@huawei.com>
Date: Tue, 16 Jun 2015 11:16:35 +0000
Subject: [PATCH 285/305] perf unwind: Fix a compile error

When libunwind is on, there is a compile error as :

  util/unwind-libunwind.c:363:21: error: 'dso' undeclared (first use in this function)
      dso__data_put_fd(dso);

This patch fixes it.

Signed-off-by: Hou Pengyang <houpengyang@huawei.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Fixes: 4bb11d012ab248d0 ("perf tools: Add dso__data_get/put_fd()")
Link: http://lkml.kernel.org/r/1434453395-10560-1-git-send-email-houpengyang@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/unwind-libunwind.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c
index f079b63f0b7f..4c00507ee3fd 100644
--- a/tools/perf/util/unwind-libunwind.c
+++ b/tools/perf/util/unwind-libunwind.c
@@ -360,7 +360,7 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi,
 		unw_word_t base = is_exec ? 0 : map->start;
 
 		if (fd >= 0)
-			dso__data_put_fd(dso);
+			dso__data_put_fd(map->dso);
 
 		memset(&di, 0, sizeof(di));
 		if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name,

From b031220d520238075bd99513a420e65cf37866ad Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 16 Jun 2015 20:50:55 +0900
Subject: [PATCH 286/305] perf probe: Fix to return error if no probe is added

Fix perf probe to return an error if no probe is added due to the given
probe point being on the blacklist.

To fix this problem, this moves the blacklist checking to right after
finding symbols/probe-points and marks them as skipped.

If all the symbols are skipped, "perf probe"  returns an error as it
fails to find the corresponding probe address.

E.g. currently if a blacklisted probe is given:

  # perf probe do_trap && echo 'succeed'
  Added new event:
  Warning: Skipped probing on blacklisted function: sync_regs
  succeed

No! It must fail! With this patch, it correctly fails:

  # perf probe do_trap && echo 'succeed'
  do_trap is blacklisted function, skip it.
  Probe point 'do_trap' not found.
    Error: Failed to add events.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naohiro Aota <naota@elisp.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150616115055.19906.31359.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 113 +++++++++++++++++++++-------------
 1 file changed, 71 insertions(+), 42 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index c4ab58870fcc..85c8207c25cc 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -246,6 +246,20 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs)
 		clear_probe_trace_event(tevs + i);
 }
 
+static bool kprobe_blacklist__listed(unsigned long address);
+static bool kprobe_warn_out_range(const char *symbol, unsigned long address)
+{
+	/* Get the address of _etext for checking non-probable text symbol */
+	if (kernel_get_symbol_address_by_name("_etext", false) < address)
+		pr_warning("%s is out of .text, skip it.\n", symbol);
+	else if (kprobe_blacklist__listed(address))
+		pr_warning("%s is blacklisted function, skip it.\n", symbol);
+	else
+		return false;
+
+	return true;
+}
+
 #ifdef HAVE_DWARF_SUPPORT
 
 static int kernel_get_module_dso(const char *module, struct dso **pdso)
@@ -559,7 +573,6 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 					   bool uprobe)
 {
 	struct ref_reloc_sym *reloc_sym;
-	u64 etext_addr;
 	char *tmp;
 	int i, skipped = 0;
 
@@ -575,31 +588,28 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs,
 		pr_warning("Relocated base symbol is not found!\n");
 		return -EINVAL;
 	}
-	/* Get the address of _etext for checking non-probable text symbol */
-	etext_addr = kernel_get_symbol_address_by_name("_etext", false);
 
 	for (i = 0; i < ntevs; i++) {
-		if (tevs[i].point.address && !tevs[i].point.retprobe) {
-			/* If we found a wrong one, mark it by NULL symbol */
-			if (etext_addr < tevs[i].point.address) {
-				pr_warning("%s+%lu is out of .text, skip it.\n",
-				   tevs[i].point.symbol, tevs[i].point.offset);
-				tmp = NULL;
-				skipped++;
-			} else {
-				tmp = strdup(reloc_sym->name);
-				if (!tmp)
-					return -ENOMEM;
-			}
-			/* If we have no realname, use symbol for it */
-			if (!tevs[i].point.realname)
-				tevs[i].point.realname = tevs[i].point.symbol;
-			else
-				free(tevs[i].point.symbol);
-			tevs[i].point.symbol = tmp;
-			tevs[i].point.offset = tevs[i].point.address -
-					       reloc_sym->unrelocated_addr;
+		if (!tevs[i].point.address || tevs[i].point.retprobe)
+			continue;
+		/* If we found a wrong one, mark it by NULL symbol */
+		if (kprobe_warn_out_range(tevs[i].point.symbol,
+					  tevs[i].point.address)) {
+			tmp = NULL;
+			skipped++;
+		} else {
+			tmp = strdup(reloc_sym->name);
+			if (!tmp)
+				return -ENOMEM;
 		}
+		/* If we have no realname, use symbol for it */
+		if (!tevs[i].point.realname)
+			tevs[i].point.realname = tevs[i].point.symbol;
+		else
+			free(tevs[i].point.symbol);
+		tevs[i].point.symbol = tmp;
+		tevs[i].point.offset = tevs[i].point.address -
+				       reloc_sym->unrelocated_addr;
 	}
 	return skipped;
 }
@@ -2126,6 +2136,27 @@ kprobe_blacklist__find_by_address(struct list_head *blacklist,
 	return NULL;
 }
 
+static LIST_HEAD(kprobe_blacklist);
+
+static void kprobe_blacklist__init(void)
+{
+	if (!list_empty(&kprobe_blacklist))
+		return;
+
+	if (kprobe_blacklist__load(&kprobe_blacklist) < 0)
+		pr_debug("No kprobe blacklist support, ignored\n");
+}
+
+static void kprobe_blacklist__release(void)
+{
+	kprobe_blacklist__delete(&kprobe_blacklist);
+}
+
+static bool kprobe_blacklist__listed(unsigned long address)
+{
+	return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
+}
+
 static int perf_probe_event__sprintf(struct perf_probe_event *pev,
 				     const char *module,
 				     struct strbuf *result)
@@ -2409,8 +2440,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 	char buf[64];
 	const char *event, *group;
 	struct strlist *namelist;
-	LIST_HEAD(blacklist);
-	struct kprobe_blacklist_node *node;
 	bool safename;
 
 	if (pev->uprobes)
@@ -2430,28 +2459,15 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 		ret = -ENOMEM;
 		goto close_out;
 	}
-	/* Get kprobe blacklist if exists */
-	if (!pev->uprobes) {
-		ret = kprobe_blacklist__load(&blacklist);
-		if (ret < 0)
-			pr_debug("No kprobe blacklist support, ignored\n");
-	}
 
 	safename = (pev->point.function && !strisglob(pev->point.function));
 	ret = 0;
 	pr_info("Added new event%s\n", (ntevs > 1) ? "s:" : ":");
 	for (i = 0; i < ntevs; i++) {
 		tev = &tevs[i];
-		/* Skip if the symbol is out of .text (marked previously) */
+		/* Skip if the symbol is out of .text or blacklisted */
 		if (!tev->point.symbol)
 			continue;
-		/* Ensure that the address is NOT blacklisted */
-		node = kprobe_blacklist__find_by_address(&blacklist,
-							 tev->point.address);
-		if (node) {
-			pr_warning("Warning: Skipped probing on blacklisted function: %s\n", node->symbol);
-			continue;
-		}
 
 		if (pev->event)
 			event = pev->event;
@@ -2513,7 +2529,6 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 			 tev->event);
 	}
 
-	kprobe_blacklist__delete(&blacklist);
 	strlist__delete(namelist);
 close_out:
 	close(fd);
@@ -2563,7 +2578,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 	struct perf_probe_point *pp = &pev->point;
 	struct probe_trace_point *tp;
 	int num_matched_functions;
-	int ret, i, j;
+	int ret, i, j, skipped = 0;
 
 	map = get_target_map(pev->target, pev->uprobes);
 	if (!map) {
@@ -2631,7 +2646,12 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 		}
 		/* Add one probe point */
 		tp->address = map->unmap_ip(map, sym->start) + pp->offset;
-		if (reloc_sym) {
+		/* If we found a wrong one, mark it by NULL symbol */
+		if (!pev->uprobes &&
+		    kprobe_warn_out_range(sym->name, tp->address)) {
+			tp->symbol = NULL;	/* Skip it */
+			skipped++;
+		} else if (reloc_sym) {
 			tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out);
 			tp->offset = tp->address - reloc_sym->addr;
 		} else {
@@ -2667,6 +2687,10 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev,
 		}
 		arch__fix_tev_from_maps(pev, tev, map);
 	}
+	if (ret == skipped) {
+		ret = -ENOENT;
+		goto err_out;
+	}
 
 out:
 	put_target_map(map, pev->uprobes);
@@ -2737,6 +2761,9 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
 	/* Loop 1: convert all events */
 	for (i = 0; i < npevs; i++) {
 		pkgs[i].pev = &pevs[i];
+		/* Init kprobe blacklist if needed */
+		if (!pkgs[i].pev->uprobes)
+			kprobe_blacklist__init();
 		/* Convert with or without debuginfo */
 		ret  = convert_to_probe_trace_events(pkgs[i].pev,
 						     &pkgs[i].tevs);
@@ -2744,6 +2771,8 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs)
 			goto end;
 		pkgs[i].ntevs = ret;
 	}
+	/* This just release blacklist only if allocated */
+	kprobe_blacklist__release();
 
 	/* Loop 2: add all events */
 	for (i = 0; i < npevs; i++) {

From 386299735e7056455bcead28f6ffa4357116ce38 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 17 Jun 2015 11:59:18 +0000
Subject: [PATCH 287/305] perf tools: Ignore .config-detected in .gitignore

Commit fcfd6611fbccdbf2593bd949097a5c0e45cd96da ("tools build: Add
detected config support") dynamically creates .config-detected. Add it
to .gitignore.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Zefan Li <lizefan@huawei.com>
Link: http://lkml.kernel.org/r/1434542358-5430-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore
index 812f904193e8..09db62ba5786 100644
--- a/tools/perf/.gitignore
+++ b/tools/perf/.gitignore
@@ -28,3 +28,4 @@ config.mak.autogen
 *-flex.*
 *.pyc
 *.pyo
+.config-detected

From b30b617292462ca7ee68834b117a7833f4a52e16 Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 17 Jun 2015 09:56:39 +0000
Subject: [PATCH 288/305] perf tools: Fix a problem when opening old perf.data
 with different byte order

Following error occurs when trying to use 'perf report' on x86_64 to
cross analysis a perf.data generated by an old perf on a big-endian
machine:

 # perf report
 *** Error in `/home/w00229757/perf': free(): invalid next size (fast): 0x00000000032c99f0 ***
 ======= Backtrace: =========
 /lib64/libc.so.6(+0x6eeef)[0x7ff6ff7e2eef]
 /lib64/libc.so.6(+0x78cae)[0x7ff6ff7eccae]
 /lib64/libc.so.6(+0x79987)[0x7ff6ff7ed987]
 /path/to/perf[0x4ac734]
 /path/to/perf[0x4ac829]
 /path/to/perf(perf_header__process_sections+0x129)[0x4ad2c9]
 /path/to/perf(perf_session__read_header+0x2e1)[0x4ad9e1]
 /path/to/perf(perf_session__new+0x168)[0x4bd458]
 /path/to/perf(cmd_report+0xfa0)[0x43eb70]
 /path/to/perf[0x47adc3]
 /path/to/perf(main+0x5f6)[0x42fd06]
 /lib64/libc.so.6(__libc_start_main+0xf5)[0x7ff6ff795bd5]
 /path/to/perf[0x42fe35]
 ======= Memory map: ========
 [SNIP]

The bug is in perf_event__attr_swap(). It swaps all fields in 'struct
perf_event_attr' without checking whether the swapped field exist or
not. In addition, in read_event_desc() allocs memory for attr according
to size read from perf.data.

Therefore, if the perf.data is collected by an old perf (without
aux_watermark, for example), when perf_event__attr_swap() swaping
attr->aux_watermark it destroy malloc's metadata.

This patch introduces boundary checking in perf_event__attr_swap(). It
adds macros bswap_field_64 and bswap_field_32 into
perf_event__attr_swap() to make it only swap exist fields.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1434534999-85347-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/session.c | 48 ++++++++++++++++++++++++++++-----------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index f31e024ddf7d..e1cd17c2afab 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -517,20 +517,42 @@ void perf_event__attr_swap(struct perf_event_attr *attr)
 {
 	attr->type		= bswap_32(attr->type);
 	attr->size		= bswap_32(attr->size);
-	attr->config		= bswap_64(attr->config);
-	attr->sample_period	= bswap_64(attr->sample_period);
-	attr->sample_type	= bswap_64(attr->sample_type);
-	attr->read_format	= bswap_64(attr->read_format);
-	attr->wakeup_events	= bswap_32(attr->wakeup_events);
-	attr->bp_type		= bswap_32(attr->bp_type);
-	attr->bp_addr		= bswap_64(attr->bp_addr);
-	attr->bp_len		= bswap_64(attr->bp_len);
-	attr->branch_sample_type = bswap_64(attr->branch_sample_type);
-	attr->sample_regs_user	 = bswap_64(attr->sample_regs_user);
-	attr->sample_stack_user  = bswap_32(attr->sample_stack_user);
-	attr->aux_watermark	 = bswap_32(attr->aux_watermark);
 
-	swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64));
+#define bswap_safe(f, n) 					\
+	(attr->size > (offsetof(struct perf_event_attr, f) + 	\
+		       sizeof(attr->f) * (n)))
+#define bswap_field(f, sz) 			\
+do { 						\
+	if (bswap_safe(f, 0))			\
+		attr->f = bswap_##sz(attr->f);	\
+} while(0)
+#define bswap_field_32(f) bswap_field(f, 32)
+#define bswap_field_64(f) bswap_field(f, 64)
+
+	bswap_field_64(config);
+	bswap_field_64(sample_period);
+	bswap_field_64(sample_type);
+	bswap_field_64(read_format);
+	bswap_field_32(wakeup_events);
+	bswap_field_32(bp_type);
+	bswap_field_64(bp_addr);
+	bswap_field_64(bp_len);
+	bswap_field_64(branch_sample_type);
+	bswap_field_64(sample_regs_user);
+	bswap_field_32(sample_stack_user);
+	bswap_field_32(aux_watermark);
+
+	/*
+	 * After read_format are bitfields. Check read_format because
+	 * we are unable to use offsetof on bitfield.
+	 */
+	if (bswap_safe(read_format, 1))
+		swap_bitfield((u8 *) (&attr->read_format + 1),
+			      sizeof(u64));
+#undef bswap_field_64
+#undef bswap_field_32
+#undef bswap_field
+#undef bswap_safe
 }
 
 static void perf_event__hdr_attr_swap(union perf_event *event,

From 5d618324dd4442d20dbdb039176787199ba2f12b Mon Sep 17 00:00:00 2001
From: Wang Nan <wangnan0@huawei.com>
Date: Wed, 17 Jun 2015 14:46:29 +0000
Subject: [PATCH 289/305] perf tools: Move libtraceevent dynamic list to
 separated LDFLAGS variable

Commit e3d09ec8126fe2c9a3ade661e2126e215ca27a80 ("tools lib traceevent:
Export dynamic symbols used by traceevent plugins") adds libtraceevent
dynamic list directly into LDFLAGS, which makes all targets depend on
that list through LDFLAGS.

This is not good since some of targets like libgtk.so doesn't use plugin
at all, but require the existance of that list because of linker
options.

This patch isolates the -Xlink option into LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS,
makes only perf and perf.so use it.

Signed-off-by: Wang Nan <wangnan0@huawei.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Cc: He Kuang <hekuang@huawei.com>
Cc: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Zefan Li <lizefan@huawei.com>
Cc: pi3orama@163.com
Link: http://lkml.kernel.org/r/1434552389-89144-1-git-send-email-wangnan0@huawei.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Makefile.perf | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf
index 374378322db9..1af0cfeb7a57 100644
--- a/tools/perf/Makefile.perf
+++ b/tools/perf/Makefile.perf
@@ -174,7 +174,7 @@ LIBTRACEEVENT = $(TE_PATH)libtraceevent.a
 export LIBTRACEEVENT
 
 LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list
-LDFLAGS += -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
+LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST)
 
 LIBAPI = $(LIB_PATH)libapi.a
 export LIBAPI
@@ -191,7 +191,8 @@ PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources)
 PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI)
 
 $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST)
-	$(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \
+	$(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \
+	  $(PYTHON_WORD) util/setup.py \
 	  --quiet build_ext; \
 	mkdir -p $(OUTPUT)python && \
 	cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/
@@ -282,7 +283,8 @@ $(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE
 	$(Q)$(MAKE) $(build)=perf
 
 $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST)
-	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(PERF_IN) $(LIBS) -o $@
+	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \
+		$(PERF_IN) $(LIBS) -o $@
 
 $(GTK_IN): FORCE
 	$(Q)$(MAKE) $(build)=gtk

From d350bd571ffa89fc3bd07cfa9685d5210f459be8 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Tue, 16 Jun 2015 20:50:57 +0900
Subject: [PATCH 290/305] perf probe: Show usage even if the last event is
 skipped

When the last part of converted events are blacklisted or out-of-text,
those are skipped and perf probe doesn't show usage examples.  This
fixes it to show the example even if the last part of event list is
skipped.

E.g. without this patch, events are added, but suddenly end:

  # perf probe vfs_*
  vfs_caches_init_early is out of .text, skip it.
  vfs_caches_init is out of .text, skip it.
  Added new events:
    probe:vfs_fallocate  (on vfs_*)
    probe:vfs_open       (on vfs_*)
  ...
    probe:vfs_dentry_acceptable (on vfs_*)
    probe:vfs_load_quota_inode (on vfs_*)
  #

With this fix:

  # perf probe vfs_*
  vfs_caches_init_early is out of .text, skip it.
  vfs_caches_init is out of .text, skip it.
  Added new events:
    probe:vfs_fallocate  (on vfs_*)
  ...
    probe:vfs_load_quota_inode (on vfs_*)

  You can now use it in all perf tools, such as:

	perf record -e probe:vfs_load_quota_inode -aR sleep 1

Note that this can be reproduced ONLY IF the vfs_caches_init* is the
last part of matched symbol list. I've checked this happens on
"3.19.0-generic #18-Ubuntu" kernel binary.

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naohiro Aota <naota@elisp.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150616115057.19906.5502.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 35 +++++++++++++++++------------------
 1 file changed, 17 insertions(+), 18 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 85c8207c25cc..65a1c8252270 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -2157,7 +2157,8 @@ static bool kprobe_blacklist__listed(unsigned long address)
 	return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address);
 }
 
-static int perf_probe_event__sprintf(struct perf_probe_event *pev,
+static int perf_probe_event__sprintf(const char *group, const char *event,
+				     struct perf_probe_event *pev,
 				     const char *module,
 				     struct strbuf *result)
 {
@@ -2170,7 +2171,7 @@ static int perf_probe_event__sprintf(struct perf_probe_event *pev,
 	if (!place)
 		return -EINVAL;
 
-	ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event);
+	ret = e_snprintf(buf, 128, "%s:%s", group, event);
 	if (ret < 0)
 		goto out;
 
@@ -2195,13 +2196,14 @@ out:
 }
 
 /* Show an event */
-static int show_perf_probe_event(struct perf_probe_event *pev,
+static int show_perf_probe_event(const char *group, const char *event,
+				 struct perf_probe_event *pev,
 				 const char *module, bool use_stdout)
 {
 	struct strbuf buf = STRBUF_INIT;
 	int ret;
 
-	ret = perf_probe_event__sprintf(pev, module, &buf);
+	ret = perf_probe_event__sprintf(group, event, pev, module, &buf);
 	if (ret >= 0) {
 		if (use_stdout)
 			printf("%s\n", buf.buf);
@@ -2253,7 +2255,8 @@ static int __show_perf_probe_events(int fd, bool is_kprobe,
 								is_kprobe);
 			if (ret < 0)
 				goto next;
-			ret = show_perf_probe_event(&pev, tev.point.module,
+			ret = show_perf_probe_event(pev.group, pev.event,
+						    &pev, tev.point.module,
 						    true);
 		}
 next:
@@ -2438,7 +2441,7 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 	int i, fd, ret;
 	struct probe_trace_event *tev = NULL;
 	char buf[64];
-	const char *event, *group;
+	const char *event = NULL, *group = NULL;
 	struct strlist *namelist;
 	bool safename;
 
@@ -2500,15 +2503,12 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 		/* Add added event name to namelist */
 		strlist__add(namelist, event);
 
-		/* Trick here - save current event/group */
-		event = pev->event;
-		group = pev->group;
-		pev->event = tev->event;
-		pev->group = tev->group;
-		show_perf_probe_event(pev, tev->point.module, false);
-		/* Trick here - restore current event/group */
-		pev->event = (char *)event;
-		pev->group = (char *)group;
+		/* We use tev's name for showing new events */
+		show_perf_probe_event(tev->group, tev->event, pev,
+				      tev->point.module, false);
+		/* Save the last valid name */
+		event = tev->event;
+		group = tev->group;
 
 		/*
 		 * Probes after the first probe which comes from same
@@ -2522,11 +2522,10 @@ static int __add_probe_trace_events(struct perf_probe_event *pev,
 		warn_uprobe_event_compat(tev);
 
 	/* Note that it is possible to skip all events because of blacklist */
-	if (ret >= 0 && tev->event) {
+	if (ret >= 0 && event) {
 		/* Show how to use the event. */
 		pr_info("\nYou can now use it in all perf tools, such as:\n\n");
-		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group,
-			 tev->event);
+		pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event);
 	}
 
 	strlist__delete(namelist);

From 7737af010b097f3c1e2aeded21774d58b4aa2698 Mon Sep 17 00:00:00 2001
From: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Date: Wed, 17 Jun 2015 23:58:54 +0900
Subject: [PATCH 291/305] perf probe: Speed up perf probe --list by caching
 debuginfo

Speed up the "perf probe --list" by caching the last used debuginfo.
perf probe --list always open and load debuginfo for each entry of probe
list. This takes very a long time.

E.g. with vfs_* events (total 96 probes)

  [root@localhost perf]# time  ./perf probe -l &> /dev/null

  real    0m25.376s
  user    0m24.381s
  sys     0m1.012s

To solve this issue, this adds debuginfo_cache to cache the
last used debuginfo on memory.

With this fix, the perf-probe --list significantly improves
its speed.

  [root@localhost perf]#  time  ./perf probe -l &> /dev/null

  real    0m0.161s
  user    0m0.136s
  sys     0m0.025s

Signed-off-by: Masami Hiramatsu <masami.hiramatsu.pt@hitachi.com>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: David Ahern <dsahern@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Naohiro Aota <naota@elisp.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/20150617145854.19715.15314.stgit@localhost.localdomain
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/probe-event.c | 48 ++++++++++++++++++++++++++++++++---
 1 file changed, 44 insertions(+), 4 deletions(-)

diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c
index 65a1c8252270..076527b639bd 100644
--- a/tools/perf/util/probe-event.c
+++ b/tools/perf/util/probe-event.c
@@ -429,6 +429,41 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent)
 	return ret;
 }
 
+/* For caching the last debuginfo */
+static struct debuginfo *debuginfo_cache;
+static char *debuginfo_cache_path;
+
+static struct debuginfo *debuginfo_cache__open(const char *module, bool silent)
+{
+	if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) ||
+	    (!debuginfo_cache_path && !module && debuginfo_cache))
+		goto out;
+
+	/* Copy module path */
+	free(debuginfo_cache_path);
+	if (module) {
+		debuginfo_cache_path = strdup(module);
+		if (!debuginfo_cache_path) {
+			debuginfo__delete(debuginfo_cache);
+			debuginfo_cache = NULL;
+			goto out;
+		}
+	}
+
+	debuginfo_cache = open_debuginfo(module, silent);
+	if (!debuginfo_cache)
+		zfree(&debuginfo_cache_path);
+out:
+	return debuginfo_cache;
+}
+
+static void debuginfo_cache__exit(void)
+{
+	debuginfo__delete(debuginfo_cache);
+	debuginfo_cache = NULL;
+	zfree(&debuginfo_cache_path);
+}
+
 
 static int get_text_start_address(const char *exec, unsigned long *address)
 {
@@ -490,12 +525,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp,
 	pr_debug("try to find information at %" PRIx64 " in %s\n", addr,
 		 tp->module ? : "kernel");
 
-	dinfo = open_debuginfo(tp->module, verbose == 0);
-	if (dinfo) {
+	dinfo = debuginfo_cache__open(tp->module, verbose == 0);
+	if (dinfo)
 		ret = debuginfo__find_probe_point(dinfo,
 						 (unsigned long)addr, pp);
-		debuginfo__delete(dinfo);
-	} else
+	else
 		ret = -ENOENT;
 
 	if (ret > 0) {
@@ -930,6 +964,10 @@ out:
 
 #else	/* !HAVE_DWARF_SUPPORT */
 
+static void debuginfo_cache__exit(void)
+{
+}
+
 static int
 find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused,
 				 struct perf_probe_point *pp __maybe_unused,
@@ -2266,6 +2304,8 @@ next:
 			break;
 	}
 	strlist__delete(rawlist);
+	/* Cleanup cached debuginfo if needed */
+	debuginfo_cache__exit();
 
 	return ret;
 }

From 7951722da2963cc1f1a7831a37aa2311ac927056 Mon Sep 17 00:00:00 2001
From: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Date: Fri, 12 Jun 2015 01:28:36 -0400
Subject: [PATCH 292/305] perf trace: Fix race condition at the end of started
 workloads

I get following crash on multiple systems and across several releases
(at least since v3.18).

	Core was generated by `/tmp/perf trace sleep 0.2 '.
	Program terminated with signal SIGSEGV, Segmentation fault.
	#0  perf_mmap__read_head (mm=0x3fff9bf30070) at util/evlist.h:195
	195		u64 head = ACCESS_ONCE(pc->data_head);
	(gdb) bt
	#0  perf_mmap__read_head (mm=0x3fff9bf30070) at util/evlist.h:195
	#1  perf_evlist__mmap_read (evlist=0x10027f11910, idx=<optimized out>)
	    at util/evlist.c:637
	#2  0x000000001003ce4c in trace__run (argv=<optimized out>,
	    argc=<optimized out>, trace=0x3fffd7b28288) at builtin-trace.c:2259
	#3  cmd_trace (argc=<optimized out>, argv=<optimized out>,
	    prefix=<optimized out>) at builtin-trace.c:2799
	#4  0x00000000100657b8 in run_builtin (p=0x10176798 <commands+480>, argc=3,
	    argv=0x3fffd7b2b550) at perf.c:370
	#5  0x00000000100063e8 in handle_internal_command (argv=0x3fffd7b2b550, argc=3)
	    at perf.c:429
	#6  run_argv (argv=0x3fffd7b2af70, argcp=0x3fffd7b2af7c) at perf.c:473
	#7  main (argc=3, argv=0x3fffd7b2b550) at perf.c:588

The problem seems to be a race condition, when the application has just
exited.  Some/all fds associated with the perf-events (tracepoints) go
into a POLLHUP/ POLLERR state and the mmap region associated with those
events are unmapped (in perf_evlist__filter_pollfd()).

But we go back and do a perf_evlist__mmap_read() which assumes that the
mmaps are still valid and we hit the crash.

If the mapping for an event is released, its refcnt is 0 (and ->base
is NULL), so ensure we have non-zero refcount before accessing the map.

Note that perf-record has a similar logic but unlike perf-trace, the
record__mmap_read_all() checks the evlist->mmap[i].base before accessing
the map.

Signed-off-by: Sukadev Bhattiprolu <sukadev@linux.vnet.ibm.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Li Zhang <zhlcindy@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/20150612060003.GA19913@us.ibm.com
[ Fixed it up to use atomic_read() ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index dc1dc2c181ef..6b58a47a79ec 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -634,11 +634,18 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist,
 union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx)
 {
 	struct perf_mmap *md = &evlist->mmap[idx];
-	u64 head = perf_mmap__read_head(md);
+	u64 head;
 	u64 old = md->prev;
 	unsigned char *data = md->base + page_size;
 	union perf_event *event = NULL;
 
+	/*
+	 * Check if event was unmapped due to a POLLHUP/POLLERR.
+	 */
+	if (!atomic_read(&md->refcnt))
+		return NULL;
+
+	head = perf_mmap__read_head(md);
 	if (evlist->overwrite) {
 		/*
 		 * If we're further behind than half the buffer, there's a chance

From 2b56bcfb6f4bb0aa0813b6ee1dde8f5b23fce5d4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 17 Jun 2015 16:40:26 -0300
Subject: [PATCH 293/305] perf evlist: Add toggle_enable() method

For an upcoming feature in 'perf top' we will have a hotkey to
enable/disable events, so remember if the events in the list are
enabled or disabled and allows toggling this state using a new
method.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-64c4jvdl5feg2zhimxvokqka@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/evlist.c | 9 +++++++++
 tools/perf/util/evlist.h | 2 ++
 2 files changed, 11 insertions(+)

diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c
index 6b58a47a79ec..8366511b45f8 100644
--- a/tools/perf/util/evlist.c
+++ b/tools/perf/util/evlist.c
@@ -297,6 +297,8 @@ void perf_evlist__disable(struct perf_evlist *evlist)
 				      PERF_EVENT_IOC_DISABLE, 0);
 		}
 	}
+
+	evlist->enabled = false;
 }
 
 void perf_evlist__enable(struct perf_evlist *evlist)
@@ -316,6 +318,13 @@ void perf_evlist__enable(struct perf_evlist *evlist)
 				      PERF_EVENT_IOC_ENABLE, 0);
 		}
 	}
+
+	evlist->enabled = true;
+}
+
+void perf_evlist__toggle_enable(struct perf_evlist *evlist)
+{
+	(evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist);
 }
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h
index 955bf31b7dd3..a8489b9d2812 100644
--- a/tools/perf/util/evlist.h
+++ b/tools/perf/util/evlist.h
@@ -41,6 +41,7 @@ struct perf_evlist {
 	int		 nr_groups;
 	int		 nr_mmaps;
 	bool		 overwrite;
+	bool		 enabled;
 	size_t		 mmap_len;
 	int		 id_pos;
 	int		 is_pos;
@@ -139,6 +140,7 @@ void perf_evlist__munmap(struct perf_evlist *evlist);
 
 void perf_evlist__disable(struct perf_evlist *evlist);
 void perf_evlist__enable(struct perf_evlist *evlist);
+void perf_evlist__toggle_enable(struct perf_evlist *evlist);
 
 int perf_evlist__disable_event(struct perf_evlist *evlist,
 			       struct perf_evsel *evsel);

From 5d484f99aed547e235f2229653c95392a1bc3692 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Wed, 17 Jun 2015 16:50:52 -0300
Subject: [PATCH 294/305] perf top: Allow disabling/enabling events dynamicly

Now it is possible to press CTRL+z at anytime and that will disable the
events being monitored, essentially turning 'top' into 'report', with
pressing CTRL+z again making it enable the events again, returning to
the 'top' behaviour, i.e. dynamic + decaying of older samples.

One may want, for instance, play with:

    -d, --delay <n>       number of seconds to delay between refreshes

and:

    -z, --zero            zero history across updates

Plus CTRL+z to see only the events since last zeroing, etc.

Suggested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-zq7tnh5462blt2yda0bcxh5b@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c       | 52 +++++++++++++++++++++++-----------
 tools/perf/ui/browsers/hists.c |  2 ++
 2 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 6b987424d015..72d8a7ae5986 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -235,10 +235,13 @@ static void perf_top__show_details(struct perf_top *top)
 
 	more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel,
 				       0, top->sym_pcnt_filter, top->print_entries, 4);
-	if (top->zero)
-		symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
-	else
-		symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+
+	if (top->evlist->enabled) {
+		if (top->zero)
+			symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx);
+		else
+			symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx);
+	}
 	if (more != 0)
 		printf("%d lines not displayed, maybe increase display entries [e]\n", more);
 out_unlock:
@@ -276,11 +279,13 @@ static void perf_top__print_sym_table(struct perf_top *top)
 		return;
 	}
 
-	if (top->zero) {
-		hists__delete_entries(hists);
-	} else {
-		hists__decay_entries(hists, top->hide_user_symbols,
-				     top->hide_kernel_symbols);
+	if (top->evlist->enabled) {
+		if (top->zero) {
+			hists__delete_entries(hists);
+		} else {
+			hists__decay_entries(hists, top->hide_user_symbols,
+					     top->hide_kernel_symbols);
+		}
 	}
 
 	hists__collapse_resort(hists, NULL);
@@ -545,11 +550,13 @@ static void perf_top__sort_new_samples(void *arg)
 
 	hists = evsel__hists(t->sym_evsel);
 
-	if (t->zero) {
-		hists__delete_entries(hists);
-	} else {
-		hists__decay_entries(hists, t->hide_user_symbols,
-				     t->hide_kernel_symbols);
+	if (t->evlist->enabled) {
+		if (t->zero) {
+			hists__delete_entries(hists);
+		} else {
+			hists__decay_entries(hists, t->hide_user_symbols,
+					     t->hide_kernel_symbols);
+		}
 	}
 
 	hists__collapse_resort(hists, NULL);
@@ -579,8 +586,21 @@ static void *display_thread_tui(void *arg)
 		hists->uid_filter_str = top->record_opts.target.uid_str;
 	}
 
-	perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent,
-				      &top->session->header.env);
+	while (true)  {
+		int key = perf_evlist__tui_browse_hists(top->evlist, help, &hbt,
+							top->min_percent,
+							&top->session->header.env);
+
+		if (key != CTRL('z'))
+			break;
+
+		perf_evlist__toggle_enable(top->evlist);
+		/*
+		 * No need to refresh, resort/decay histogram entries
+		 * if we are not collecting samples:
+		 */
+		hbt.refresh = top->evlist->enabled ? top->delay_secs : 0;
+	}
 
 	done = 1;
 	return NULL;
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e64893f2fd7f..8f7c4d49d327 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1736,6 +1736,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	"t             Zoom into current Thread\n"
 	"V             Verbose (DSO names in callchains, etc)\n"
 	"z             Toggle zeroing of samples\n"
+	"CTRL+z        Enable/Disable events\n"
 	"/             Filter symbol by name";
 
 	if (browser == NULL)
@@ -1900,6 +1901,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
+		case CTRL('z'):
 			goto out_free_stack;
 		default:
 			continue;

From a5499b37197ab4b5fed101370df7ccadacbb4340 Mon Sep 17 00:00:00 2001
From: Adrian Hunter <adrian.hunter@intel.com>
Date: Fri, 29 May 2015 16:33:30 +0300
Subject: [PATCH 295/305] perf tools: Ensure thread-stack is flushed

The thread-stack represents a thread's current stack.  When a thread
exits there can still be many functions on the stack e.g. exit() can be
called many levels deep, so all the callers will never return.  To get
that information output, the thread-stack must be flushed.

Previously it was assumed the thread-stack would be flushed when the
struct thread was deleted.  With thread ref-counting it is no longer
clear when that will be, if ever. So instead explicitly flush all the
thread-stacks at the end of a session.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1432906425-9911-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/machine.c      | 21 +++++++++++++++++++++
 tools/perf/util/machine.h      |  3 +++
 tools/perf/util/session.c      | 20 ++++++++++++++++++++
 tools/perf/util/thread-stack.c | 18 +++++++++++++-----
 tools/perf/util/thread-stack.h |  1 +
 5 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 132e35765101..8b3b1937cb9e 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1890,6 +1890,27 @@ int machine__for_each_thread(struct machine *machine,
 	return rc;
 }
 
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv)
+{
+	struct rb_node *nd;
+	int rc = 0;
+
+	rc = machine__for_each_thread(&machines->host, fn, priv);
+	if (rc != 0)
+		return rc;
+
+	for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) {
+		struct machine *machine = rb_entry(nd, struct machine, rb_node);
+
+		rc = machine__for_each_thread(machine, fn, priv);
+		if (rc != 0)
+			return rc;
+	}
+	return rc;
+}
+
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
 				  perf_event__handler_t process, bool data_mmap)
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index ca267c41f28d..cea62f6fb144 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -216,6 +216,9 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp);
 int machine__for_each_thread(struct machine *machine,
 			     int (*fn)(struct thread *thread, void *p),
 			     void *priv);
+int machines__for_each_thread(struct machines *machines,
+			      int (*fn)(struct thread *thread, void *p),
+			      void *priv);
 
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index e1cd17c2afab..c371336d1eb2 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -16,6 +16,7 @@
 #include "perf_regs.h"
 #include "asm/bug.h"
 #include "auxtrace.h"
+#include "thread-stack.h"
 
 static int perf_session__deliver_event(struct perf_session *session,
 				       union perf_event *event,
@@ -1361,6 +1362,19 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 	events_stats__auxtrace_error_warn(stats);
 }
 
+static int perf_session__flush_thread_stack(struct thread *thread,
+					    void *p __maybe_unused)
+{
+	return thread_stack__flush(thread);
+}
+
+static int perf_session__flush_thread_stacks(struct perf_session *session)
+{
+	return machines__for_each_thread(&session->machines,
+					 perf_session__flush_thread_stack,
+					 NULL);
+}
+
 volatile int session_done;
 
 static int __perf_session__process_pipe_events(struct perf_session *session)
@@ -1450,6 +1464,9 @@ done:
 	if (err)
 		goto out_err;
 	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
 out_err:
 	free(buf);
 	perf_session__warn_about_errors(session);
@@ -1600,6 +1617,9 @@ out:
 	if (err)
 		goto out_err;
 	err = auxtrace__flush_events(session, tool);
+	if (err)
+		goto out_err;
+	err = perf_session__flush_thread_stacks(session);
 out_err:
 	ui_progress__finish();
 	perf_session__warn_about_errors(session);
diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c
index 9ed59a452d1f..679688e70ae7 100644
--- a/tools/perf/util/thread-stack.c
+++ b/tools/perf/util/thread-stack.c
@@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread,
 	return crp->process(&cr, crp->data);
 }
 
-static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
+static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 {
 	struct call_return_processor *crp = ts->crp;
 	int err;
@@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts)
 	return 0;
 }
 
+int thread_stack__flush(struct thread *thread)
+{
+	if (thread->ts)
+		return __thread_stack__flush(thread, thread->ts);
+
+	return 0;
+}
+
 int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 			u64 to_ip, u16 insn_len, u64 trace_nr)
 {
@@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 	 */
 	if (trace_nr != thread->ts->trace_nr) {
 		if (thread->ts->trace_nr)
-			thread_stack__flush(thread, thread->ts);
+			__thread_stack__flush(thread, thread->ts);
 		thread->ts->trace_nr = trace_nr;
 	}
 
@@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 
 	if (trace_nr != thread->ts->trace_nr) {
 		if (thread->ts->trace_nr)
-			thread_stack__flush(thread, thread->ts);
+			__thread_stack__flush(thread, thread->ts);
 		thread->ts->trace_nr = trace_nr;
 	}
 }
@@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr)
 void thread_stack__free(struct thread *thread)
 {
 	if (thread->ts) {
-		thread_stack__flush(thread, thread->ts);
+		__thread_stack__flush(thread, thread->ts);
 		zfree(&thread->ts->stack);
 		zfree(&thread->ts);
 	}
@@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm,
 
 	/* Flush stack on exec */
 	if (ts->comm != comm && thread->pid_ == thread->tid) {
-		err = thread_stack__flush(thread, ts);
+		err = __thread_stack__flush(thread, ts);
 		if (err)
 			return err;
 		ts->comm = comm;
diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h
index b843bbef8ba2..e1528f1374c3 100644
--- a/tools/perf/util/thread-stack.h
+++ b/tools/perf/util/thread-stack.h
@@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip,
 void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr);
 void thread_stack__sample(struct thread *thread, struct ip_callchain *chain,
 			  size_t sz, u64 ip);
+int thread_stack__flush(struct thread *thread);
 void thread_stack__free(struct thread *thread);
 
 struct call_return_processor *

From 0c4a5bcea4609948375173cdea8d73783110a75e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Martin=20Li=C5=A1ka?= <mliska@suse.cz>
Date: Fri, 19 Jun 2015 16:10:43 -0300
Subject: [PATCH 296/305] perf annotate: Display total number of samples with
 --show-total-period

To compare two records on an instruction base, with --show-total-period
option provided, display total number of samples that belong to a line
in assembly language.

New hot key 't' is introduced for 'perf annotate' TUI.

Signed-off-by: Martin Liska <mliska@suse.cz>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/5583E26D.1040407@suse.cz
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-annotate.c     |  2 ++
 tools/perf/ui/browsers/annotate.c | 60 ++++++++++++++++++++++---------
 tools/perf/util/annotate.c        | 28 ++++++++++++---
 tools/perf/util/annotate.h        |  3 +-
 4 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c
index 4e08c2d2090e..2c1bec39c30e 100644
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@@ -329,6 +329,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused)
 		   "objdump binary to use for disassembly and annotations"),
 	OPT_BOOLEAN(0, "group", &symbol_conf.event_group,
 		    "Show event group information together"),
+	OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period,
+		    "Show a column with the sum of periods"),
 	OPT_END()
 	};
 	int ret = hists__init();
diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c
index acb0e23b138e..5995a8bd7c69 100644
--- a/tools/perf/ui/browsers/annotate.c
+++ b/tools/perf/ui/browsers/annotate.c
@@ -11,16 +11,21 @@
 #include "../../util/evsel.h"
 #include <pthread.h>
 
+struct disasm_line_samples {
+	double		percent;
+	u64		nr;
+};
+
 struct browser_disasm_line {
-	struct rb_node	rb_node;
-	u32		idx;
-	int		idx_asm;
-	int		jump_sources;
+	struct rb_node			rb_node;
+	u32				idx;
+	int				idx_asm;
+	int				jump_sources;
 	/*
 	 * actual length of this array is saved on the nr_events field
 	 * of the struct annotate_browser
 	 */
-	double		percent[1];
+	struct disasm_line_samples	samples[1];
 };
 
 static struct annotate_browser_opt {
@@ -28,7 +33,8 @@ static struct annotate_browser_opt {
 	     use_offset,
 	     jump_arrows,
 	     show_linenr,
-	     show_nr_jumps;
+	     show_nr_jumps,
+	     show_total_period;
 } annotate_browser__opts = {
 	.use_offset	= true,
 	.jump_arrows	= true,
@@ -105,15 +111,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int
 	char bf[256];
 
 	for (i = 0; i < ab->nr_events; i++) {
-		if (bdl->percent[i] > percent_max)
-			percent_max = bdl->percent[i];
+		if (bdl->samples[i].percent > percent_max)
+			percent_max = bdl->samples[i].percent;
 	}
 
 	if (dl->offset != -1 && percent_max != 0.0) {
 		for (i = 0; i < ab->nr_events; i++) {
-			ui_browser__set_percent_color(browser, bdl->percent[i],
+			ui_browser__set_percent_color(browser,
+						      bdl->samples[i].percent,
 						      current_entry);
-			slsmg_printf("%6.2f ", bdl->percent[i]);
+			if (annotate_browser__opts.show_total_period)
+				slsmg_printf("%6" PRIu64 " ",
+					     bdl->samples[i].nr);
+			else
+				slsmg_printf("%6.2f ", bdl->samples[i].percent);
 		}
 	} else {
 		ui_browser__set_percent_color(browser, 0, current_entry);
@@ -273,9 +284,9 @@ static int disasm__cmp(struct browser_disasm_line *a,
 	int i;
 
 	for (i = 0; i < nr_pcnt; i++) {
-		if (a->percent[i] == b->percent[i])
+		if (a->samples[i].percent == b->samples[i].percent)
 			continue;
-		return a->percent[i] < b->percent[i];
+		return a->samples[i].percent < b->samples[i].percent;
 	}
 	return 0;
 }
@@ -366,14 +377,17 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser,
 		next = disasm__get_next_ip_line(&notes->src->source, pos);
 
 		for (i = 0; i < browser->nr_events; i++) {
-			bpos->percent[i] = disasm__calc_percent(notes,
+			u64 nr_samples;
+
+			bpos->samples[i].percent = disasm__calc_percent(notes,
 						evsel->idx + i,
 						pos->offset,
 						next ? next->offset : len,
-					        &path);
+						&path, &nr_samples);
+			bpos->samples[i].nr = nr_samples;
 
-			if (max_percent < bpos->percent[i])
-				max_percent = bpos->percent[i];
+			if (max_percent < bpos->samples[i].percent)
+				max_percent = bpos->samples[i].percent;
 		}
 
 		if (max_percent < 0.01) {
@@ -737,6 +751,7 @@ static int annotate_browser__run(struct annotate_browser *browser,
 		"n             Search next string\n"
 		"o             Toggle disassembler output/simplified view\n"
 		"s             Toggle source code view\n"
+		"t             Toggle total period view\n"
 		"/             Search string\n"
 		"k             Toggle line numbers\n"
 		"r             Run available scripts\n"
@@ -812,6 +827,11 @@ show_sup_ins:
 				ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions.");
 			}
 			continue;
+		case 't':
+			annotate_browser__opts.show_total_period =
+			  !annotate_browser__opts.show_total_period;
+			annotate_browser__update_addr_width(browser);
+			continue;
 		case K_LEFT:
 		case K_ESC:
 		case 'q':
@@ -832,6 +852,10 @@ out:
 int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel,
 			     struct hist_browser_timer *hbt)
 {
+	/* Set default value for show_total_period.  */
+	annotate_browser__opts.show_total_period =
+	  symbol_conf.show_total_period;
+
 	return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt);
 }
 
@@ -929,7 +953,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map,
 
 	if (perf_evsel__is_group_event(evsel)) {
 		nr_pcnt = evsel->nr_members;
-		sizeof_bdl += sizeof(double) * (nr_pcnt - 1);
+		sizeof_bdl += sizeof(struct disasm_line_samples) *
+		  (nr_pcnt - 1);
 	}
 
 	if (symbol__annotate(sym, map, sizeof_bdl) < 0) {
@@ -1006,6 +1031,7 @@ static struct annotate_config {
 	ANNOTATE_CFG(show_linenr),
 	ANNOTATE_CFG(show_nr_jumps),
 	ANNOTATE_CFG(use_offset),
+	ANNOTATE_CFG(show_total_period),
 };
 
 #undef ANNOTATE_CFG
diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index bf8043009909..12914b66d347 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -654,10 +654,11 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 }
 
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path)
+			    s64 end, const char **path, u64 *nr_samples)
 {
 	struct source_line *src_line = notes->src->lines;
 	double percent = 0.0;
+	*nr_samples = 0;
 
 	if (src_line) {
 		size_t sizeof_src_line = sizeof(*src_line) +
@@ -671,6 +672,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 				*path = src_line->path;
 
 			percent += src_line->p[evidx].percent;
+			*nr_samples += src_line->p[evidx].samples;
 			offset++;
 		}
 	} else {
@@ -680,8 +682,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 		while (offset < end)
 			hits += h->addr[offset++];
 
-		if (h->sum)
+		if (h->sum) {
+			*nr_samples = hits;
 			percent = 100.0 * hits / h->sum;
+		}
 	}
 
 	return percent;
@@ -696,8 +700,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 	if (dl->offset != -1) {
 		const char *path = NULL;
+		u64 nr_samples;
 		double percent, max_percent = 0.0;
 		double *ppercents = &percent;
+		u64 *psamples = &nr_samples;
 		int i, nr_percent = 1;
 		const char *color;
 		struct annotation *notes = symbol__annotation(sym);
@@ -710,8 +716,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (perf_evsel__is_group_event(evsel)) {
 			nr_percent = evsel->nr_members;
 			ppercents = calloc(nr_percent, sizeof(double));
-			if (ppercents == NULL)
+			psamples = calloc(nr_percent, sizeof(u64));
+			if (ppercents == NULL || psamples == NULL) {
 				return -1;
+			}
 		}
 
 		for (i = 0; i < nr_percent; i++) {
@@ -719,9 +727,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 					notes->src->lines ? i : evsel->idx + i,
 					offset,
 					next ? next->offset : (s64) len,
-					&path);
+					&path, &nr_samples);
 
 			ppercents[i] = percent;
+			psamples[i] = nr_samples;
 			if (percent > max_percent)
 				max_percent = percent;
 		}
@@ -759,8 +768,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 
 		for (i = 0; i < nr_percent; i++) {
 			percent = ppercents[i];
+			nr_samples = psamples[i];
 			color = get_percent_color(percent);
-			color_fprintf(stdout, color, " %7.2f", percent);
+
+			if (symbol_conf.show_total_period)
+				color_fprintf(stdout, color, " %7" PRIu64,
+					      nr_samples);
+			else
+				color_fprintf(stdout, color, " %7.2f", percent);
 		}
 
 		printf(" :	");
@@ -770,6 +785,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st
 		if (ppercents != &percent)
 			free(ppercents);
 
+		if (psamples != &nr_samples)
+			free(psamples);
+
 	} else if (max_lines && printed >= max_lines)
 		return 1;
 	else {
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index cadbdc90a5cb..c8c18cadcd17 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -72,7 +72,7 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa
 int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw);
 size_t disasm__fprintf(struct list_head *head, FILE *fp);
 double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
-			    s64 end, const char **path);
+			    s64 end, const char **path, u64 *nr_samples);
 
 struct sym_hist {
 	u64		sum;
@@ -82,6 +82,7 @@ struct sym_hist {
 struct source_line_percent {
 	double		percent;
 	double		percent_sum;
+	double          samples;
 };
 
 struct source_line {

From 276af92f107fc35ede78c7cdea323aaec364002c Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 16:36:12 -0300
Subject: [PATCH 297/305] perf annotate: Rename source_line_percent to
 source_line_samples

To better reflect the purpose of this struct, that is to hold
info about samples, its total number and is percentage.

Cc: Martin Liska <mliska@suse.cz>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/n/tip-6bf8gwcl975uurl0ttpvtk69@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/annotate.c | 26 +++++++++++++-------------
 tools/perf/util/annotate.h |  6 +++---
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c
index 12914b66d347..03b7bc70eb66 100644
--- a/tools/perf/util/annotate.c
+++ b/tools/perf/util/annotate.c
@@ -662,7 +662,7 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 
 	if (src_line) {
 		size_t sizeof_src_line = sizeof(*src_line) +
-				sizeof(src_line->p) * (src_line->nr_pcnt - 1);
+				sizeof(src_line->samples) * (src_line->nr_pcnt - 1);
 
 		while (offset < end) {
 			src_line = (void *)notes->src->lines +
@@ -671,8 +671,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset,
 			if (*path == NULL)
 				*path = src_line->path;
 
-			percent += src_line->p[evidx].percent;
-			*nr_samples += src_line->p[evidx].samples;
+			percent += src_line->samples[evidx].percent;
+			*nr_samples += src_line->samples[evidx].nr;
 			offset++;
 		}
 	} else {
@@ -1121,7 +1121,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
 		ret = strcmp(iter->path, src_line->path);
 		if (ret == 0) {
 			for (i = 0; i < src_line->nr_pcnt; i++)
-				iter->p[i].percent_sum += src_line->p[i].percent;
+				iter->samples[i].percent_sum += src_line->samples[i].percent;
 			return;
 		}
 
@@ -1132,7 +1132,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin
 	}
 
 	for (i = 0; i < src_line->nr_pcnt; i++)
-		src_line->p[i].percent_sum = src_line->p[i].percent;
+		src_line->samples[i].percent_sum = src_line->samples[i].percent;
 
 	rb_link_node(&src_line->node, parent, p);
 	rb_insert_color(&src_line->node, root);
@@ -1143,9 +1143,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b)
 	int i;
 
 	for (i = 0; i < a->nr_pcnt; i++) {
-		if (a->p[i].percent_sum == b->p[i].percent_sum)
+		if (a->samples[i].percent_sum == b->samples[i].percent_sum)
 			continue;
-		return a->p[i].percent_sum > b->p[i].percent_sum;
+		return a->samples[i].percent_sum > b->samples[i].percent_sum;
 	}
 
 	return 0;
@@ -1197,7 +1197,7 @@ static void symbol__free_source_line(struct symbol *sym, int len)
 	int i;
 
 	sizeof_src_line = sizeof(*src_line) +
-			  (sizeof(src_line->p) * (src_line->nr_pcnt - 1));
+			  (sizeof(src_line->samples) * (src_line->nr_pcnt - 1));
 
 	for (i = 0; i < len; i++) {
 		free_srcline(src_line->path);
@@ -1229,7 +1229,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 			h_sum += h->sum;
 		}
 		nr_pcnt = evsel->nr_members;
-		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p);
+		sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples);
 	}
 
 	if (!h_sum)
@@ -1249,10 +1249,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map,
 
 		for (k = 0; k < nr_pcnt; k++) {
 			h = annotation__histogram(notes, evidx + k);
-			src_line->p[k].percent = 100.0 * h->addr[i] / h->sum;
+			src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum;
 
-			if (src_line->p[k].percent > percent_max)
-				percent_max = src_line->p[k].percent;
+			if (src_line->samples[k].percent > percent_max)
+				percent_max = src_line->samples[k].percent;
 		}
 
 		if (percent_max <= 0.5)
@@ -1292,7 +1292,7 @@ static void print_summary(struct rb_root *root, const char *filename)
 
 		src_line = rb_entry(node, struct source_line, node);
 		for (i = 0; i < src_line->nr_pcnt; i++) {
-			percent = src_line->p[i].percent_sum;
+			percent = src_line->samples[i].percent_sum;
 			color = get_percent_color(percent);
 			color_fprintf(stdout, color, " %7.2f", percent);
 
diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h
index c8c18cadcd17..7e78e6c27078 100644
--- a/tools/perf/util/annotate.h
+++ b/tools/perf/util/annotate.h
@@ -79,17 +79,17 @@ struct sym_hist {
 	u64		addr[0];
 };
 
-struct source_line_percent {
+struct source_line_samples {
 	double		percent;
 	double		percent_sum;
-	double          samples;
+	double          nr;
 };
 
 struct source_line {
 	struct rb_node	node;
 	char		*path;
 	int		nr_pcnt;
-	struct source_line_percent p[1];
+	struct source_line_samples samples[1];
 };
 
 /** struct annotated_source - symbols with hits have this attached as in sannotation

From fbb7997e308064a0ea26fb5435ee5fa69a588dac Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 16:56:04 -0300
Subject: [PATCH 298/305] perf top: Replace CTRL+z with 'f' as hotkey for
 enable/disable events

I.e. 'freeze'/'unfreeze', this is because CTRL+z has a well known
action, i.e. suspend the app, perf needs to follow that convention, that
will be done on a separate patch, tho.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-oedcl6ovohara4koig14ayip@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c       | 2 +-
 tools/perf/ui/browsers/hists.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 72d8a7ae5986..ccf569ab08aa 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -591,7 +591,7 @@ static void *display_thread_tui(void *arg)
 							top->min_percent,
 							&top->session->header.env);
 
-		if (key != CTRL('z'))
+		if (key != 'f')
 			break;
 
 		perf_evlist__toggle_enable(top->evlist);
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 8f7c4d49d327..764f7cabd6f2 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1736,7 +1736,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 	"t             Zoom into current Thread\n"
 	"V             Verbose (DSO names in callchains, etc)\n"
 	"z             Toggle zeroing of samples\n"
-	"CTRL+z        Enable/Disable events\n"
+	"f             Enable/Disable events\n"
 	"/             Filter symbol by name";
 
 	if (browser == NULL)
@@ -1901,7 +1901,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
-		case CTRL('z'):
+		case 'f':
 			goto out_free_stack;
 		default:
 			continue;

From 516e536849d26a2bcacd9050f4a7222a14c0f9e4 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 16:59:43 -0300
Subject: [PATCH 299/305] perf hists browser: Do not exit when 'f' is pressed
 in 'report' mode

The 'f' hotkey is only used when in 'top', dynamic mode, to
enable/disable events, currently not making sense in the 'report',
static mode, where we can't go from showing the histogram entries
created from a perf.data file to adding more events after recreating the
evlist created from the perf.data file, albeit possible, this is not
implemented right now.

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-lholzf472pu98dkkijggwx2m@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 764f7cabd6f2..79a093a7d221 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1901,7 +1901,10 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 			/* Fall thru */
 		case 'q':
 		case CTRL('c'):
+			goto out_free_stack;
 		case 'f':
+			if (is_report_browser(hbt))
+				continue;
 			goto out_free_stack;
 		default:
 			continue;

From 5f00b0f45bc3f1e77f0db0e915fd3302db669914 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 17:30:20 -0300
Subject: [PATCH 300/305] perf hists browser: Honour the help line provided by
 builtin-{top,report}.c

The hists_browser was replacing whatever helpline provided by 'top' or
'report' with a static "Press '?' for help on key bindings", fix it.

Now the message passed by top appears at the bottom of the screen:

"For a higher level overview, try: perf top --sort comm,dso"

As well the message that will be added when the user presses 'f' to
disable the events, something along the lines of "press f again to
re-enable...".

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-dacaja70mbfz3a0yj1n180gx@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index 79a093a7d221..e2b500b26527 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -424,7 +424,7 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser)
 		"Or reduce the sampling frequency.");
 }
 
-static int hist_browser__run(struct hist_browser *browser)
+static int hist_browser__run(struct hist_browser *browser, const char *help)
 {
 	int key;
 	char title[160];
@@ -436,8 +436,7 @@ static int hist_browser__run(struct hist_browser *browser)
 
 	hists__browser_title(browser->hists, hbt, title, sizeof(title));
 
-	if (ui_browser__show(&browser->b, title,
-			     "Press '?' for help on key bindings") < 0)
+	if (ui_browser__show(&browser->b, title, help) < 0)
 		return -1;
 
 	while (1) {
@@ -1773,7 +1772,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 
 		nr_options = 0;
 
-		key = hist_browser__run(browser);
+		key = hist_browser__run(browser, helpline);
 
 		if (browser->he_selection != NULL) {
 			thread = hist_browser__selected_thread(browser);

From ae3b6ab603bce6a74ca089f7a944196863166a52 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 17:35:25 -0300
Subject: [PATCH 301/305] perf top: Tell the user how to unfreeze events after
 pressing 'f'

When the user presses 'f' to disable events the visual cues are, well,
the percentages not changing and the number of events freezing.

Be more explicit by changing the help line at the bottom of the screen
to show the following messages when 'f' is pressed:

  "Press 'f' again to re-enable the events"

And then, when 'f' is pressed again:

  "Press 'f' to disable the events or 'h'

Suggested-by: Ingo Molnar <mingo@kernel.org>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-uhiswg9a9rxm5gxg7ptjskjn@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-top.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index ccf569ab08aa..70a9505aae83 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -599,7 +599,13 @@ static void *display_thread_tui(void *arg)
 		 * No need to refresh, resort/decay histogram entries
 		 * if we are not collecting samples:
 		 */
-		hbt.refresh = top->evlist->enabled ? top->delay_secs : 0;
+		if (top->evlist->enabled) {
+			hbt.refresh = top->delay_secs;
+			help = "Press 'f' to disable the events or 'h' to see other hotkeys";
+		} else {
+			help = "Press 'f' again to re-enable the events";
+			hbt.refresh = 0;
+		}
 	}
 
 	done = 1;

From 3e323dc0a80c3921f30d03ca300426f70dc5a327 Mon Sep 17 00:00:00 2001
From: Arnaldo Carvalho de Melo <acme@redhat.com>
Date: Fri, 19 Jun 2015 17:49:29 -0300
Subject: [PATCH 302/305] perf hists browser: React to unassigned hotkey
 pressing

When that happens we were just ignoring the key press, now this
message is presented in the bottom line (the help line):

  "Press '?' for help on key bindings"

Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Borislav Petkov <bp@suse.de>
Cc: David Ahern <dsahern@gmail.com>
Cc: Don Zickus <dzickus@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lkml.kernel.org/n/tip-iyma2j5kj3q9i1stl4mfh90n@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/ui/browsers/hists.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index e2b500b26527..c42adb600091 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -1902,10 +1902,11 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events,
 		case CTRL('c'):
 			goto out_free_stack;
 		case 'f':
-			if (is_report_browser(hbt))
-				continue;
-			goto out_free_stack;
+			if (!is_report_browser(hbt))
+				goto out_free_stack;
+			/* Fall thru */
 		default:
+			helpline = "Press '?' for help on key bindings";
 			continue;
 		}
 

From c05676c06232e6459a6106ddf0d4e154ce6cd859 Mon Sep 17 00:00:00 2001
From: Yannick Brosseau <scientist@fb.com>
Date: Wed, 17 Jun 2015 16:41:10 -0700
Subject: [PATCH 303/305] perf report: Fix sort__sym_cmp to also compare end of
 symbol

When using a map file from a JIT, due to memory reuse, we can obtain
multiple symbols with the same start address but a different length.

The symbols__find does check for the end so not doing it in
sort__sym_cmp was causing the hist_entry in the annotate part of a
report to match to the wrong entry, causing a fatal error.

Signed-off-by: Yannick Brosseau <scientist@fb.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: kernel-team@fb.com
Link: http://lkml.kernel.org/r/1434584470-17771-1-git-send-email-scientist@fb.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/sort.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 09d4696fd9a1..4c65a143a34c 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip)
 
 static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r)
 {
-	u64 ip_l, ip_r;
-
 	if (!sym_l || !sym_r)
 		return cmp_null(sym_l, sym_r);
 
 	if (sym_l == sym_r)
 		return 0;
 
-	ip_l = sym_l->start;
-	ip_r = sym_r->start;
+	if (sym_l->start != sym_r->start)
+		return (int64_t)(sym_r->start - sym_l->start);
 
-	return (int64_t)(ip_r - ip_l);
+	return (int64_t)(sym_r->end - sym_l->end);
 }
 
 static int64_t

From 930e6fcd2bcce9bcd9d4aa7e755678d33f3fe6f4 Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Wed, 17 Jun 2015 09:51:10 -0400
Subject: [PATCH 304/305] perf tools: Add time out to force stop proc map
 processing

System wide sampling like 'perf top' or 'perf record -a' read all
threads /proc/xxx/maps before sampling. If there are any threads which
generating a keeping growing huge maps, perf will do infinite loop
during synthesizing. Nothing will be sampled.

This patch fixes this issue by adding per-thread timeout to force stop
this kind of endless proc map processing.

PERF_RECORD_MISC_PROC_MAP_PARSE_TIME_OUT is introduced to indicate that
the mmap record are truncated by time out. User will get warning
notification when truncated mmap records are detected.

Reported-by: Ying Huang <ying.huang@intel.com>
Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ying Huang <ying.huang@intel.com>
Link: http://lkml.kernel.org/r/1434549071-25611-1-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 include/uapi/linux/perf_event.h |  4 ++++
 tools/perf/util/event.c         | 18 ++++++++++++++++++
 tools/perf/util/event.h         |  1 +
 tools/perf/util/session.c       | 11 +++++++++++
 4 files changed, 34 insertions(+)

diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 613ed9ad588f..d97f84c080da 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -565,6 +565,10 @@ struct perf_event_mmap_page {
 #define PERF_RECORD_MISC_GUEST_KERNEL		(4 << 0)
 #define PERF_RECORD_MISC_GUEST_USER		(5 << 0)
 
+/*
+ * Indicates that /proc/PID/maps parsing are truncated by time out.
+ */
+#define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT	(1 << 12)
 /*
  * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on
  * different events so can reuse the same bit position.
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 793b1503d437..416ba80c628f 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -213,6 +213,8 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
+#define PROC_MAP_PARSE_TIMEOUT	(500 * 1000000ULL)
+
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
@@ -222,6 +224,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 {
 	char filename[PATH_MAX];
 	FILE *fp;
+	unsigned long long t;
+	bool truncation = false;
 	int rc = 0;
 
 	if (machine__is_default_guest(machine))
@@ -240,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 	}
 
 	event->header.type = PERF_RECORD_MMAP2;
+	t = rdclock();
 
 	while (1) {
 		char bf[BUFSIZ];
@@ -253,6 +258,12 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
+		if ((rdclock() - t) > PROC_MAP_PARSE_TIMEOUT) {
+			pr_warning("Reading %s time out.\n", filename);
+			truncation = true;
+			goto out;
+		}
+
 		/* ensure null termination since stack will be reused. */
 		strcpy(execname, "");
 
@@ -301,6 +312,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			event->header.misc |= PERF_RECORD_MISC_MMAP_DATA;
 		}
 
+out:
+		if (truncation)
+			event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT;
+
 		if (!strcmp(execname, ""))
 			strcpy(execname, anonstr);
 
@@ -319,6 +334,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 			rc = -1;
 			break;
 		}
+
+		if (truncation)
+			break;
 	}
 
 	fclose(fp);
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 5dc51ada05df..39868f529cab 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -265,6 +265,7 @@ struct events_stats {
 	u32 nr_unknown_id;
 	u32 nr_unprocessable_samples;
 	u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX];
+	u32 nr_proc_map_timeout;
 };
 
 struct attr_event {
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index c371336d1eb2..2d882fd1f1b9 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1064,6 +1064,8 @@ static int machines__deliver_event(struct machines *machines,
 	case PERF_RECORD_MMAP:
 		return tool->mmap(tool, event, sample, machine);
 	case PERF_RECORD_MMAP2:
+		if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT)
+			++evlist->stats.nr_proc_map_timeout;
 		return tool->mmap2(tool, event, sample, machine);
 	case PERF_RECORD_COMM:
 		return tool->comm(tool, event, sample, machine);
@@ -1360,6 +1362,15 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 		ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events);
 
 	events_stats__auxtrace_error_warn(stats);
+
+	if (stats->nr_proc_map_timeout != 0) {
+		ui__warning("%d map information files for pre-existing threads were\n"
+			    "not processed, if there are samples for addresses they\n"
+			    "will not be resolved, you may find out which are these\n"
+			    "threads by running with -v and redirecting the output\n"
+			    "to a file.\n",
+			    stats->nr_proc_map_timeout);
+	}
 }
 
 static int perf_session__flush_thread_stack(struct thread *thread,

From 9d9cad763ca79dd3697e9f2d1df648e37496582b Mon Sep 17 00:00:00 2001
From: Kan Liang <kan.liang@intel.com>
Date: Wed, 17 Jun 2015 09:51:11 -0400
Subject: [PATCH 305/305] perf tools: Configurable per thread proc map
 processing time out

The time out to limit the individual proc map processing was hard code
to 500ms. This patch introduce a new option --proc-map-timeout to make
the time limit configurable.

Signed-off-by: Kan Liang <kan.liang@intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Ying Huang <ying.huang@intel.com>
Link: http://lkml.kernel.org/r/1434549071-25611-2-git-send-email-kan.liang@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-kvm.txt    |  6 ++++
 tools/perf/Documentation/perf-record.txt |  5 ++++
 tools/perf/Documentation/perf-top.txt    |  6 ++++
 tools/perf/Documentation/perf-trace.txt  |  5 ++++
 tools/perf/builtin-kvm.c                 |  5 +++-
 tools/perf/builtin-record.c              |  6 +++-
 tools/perf/builtin-top.c                 |  5 +++-
 tools/perf/builtin-trace.c               |  6 +++-
 tools/perf/perf.h                        |  1 +
 tools/perf/tests/code-reading.c          |  2 +-
 tools/perf/tests/dwarf-unwind.c          |  2 +-
 tools/perf/tests/mmap-thread-lookup.c    |  4 +--
 tools/perf/util/event.c                  | 36 +++++++++++++++---------
 tools/perf/util/event.h                  |  9 ++++--
 tools/perf/util/machine.c                |  7 +++--
 tools/perf/util/machine.h                |  9 ++++--
 tools/perf/util/session.c                |  4 ++-
 17 files changed, 87 insertions(+), 31 deletions(-)

diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt
index 6252e776009c..6a5bb2b17039 100644
--- a/tools/perf/Documentation/perf-kvm.txt
+++ b/tools/perf/Documentation/perf-kvm.txt
@@ -151,6 +151,12 @@ STAT LIVE OPTIONS
        Show events other than HLT (x86 only) or Wait state (s390 only)
        that take longer than duration usecs.
 
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/mmap, it may take
+	a long time, because the file may be huge. A time out is needed
+	in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1],
diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt
index 6fdf78625c51..9b9d9d086680 100644
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@@ -271,6 +271,11 @@ AUX area tracing event. Optionally the number of bytes to capture per
 snapshot can be specified. In Snapshot Mode, trace data is captured only when
 signal SIGUSR2 is received.
 
+--proc-map-timeout::
+When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+because the file may be huge. A time out is needed in such cases.
+This option sets the time out limit. The default value is 500 ms.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt
index 9e5b07eb7d35..776aec4d0927 100644
--- a/tools/perf/Documentation/perf-top.txt
+++ b/tools/perf/Documentation/perf-top.txt
@@ -201,6 +201,12 @@ Default is to monitor all CPUS.
 	Force each column width to the provided list, for large terminal
 	readability.  0 means no limit (default behavior).
 
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/mmap, it may take
+	a long time, because the file may be huge. A time out is needed
+	in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
 
 INTERACTIVE PROMPTING KEYS
 --------------------------
diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
index 1db9c8b79880..7ea078658a87 100644
--- a/tools/perf/Documentation/perf-trace.txt
+++ b/tools/perf/Documentation/perf-trace.txt
@@ -121,6 +121,11 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
 --event::
 	Trace other events, see 'perf list' for a complete list.
 
+--proc-map-timeout::
+	When processing pre-existing threads /proc/XXX/mmap, it may take a long time,
+	because the file may be huge. A time out is needed in such cases.
+	This option sets the time out limit. The default value is 500 ms.
+
 PAGEFAULTS
 ----------
 
diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c
index 15fecd3dc5d8..74878cd75078 100644
--- a/tools/perf/builtin-kvm.c
+++ b/tools/perf/builtin-kvm.c
@@ -1311,6 +1311,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 			"show events other than"
 			" HLT (x86 only) or Wait state (s390 only)"
 			" that take longer than duration usecs"),
+		OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout,
+				"per thread proc mmap processing timeout in ms"),
 		OPT_END()
 	};
 	const char * const live_usage[] = {
@@ -1338,6 +1340,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	kvm->opts.target.uses_mmap = false;
 	kvm->opts.target.uid_str = NULL;
 	kvm->opts.target.uid = UINT_MAX;
+	kvm->opts.proc_map_timeout = 500;
 
 	symbol__init(NULL);
 	disable_buildid_cache();
@@ -1393,7 +1396,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm,
 	perf_session__set_id_hdr_size(kvm->session);
 	ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true);
 	machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target,
-				    kvm->evlist->threads, false);
+				    kvm->evlist->threads, false, kvm->opts.proc_map_timeout);
 	err = kvm_live_open_events(kvm);
 	if (err)
 		goto out;
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 4d6cdeb94fe1..de165a1b9240 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -598,7 +598,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv)
 	}
 
 	err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads,
-					    process_synthesized_event, opts->sample_address);
+					    process_synthesized_event, opts->sample_address,
+					    opts->proc_map_timeout);
 	if (err != 0)
 		goto out_child;
 
@@ -959,6 +960,7 @@ static struct record record = {
 			.uses_mmap   = true,
 			.default_per_cpu = true,
 		},
+		.proc_map_timeout     = 500,
 	},
 	.tool = {
 		.sample		= process_sample_event,
@@ -1066,6 +1068,8 @@ struct option __record_options[] = {
 	parse_clockid),
 	OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts,
 			  "opts", "AUX area tracing Snapshot Mode", ""),
+	OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 };
 
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 70a9505aae83..619a8696fda7 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -977,7 +977,7 @@ static int __cmd_top(struct perf_top *top)
 		goto out_delete;
 
 	machine__synthesize_threads(&top->session->machines.host, &opts->target,
-				    top->evlist->threads, false);
+				    top->evlist->threads, false, opts->proc_map_timeout);
 	ret = perf_top__start_counters(top);
 	if (ret)
 		goto out_delete;
@@ -1087,6 +1087,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 			.target		= {
 				.uses_mmap   = true,
 			},
+			.proc_map_timeout    = 500,
 		},
 		.max_stack	     = PERF_MAX_STACK_DEPTH,
 		.sym_pcnt_filter     = 5,
@@ -1186,6 +1187,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str,
 		   "width[,width...]",
 		   "don't try to adjust column width, use these fixed values"),
+	OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 	};
 	const char * const top_usage[] = {
diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
index 4bf805b2fbf6..de5d277d1ad7 100644
--- a/tools/perf/builtin-trace.c
+++ b/tools/perf/builtin-trace.c
@@ -1518,7 +1518,8 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist)
 		return -ENOMEM;
 
 	err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target,
-					    evlist->threads, trace__tool_process, false);
+					    evlist->threads, trace__tool_process, false,
+					    trace->opts.proc_map_timeout);
 	if (err)
 		symbol__exit();
 
@@ -2747,6 +2748,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 			.user_interval = ULLONG_MAX,
 			.no_buffering  = true,
 			.mmap_pages    = UINT_MAX,
+			.proc_map_timeout  = 500,
 		},
 		.output = stdout,
 		.show_comm = true,
@@ -2796,6 +2798,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
 		     "Trace pagefaults", parse_pagefaults, "maj"),
 	OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"),
 	OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"),
+	OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout,
+			"per thread proc mmap processing timeout in ms"),
 	OPT_END()
 	};
 	const char * const trace_subcommands[] = { "record", NULL };
diff --git a/tools/perf/perf.h b/tools/perf/perf.h
index aa79fb8a16d4..4a5827fff799 100644
--- a/tools/perf/perf.h
+++ b/tools/perf/perf.h
@@ -69,6 +69,7 @@ struct record_opts {
 	unsigned     initial_delay;
 	bool         use_clockid;
 	clockid_t    clockid;
+	unsigned int proc_map_timeout;
 };
 
 struct option;
diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c
index e2a432b67d52..22f8a00446e1 100644
--- a/tools/perf/tests/code-reading.c
+++ b/tools/perf/tests/code-reading.c
@@ -451,7 +451,7 @@ static int do_test_code_reading(bool try_kcore)
 	}
 
 	ret = perf_event__synthesize_thread_map(NULL, threads,
-						perf_event__process, machine, false);
+						perf_event__process, machine, false, 500);
 	if (ret < 0) {
 		pr_debug("perf_event__synthesize_thread_map failed\n");
 		goto out_err;
diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c
index 9b748e1ad46e..40b36c462427 100644
--- a/tools/perf/tests/dwarf-unwind.c
+++ b/tools/perf/tests/dwarf-unwind.c
@@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine)
 	pid_t pid = getpid();
 
 	return perf_event__synthesize_mmap_events(NULL, &event, pid, pid,
-						  mmap_handler, machine, true);
+						  mmap_handler, machine, true, 500);
 }
 
 #define MAX_STACK 8
diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c
index 264e215c0d36..7f48efa7e295 100644
--- a/tools/perf/tests/mmap-thread-lookup.c
+++ b/tools/perf/tests/mmap-thread-lookup.c
@@ -129,7 +129,7 @@ static int synth_all(struct machine *machine)
 {
 	return perf_event__synthesize_threads(NULL,
 					      perf_event__process,
-					      machine, 0);
+					      machine, 0, 500);
 }
 
 static int synth_process(struct machine *machine)
@@ -141,7 +141,7 @@ static int synth_process(struct machine *machine)
 
 	err = perf_event__synthesize_thread_map(NULL, map,
 						perf_event__process,
-						machine, 0);
+						machine, 0, 500);
 
 	thread_map__delete(map);
 	return err;
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 416ba80c628f..d7d986d8f23e 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -213,19 +213,19 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
 	return 0;
 }
 
-#define PROC_MAP_PARSE_TIMEOUT	(500 * 1000000ULL)
-
 int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       union perf_event *event,
 				       pid_t pid, pid_t tgid,
 				       perf_event__handler_t process,
 				       struct machine *machine,
-				       bool mmap_data)
+				       bool mmap_data,
+				       unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	FILE *fp;
 	unsigned long long t;
 	bool truncation = false;
+	unsigned long long timeout = proc_map_timeout * 1000000ULL;
 	int rc = 0;
 
 	if (machine__is_default_guest(machine))
@@ -258,8 +258,11 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 		if (fgets(bf, sizeof(bf), fp) == NULL)
 			break;
 
-		if ((rdclock() - t) > PROC_MAP_PARSE_TIMEOUT) {
-			pr_warning("Reading %s time out.\n", filename);
+		if ((rdclock() - t) > timeout) {
+			pr_warning("Reading %s time out. "
+				   "You may want to increase "
+				   "the time limit by --proc-map-timeout\n",
+				   filename);
 			truncation = true;
 			goto out;
 		}
@@ -404,7 +407,9 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 				      pid_t pid, int full,
 					  perf_event__handler_t process,
 				      struct perf_tool *tool,
-				      struct machine *machine, bool mmap_data)
+				      struct machine *machine,
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
 {
 	char filename[PATH_MAX];
 	DIR *tasks;
@@ -421,7 +426,8 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 			return -1;
 
 		return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-							  process, machine, mmap_data);
+							  process, machine, mmap_data,
+							  proc_map_timeout);
 	}
 
 	if (machine__is_default_guest(machine))
@@ -462,7 +468,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
 		if (_pid == pid) {
 			/* process the parent's maps too */
 			rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid,
-						process, machine, mmap_data);
+						process, machine, mmap_data, proc_map_timeout);
 			if (rc)
 				break;
 		}
@@ -476,7 +482,8 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
 				      struct machine *machine,
-				      bool mmap_data)
+				      bool mmap_data,
+				      unsigned int proc_map_timeout)
 {
 	union perf_event *comm_event, *mmap_event, *fork_event;
 	int err = -1, thread, j;
@@ -499,7 +506,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 					       fork_event,
 					       threads->map[thread], 0,
 					       process, tool, machine,
-					       mmap_data)) {
+					       mmap_data, proc_map_timeout)) {
 			err = -1;
 			break;
 		}
@@ -525,7 +532,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool,
 						       fork_event,
 						       comm_event->comm.pid, 0,
 						       process, tool, machine,
-						       mmap_data)) {
+						       mmap_data, proc_map_timeout)) {
 				err = -1;
 				break;
 			}
@@ -542,7 +549,9 @@ out:
 
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
-				   struct machine *machine, bool mmap_data)
+				   struct machine *machine,
+				   bool mmap_data,
+				   unsigned int proc_map_timeout)
 {
 	DIR *proc;
 	char proc_path[PATH_MAX];
@@ -582,7 +591,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool,
  		 * one thread couldn't be synthesized.
  		 */
 		__event__synthesize_thread(comm_event, mmap_event, fork_event, pid,
-					   1, process, tool, machine, mmap_data);
+					   1, process, tool, machine, mmap_data,
+					   proc_map_timeout);
 	}
 
 	err = 0;
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 39868f529cab..c53f36384b64 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -384,10 +384,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool,
 int perf_event__synthesize_thread_map(struct perf_tool *tool,
 				      struct thread_map *threads,
 				      perf_event__handler_t process,
-				      struct machine *machine, bool mmap_data);
+				      struct machine *machine, bool mmap_data,
+				      unsigned int proc_map_timeout);
 int perf_event__synthesize_threads(struct perf_tool *tool,
 				   perf_event__handler_t process,
-				   struct machine *machine, bool mmap_data);
+				   struct machine *machine, bool mmap_data,
+				   unsigned int proc_map_timeout);
 int perf_event__synthesize_kernel_mmap(struct perf_tool *tool,
 				       perf_event__handler_t process,
 				       struct machine *machine);
@@ -469,7 +471,8 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 				       pid_t pid, pid_t tgid,
 				       perf_event__handler_t process,
 				       struct machine *machine,
-				       bool mmap_data);
+				       bool mmap_data,
+				       unsigned int proc_map_timeout);
 
 size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp);
 size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp);
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 8b3b1937cb9e..4744673aff1b 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1913,12 +1913,13 @@ int machines__for_each_thread(struct machines *machines,
 
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
-				  perf_event__handler_t process, bool data_mmap)
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout)
 {
 	if (target__has_task(target))
-		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap);
+		return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout);
 	else if (target__has_cpu(target))
-		return perf_event__synthesize_threads(tool, process, machine, data_mmap);
+		return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout);
 	/* command specified */
 	return 0;
 }
diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h
index cea62f6fb144..887798e511e9 100644
--- a/tools/perf/util/machine.h
+++ b/tools/perf/util/machine.h
@@ -222,13 +222,16 @@ int machines__for_each_thread(struct machines *machines,
 
 int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool,
 				  struct target *target, struct thread_map *threads,
-				  perf_event__handler_t process, bool data_mmap);
+				  perf_event__handler_t process, bool data_mmap,
+				  unsigned int proc_map_timeout);
 static inline
 int machine__synthesize_threads(struct machine *machine, struct target *target,
-				struct thread_map *threads, bool data_mmap)
+				struct thread_map *threads, bool data_mmap,
+				unsigned int proc_map_timeout)
 {
 	return __machine__synthesize_threads(machine, NULL, target, threads,
-					     perf_event__process, data_mmap);
+					     perf_event__process, data_mmap,
+					     proc_map_timeout);
 }
 
 pid_t machine__get_current_tid(struct machine *machine, int cpu);
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index 2d882fd1f1b9..aa482c10469d 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -1368,7 +1368,9 @@ static void perf_session__warn_about_errors(const struct perf_session *session)
 			    "not processed, if there are samples for addresses they\n"
 			    "will not be resolved, you may find out which are these\n"
 			    "threads by running with -v and redirecting the output\n"
-			    "to a file.\n",
+			    "to a file.\n"
+			    "The time limit to process proc map is too short?\n"
+			    "Increase it by --proc-map-timeout\n",
 			    stats->nr_proc_map_timeout);
 	}
 }