perf/core improvements and fixes:

New features: . Add --dry-run option to 'perf record' to check if command line options can be parsed, but not doing any recording (Wang Nan) . Allow dumping the object files generated by llvm when processing eBPF scriptlet events (Wang Nan) - Add stackcollapse.py script to help generating flame graphs (Paolo Bonzini) Documentation: . Fix 'perf script' documentation of '-f' when it should be '-F' (Adrian Hunter) Infrastructure: - Fix write_backwards fallback when using a new tool on older kernels without support for this feature (Arnaldo Carvalho de Melo) - Remove some leftovers from the initial codebase copying from git (Arnaldo Carvalho de Melo) - List libelf-devel as an alternative, as this is how the libelf development package is called on OpenSuSE (Jean Delvare) - Rename __hists__add_entry to hists__add_entry (Jiri Olsa) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> -----BEGIN PGP SIGNATURE----- Version: GnuPG v2 iQIcBAABCAAGBQJXaWpiAAoJENZQFvNTUqpANWUQAKeRL3CDlyRfekNCOMNDzZo7 7/SeIcDZQwYR5/lHCk3/2Sy3QFtzAj0bEztYxWSL9L/NsNmW7mwaisYlh07UqSMJ idVehJ53bLXQjNirWmu0pfv0U/dVqH5YsLEgLqpwDgt8aN33m+F6/Z67vnxYEDbk mRYd+KcdShaY/gpwsS+S9ewb8A+CLuILOMQuqEU4BXD1zayg+v+H8ciNT8ai0HQm g9bpA9w6EICfx/aAylHnGNcaMrCl35cUm8cQaqkplqEf/DtUVewY7rt/ZvNj66mX T+CTPWh/Wt5bYjOYd1wF8YG7w1JqCIuEkkMzfqDdiVmv/PfTtNT9ihpF1gwo4Xj1 RZwkhPb1sNCfoLTdPIJTnP61+GWQ3Cqwl/jUnmvpDNAS9ehgLVotdw/dP6D6Loyq pQmQCtu/Ir4PeXwFS8feMoGk0bikAPpbv4HeY7k9JNOSkAQPO11inBHnupxbah9w 4FQE5yaxTuouoS8JNjAA28BqWjbpodKcvLXTl/hnVs+MWI95chaN1FlQJRU81Oaf 9Vf65gYU6iA0SLrLXBo1+4qBd3usFmNAoSyjFw0e+38O+8BNH6YIy7LlU2USFjtZ m8Xsoq2DlEKiO0iWXXLIKrZFYzFlZOl7cUQeqdC6Ho6okS+C+ougAaNxZrRU8fCE TmtWQrFrTPOQHNW8o35d =zbKX -----END PGP SIGNATURE----- Merge tag 'perf-core-for-mingo-20160621' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux into perf/core Pull perf/core improvements and fixes from Arnaldo Carvalho de Melo: New features: - Add --dry-run option to 'perf record' to check if command line options can be parsed, 
but not doing any recording (Wang Nan) - Allow dumping the object files generated by llvm when processing eBPF scriptlet events (Wang Nan) - Add stackcollapse.py script to help generating flame graphs (Paolo Bonzini) Documentation changes: - Fix 'perf script' documentation of '-f' when it should be '-F' (Adrian Hunter) Infrastructure changes: - Fix write_backwards fallback when using a new tool on older kernels without support for this feature (Arnaldo Carvalho de Melo) - Remove some leftovers from the initial codebase copying from git (Arnaldo Carvalho de Melo) - List libelf-devel as an alternative, as this is how the libelf development package is called on OpenSuSE (Jean Delvare) - Rename __hists__add_entry to hists__add_entry (Jiri Olsa) Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com> Signed-off-by: Ingo Molnar <mingo@kernel.org>
2016-06-22 09:34:19 +02:00 · 2016-06-22 09:34:19 +02:00 · 4330b439bb
parent 02469a9509 6745d8ea82
commit 4330b439bb
20 changed files with 251 additions and 173 deletions
--- a/2
+++ b/2
@ -1038,7 +1038,7 @@ ifdef CONFIG_STACK_VALIDATION
  ifeq ($(has_libelf),1)
    objtool_target := tools/objtool FORCE
  else
-    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev or elfutils-libelf-devel")
+    $(warning "Cannot use CONFIG_STACK_VALIDATION, please install libelf-dev, libelf-devel or elfutils-libelf-devel")
    SKIP_STACK_VALIDATION := 1
    export SKIP_STACK_VALIDATION
  endif
--- a/tools/perf/Documentation/perf-record.txt
+++ b/tools/perf/Documentation/perf-record.txt
@ -360,6 +360,13 @@ particular perf.data snapshot should be kept or not.

 Implies --timestamp-filename, --no-buildid and --no-buildid-cache.

+--dry-run::
+Parse options then exit. --dry-run can be used to detect errors in cmdline
+options.
+
+'perf record --dry-run -e' can act as a BPF script compiler if llvm.dump-obj
+in config file is set to true.
+
 SEE ALSO
 --------
 linkperf:perf-stat[1], linkperf:perf-list[1]
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@ -119,13 +119,13 @@ OPTIONS
 	srcline, period, iregs, brstack, brstacksym, flags.
        Field list can be prepended with the type, trace, sw or hw,
        to indicate to which event type the field list applies.
-        e.g., -f sw:comm,tid,time,ip,sym  and -f trace:time,cpu,trace
+        e.g., -F sw:comm,tid,time,ip,sym  and -F trace:time,cpu,trace

-		perf script -f <fields>
+		perf script -F <fields>

 	is equivalent to:

-		perf script -f trace:<fields> -f sw:<fields> -f hw:<fields>
+		perf script -F trace:<fields> -F sw:<fields> -F hw:<fields>

 	i.e., the specified fields apply to all event types if the type string
 	is not given.
@ -133,9 +133,9 @@ OPTIONS
 	The arguments are processed in the order received. A later usage can
 	reset a prior request. e.g.:

-		-f trace: -f comm,tid,time,ip,sym
+		-F trace: -F comm,tid,time,ip,sym

-	The first -f suppresses trace events (field list is ""), but then the
+	The first -F suppresses trace events (field list is ""), but then the
 	second invocation sets the fields to comm,tid,time,ip,sym. In this case a
 	warning is given to the user:

@ -143,9 +143,9 @@ OPTIONS

 	Alternatively, consider the order:

-		-f comm,tid,time,ip,sym -f trace:
+		-F comm,tid,time,ip,sym -F trace:

-	The first -f sets the fields for all events and the second -f
+	The first -F sets the fields for all events and the second -F
 	suppresses trace events. The user is given a warning message about
 	the override, and the result of the above is that only S/W and H/W
 	events are displayed with the given fields.
@ -154,14 +154,14 @@ OPTIONS
 	event type, a message is displayed to the user that the option is
 	ignored for that type. For example:

-		$ perf script -f comm,tid,trace
+		$ perf script -F comm,tid,trace
 		'trace' not valid for hardware events. Ignoring.
 		'trace' not valid for software events. Ignoring.

 	Alternatively, if the type is given and an invalid field is specified,
 	it is an error. For example:

-        perf script -v -f sw:comm,tid,trace
+        perf script -v -F sw:comm,tid,trace
        'trace' not valid for software events.

 	At this point usage is displayed, and perf-script exits.
@ -173,7 +173,7 @@ OPTIONS
 	respectively.

 	Finally, a user may not set fields to none for all event types.
-	i.e., -f "" is not allowed.
+	i.e., -F "" is not allowed.

 	The brstack output includes branch related information with raw addresses using the
 	/v/v/v/v/ syntax in the following order:
--- a/tools/perf/builtin-annotate.c
+++ b/tools/perf/builtin-annotate.c
@ -75,7 +75,7 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel,
 	sample->period = 1;
 	sample->weight = 1;

-	he = __hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
+	he = hists__add_entry(hists, al, NULL, NULL, NULL, sample, true);
 	if (he == NULL)
 		return -ENOMEM;

--- a/tools/perf/builtin-diff.c
+++ b/tools/perf/builtin-diff.c
@ -310,16 +310,6 @@ static int formula_fprintf(struct hist_entry *he, struct hist_entry *pair,
 	return -1;
 }

-static int hists__add_entry(struct hists *hists,
-			    struct addr_location *al,
-			    struct perf_sample *sample)
-{
-	if (__hists__add_entry(hists, al, NULL, NULL, NULL,
-			       sample, true) != NULL)
-		return 0;
-	return -ENOMEM;
-}
-
 static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 				      union perf_event *event,
 				      struct perf_sample *sample,
@ -336,7 +326,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused,
 		return -1;
 	}

-	if (hists__add_entry(hists, &al, sample)) {
+	if (!hists__add_entry(hists, &al, NULL, NULL, NULL, sample, true)) {
 		pr_warning("problem incrementing symbol period, skipping event\n");
 		goto out_put;
 	}
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@ -1274,6 +1274,8 @@ static struct record record = {
 const char record_callchain_help[] = CALLCHAIN_RECORD_HELP
 	"\n\t\t\t\tDefault: fp";

+static bool dry_run;
+
 /*
 * XXX Will stay a global variable till we fix builtin-script.c to stop messing
 * with it and switch to use the library functions in perf_evlist that came
@ -1393,6 +1395,8 @@ struct option __record_options[] = {
 		    "append timestamp to output filename"),
 	OPT_BOOLEAN(0, "switch-output", &record.switch_output,
 		    "Switch output when receive SIGUSR2"),
+	OPT_BOOLEAN(0, "dry-run", &dry_run,
+		    "Parse options then exit"),
 	OPT_END()
 };

@ -1462,6 +1466,9 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused)
 	if (err)
 		return err;

+	if (dry_run)
+		return 0;
+
 	err = bpf__setup_stdout(rec->evlist);
 	if (err) {
 		bpf__strerror_setup_stdout(rec->evlist, err, errbuf, sizeof(errbuf));
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@ -257,7 +257,7 @@ else
      LIBC_SUPPORT := 1
    endif
    ifeq ($(LIBC_SUPPORT),1)
-      msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install elfutils-libelf-devel/libelf-dev);
+      msg := $(warning No libelf found, disables 'probe' tool and BPF support in 'perf record', please install libelf-dev, libelf-devel or elfutils-libelf-devel);

      NO_LIBELF := 1
      NO_DWARF := 1
--- a/tools/perf/perf.c
+++ b/tools/perf/perf.c
@ -139,8 +139,6 @@ struct option options[] = {
 	OPT_ARGUMENT("html-path", "html-path"),
 	OPT_ARGUMENT("paginate", "paginate"),
 	OPT_ARGUMENT("no-pager", "no-pager"),
-	OPT_ARGUMENT("perf-dir", "perf-dir"),
-	OPT_ARGUMENT("work-tree", "work-tree"),
 	OPT_ARGUMENT("debugfs-dir", "debugfs-dir"),
 	OPT_ARGUMENT("buildid-dir", "buildid-dir"),
 	OPT_ARGUMENT("list-cmds", "list-cmds"),
@ -200,35 +198,6 @@ static int handle_options(const char ***argv, int *argc, int *envchanged)
 			use_pager = 0;
 			if (envchanged)
 				*envchanged = 1;
-		} else if (!strcmp(cmd, "--perf-dir")) {
-			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --perf-dir.\n");
-				usage(perf_usage_string);
-			}
-			setenv(PERF_DIR_ENVIRONMENT, (*argv)[1], 1);
-			if (envchanged)
-				*envchanged = 1;
-			(*argv)++;
-			(*argc)--;
-			handled++;
-		} else if (!prefixcmp(cmd, CMD_PERF_DIR)) {
-			setenv(PERF_DIR_ENVIRONMENT, cmd + strlen(CMD_PERF_DIR), 1);
-			if (envchanged)
-				*envchanged = 1;
-		} else if (!strcmp(cmd, "--work-tree")) {
-			if (*argc < 2) {
-				fprintf(stderr, "No directory given for --work-tree.\n");
-				usage(perf_usage_string);
-			}
-			setenv(PERF_WORK_TREE_ENVIRONMENT, (*argv)[1], 1);
-			if (envchanged)
-				*envchanged = 1;
-			(*argv)++;
-			(*argc)--;
-		} else if (!prefixcmp(cmd, CMD_WORK_TREE)) {
-			setenv(PERF_WORK_TREE_ENVIRONMENT, cmd + strlen(CMD_WORK_TREE), 1);
-			if (envchanged)
-				*envchanged = 1;
 		} else if (!strcmp(cmd, "--debugfs-dir")) {
 			if (*argc < 2) {
 				fprintf(stderr, "No directory given for --debugfs-dir.\n");
@ -363,11 +332,6 @@ const char perf_version_string[] = PERF_VERSION;

 #define RUN_SETUP	(1<<0)
 #define USE_PAGER	(1<<1)
-/*
- * require working tree to be present -- anything uses this needs
- * RUN_SETUP for reading from the configuration file.
- */
-#define NEED_WORK_TREE	(1<<2)

 static int run_builtin(struct cmd_struct *p, int argc, const char **argv)
 {
--- a/tools/perf/scripts/python/bin/stackcollapse-record
+++ b/tools/perf/scripts/python/bin/stackcollapse-record
@ -0,0 +1,8 @@
+#!/bin/sh
+
+#
+# stackcollapse.py can handle all types of perf samples, including
+# tracepoints, so there are no special record requirements: just record
+# what you want to analyze.
+#
+perf record "$@"
--- a/tools/perf/scripts/python/bin/stackcollapse-report
+++ b/tools/perf/scripts/python/bin/stackcollapse-report
@ -0,0 +1,3 @@
+#!/bin/sh
+# description: produce callgraphs in short form for scripting use
+perf script -s "$PERF_EXEC_PATH"/scripts/python/stackcollapse.py -- "$@"
--- a/tools/perf/scripts/python/stackcollapse.py
+++ b/tools/perf/scripts/python/stackcollapse.py
@ -0,0 +1,127 @@
+#!/usr/bin/python
+#
+# stackcollapse.py - format perf samples with one line per distinct call stack
+#
+# This script's output has two space-separated fields.  The first is a semicolon
+# separated stack including the program name (from the "comm" field) and the
+# function names from the call stack.  The second is a count:
+#
+#  swapper;start_kernel;rest_init;cpu_idle;default_idle;native_safe_halt 2
+#
+# The file is sorted according to the first field.
+#
+# Input may be created and processed using:
+#
+#  perf record -a -g -F 99 sleep 60
+#  perf script report stackcollapse > out.stacks-folded
+#
+# (perf script record stackcollapse works too).
+#
+# Written by Paolo Bonzini <pbonzini@redhat.com>
+# Based on Brendan Gregg's stackcollapse-perf.pl script.
+
+import os
+import sys
+from collections import defaultdict
+from optparse import OptionParser, make_option
+
+sys.path.append(os.environ['PERF_EXEC_PATH'] + \
+                '/scripts/python/Perf-Trace-Util/lib/Perf/Trace')
+
+from perf_trace_context import *
+from Core import *
+from EventClass import *
+
+# command line parsing
+
+option_list = [
+    # formatting options for the bottom entry of the stack
+    make_option("--include-tid", dest="include_tid",
+                 action="store_true", default=False,
+                 help="include thread id in stack"),
+    make_option("--include-pid", dest="include_pid",
+                 action="store_true", default=False,
+                 help="include process id in stack"),
+    make_option("--no-comm", dest="include_comm",
+                 action="store_false", default=True,
+                 help="do not separate stacks according to comm"),
+    make_option("--tidy-java", dest="tidy_java",
+                 action="store_true", default=False,
+                 help="beautify Java signatures"),
+    make_option("--kernel", dest="annotate_kernel",
+                 action="store_true", default=False,
+                 help="annotate kernel functions with _[k]")
+]
+
+parser = OptionParser(option_list=option_list)
+(opts, args) = parser.parse_args()
+
+if len(args) != 0:
+    parser.error("unexpected command line argument")
+if opts.include_tid and not opts.include_comm:
+    parser.error("requesting tid but not comm is invalid")
+if opts.include_pid and not opts.include_comm:
+    parser.error("requesting pid but not comm is invalid")
+
+# event handlers
+
+lines = defaultdict(lambda: 0)
+
+def process_event(param_dict):
+    def tidy_function_name(sym, dso):
+        if sym is None:
+            sym = '[unknown]'
+
+        sym = sym.replace(';', ':')
+        if opts.tidy_java:
+            # the original stackcollapse-perf.pl script gives the
+            # example of converting this:
+            #    Lorg/mozilla/javascript/MemberBox;.<init>(Ljava/lang/reflect/Method;)V
+            # to this:
+            #    org/mozilla/javascript/MemberBox:.init
+            sym = sym.replace('<', '')
+            sym = sym.replace('>', '')
+            if sym[0] == 'L' and sym.find('/'):
+                sym = sym[1:]
+            try:
+                sym = sym[:sym.index('(')]
+            except ValueError:
+                pass
+
+        if opts.annotate_kernel and dso == '[kernel.kallsyms]':
+            return sym + '_[k]'
+        else:
+            return sym
+
+    stack = list()
+    if 'callchain' in param_dict:
+        for entry in param_dict['callchain']:
+            entry.setdefault('sym', dict())
+            entry['sym'].setdefault('name', None)
+            entry.setdefault('dso', None)
+            stack.append(tidy_function_name(entry['sym']['name'],
+                                            entry['dso']))
+    else:
+        param_dict.setdefault('symbol', None)
+        param_dict.setdefault('dso', None)
+        stack.append(tidy_function_name(param_dict['symbol'],
+                                        param_dict['dso']))
+
+    if opts.include_comm:
+        comm = param_dict["comm"].replace(' ', '_')
+        sep = "-"
+        if opts.include_pid:
+            comm = comm + sep + str(param_dict['sample']['pid'])
+            sep = "/"
+        if opts.include_tid:
+            comm = comm + sep + str(param_dict['sample']['tid'])
+        stack.append(comm)
+
+    stack_string = ';'.join(reversed(stack))
+    lines[stack_string] = lines[stack_string] + 1
+
+def trace_end():
+    list = lines.keys()
+    list.sort()
+    for stack in list:
+        print "%s %d" % (stack, lines[stack])
--- a/tools/perf/tests/hists_link.c
+++ b/tools/perf/tests/hists_link.c
@ -84,7 +84,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;

-			he = __hists__add_entry(hists, &al, NULL,
+			he = hists__add_entry(hists, &al, NULL,
 						NULL, NULL, &sample, true);
 			if (he == NULL) {
 				addr_location__put(&al);
@ -103,7 +103,7 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine)
 			if (machine__resolve(machine, &al, &sample) < 0)
 				goto out;

-			he = __hists__add_entry(hists, &al, NULL,
+			he = hists__add_entry(hists, &al, NULL,
 						NULL, NULL, &sample, true);
 			if (he == NULL) {
 				addr_location__put(&al);
--- a/tools/perf/util/cache.h
+++ b/tools/perf/util/cache.h
@ -11,14 +11,9 @@
 #include <linux/string.h>

 #define CMD_EXEC_PATH "--exec-path"
-#define CMD_PERF_DIR "--perf-dir="
-#define CMD_WORK_TREE "--work-tree="
 #define CMD_DEBUGFS_DIR "--debugfs-dir="

-#define PERF_DIR_ENVIRONMENT "PERF_DIR"
-#define PERF_WORK_TREE_ENVIRONMENT "PERF_WORK_TREE"
 #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH"
-#define DEFAULT_PERF_DIR_ENVIRONMENT ".perf"
 #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR"
 #define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR"
 #define PERF_PAGER_ENVIRONMENT "PERF_PAGER"
@ -32,7 +27,6 @@ int perf_config_int(const char *, const char *);
 u64 perf_config_u64(const char *, const char *);
 int perf_config_bool(const char *, const char *);
 int config_error_nonbool(const char *);
-const char *perf_config_dirname(const char *, const char *);
 const char *perf_etc_perfconfig(void);

 char *alias_lookup(const char *alias);
@ -45,9 +39,6 @@ static inline int is_absolute_path(const char *path)
 	return path[0] == '/';
 }

-char *strip_path_suffix(const char *path, const char *suffix);
-
 char *mkpath(const char *fmt, ...) __attribute__((format (printf, 1, 2)));
-char *perf_path(const char *fmt, ...) __attribute__((format (printf, 1, 2)));

 #endif /* __PERF_CACHE_H */
--- a/tools/perf/util/config.c
+++ b/tools/perf/util/config.c
@ -372,7 +372,7 @@ int perf_config_bool(const char *name, const char *value)
 	return !!perf_config_bool_or_int(name, value, &discard);
 }

-const char *perf_config_dirname(const char *name, const char *value)
+static const char *perf_config_dirname(const char *name, const char *value)
 {
 	if (!name)
 		return NULL;
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@ -1389,8 +1389,11 @@ fallback_missing_features:
 	if (perf_missing_features.lbr_flags)
 		evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
 				     PERF_SAMPLE_BRANCH_NO_CYCLES);
-	if (perf_missing_features.write_backward)
+	if (perf_missing_features.write_backward) {
+		if (evsel->overwrite)
+			return -EINVAL;
 		evsel->attr.write_backward = false;
+	}
 retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
@ -1453,12 +1456,6 @@ retry_open:
 				err = -EINVAL;
 				goto out_close;
 			}
-
-			if (evsel->overwrite &&
-			    perf_missing_features.write_backward) {
-				err = -EINVAL;
-				goto out_close;
-			}
 		}
 	}

@ -1496,7 +1493,10 @@ try_fallback:
 	 * Must probe features in the order they were added to the
 	 * perf_event_attr interface.
 	 */
-	if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
+	if (!perf_missing_features.write_backward && evsel->attr.write_backward) {
+		perf_missing_features.write_backward = true;
+		goto fallback_missing_features;
+	} else if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) {
 		perf_missing_features.clockid_wrong = true;
 		goto fallback_missing_features;
 	} else if (!perf_missing_features.clockid && evsel->attr.use_clockid) {
@ -1521,12 +1521,7 @@ try_fallback:
 			  PERF_SAMPLE_BRANCH_NO_FLAGS))) {
 		perf_missing_features.lbr_flags = true;
 		goto fallback_missing_features;
-	} else if (!perf_missing_features.write_backward &&
-			evsel->attr.write_backward) {
-		perf_missing_features.write_backward = true;
-		goto fallback_missing_features;
 	}
-
 out_close:
 	do {
 		while (--thread >= 0) {
@ -2409,6 +2404,8 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target,
 	"We found oprofile daemon running, please stop it and try again.");
 		break;
 	case EINVAL:
+		if (evsel->overwrite && perf_missing_features.write_backward)
+			return scnprintf(msg, size, "Reading from overwrite event is not supported by this kernel.");
 		if (perf_missing_features.clockid)
 			return scnprintf(msg, size, "clockid feature not supported.");
 		if (perf_missing_features.clockid_wrong)
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@ -531,13 +531,13 @@ out:
 	return he;
 }

-struct hist_entry *__hists__add_entry(struct hists *hists,
-				      struct addr_location *al,
-				      struct symbol *sym_parent,
-				      struct branch_info *bi,
-				      struct mem_info *mi,
-				      struct perf_sample *sample,
-				      bool sample_self)
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *sym_parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self)
 {
 	struct hist_entry entry = {
 		.thread	= al->thread,
@ -622,8 +622,8 @@ iter_add_single_mem_entry(struct hist_entry_iter *iter, struct addr_location *al
 	 */
 	sample->period = cost;

-	he = __hists__add_entry(hists, al, iter->parent, NULL, mi,
-				sample, true);
+	he = hists__add_entry(hists, al, iter->parent, NULL, mi,
+			      sample, true);
 	if (!he)
 		return -ENOMEM;

@ -727,8 +727,8 @@ iter_add_next_branch_entry(struct hist_entry_iter *iter, struct addr_location *a
 	sample->period = 1;
 	sample->weight = bi->flags.cycles ? bi->flags.cycles : 1;

-	he = __hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
-				sample, true);
+	he = hists__add_entry(hists, al, iter->parent, &bi[i], NULL,
+			      sample, true);
 	if (he == NULL)
 		return -ENOMEM;

@ -764,8 +764,8 @@ iter_add_single_normal_entry(struct hist_entry_iter *iter, struct addr_location
 	struct perf_sample *sample = iter->sample;
 	struct hist_entry *he;

-	he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
-				sample, true);
+	he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+			      sample, true);
 	if (he == NULL)
 		return -ENOMEM;

@ -825,8 +825,8 @@ iter_add_single_cumulative_entry(struct hist_entry_iter *iter,
 	struct hist_entry *he;
 	int err = 0;

-	he = __hists__add_entry(hists, al, iter->parent, NULL, NULL,
-				sample, true);
+	he = hists__add_entry(hists, al, iter->parent, NULL, NULL,
+			      sample, true);
 	if (he == NULL)
 		return -ENOMEM;

@ -900,8 +900,8 @@ iter_add_next_cumulative_entry(struct hist_entry_iter *iter,
 		}
 	}

-	he = __hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
-				sample, false);
+	he = hists__add_entry(evsel__hists(evsel), al, iter->parent, NULL, NULL,
+			      sample, false);
 	if (he == NULL)
 		return -ENOMEM;

--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@ -120,13 +120,13 @@ extern const struct hist_iter_ops hist_iter_branch;
 extern const struct hist_iter_ops hist_iter_mem;
 extern const struct hist_iter_ops hist_iter_cumulative;

-struct hist_entry *__hists__add_entry(struct hists *hists,
-				      struct addr_location *al,
-				      struct symbol *parent,
-				      struct branch_info *bi,
-				      struct mem_info *mi,
-				      struct perf_sample *sample,
-				      bool sample_self);
+struct hist_entry *hists__add_entry(struct hists *hists,
+				    struct addr_location *al,
+				    struct symbol *parent,
+				    struct branch_info *bi,
+				    struct mem_info *mi,
+				    struct perf_sample *sample,
+				    bool sample_self);
 int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al,
 			 int max_stack_depth, void *arg);

--- a/tools/perf/util/llvm-utils.c
+++ b/tools/perf/util/llvm-utils.c
@ -42,6 +42,8 @@ int perf_llvm_config(const char *var, const char *value)
 		llvm_param.kbuild_dir = strdup(value);
 	else if (!strcmp(var, "kbuild-opts"))
 		llvm_param.kbuild_opts = strdup(value);
+	else if (!strcmp(var, "dump-obj"))
+		llvm_param.dump_obj = !!perf_config_bool(var, value);
 	else
 		return -1;
 	llvm_param.user_set_param = true;
@ -326,6 +328,42 @@ get_kbuild_opts(char **kbuild_dir, char **kbuild_include_opts)
 	pr_debug("include option is set to %s\n", *kbuild_include_opts);
 }

+/*
+ * Write the @size bytes at @obj_buf to an object file next to the source:
+ * @path must end in ".c", and the output name is @path with that suffix
+ * changed to ".o".  Failures are only reported through pr_warning();
+ * nothing is returned to the caller.
+ */
+static void
+dump_obj(const char *path, void *obj_buf, size_t size)
+{
+	char *obj_path = strdup(path);
+	FILE *fp;
+	char *p;
+
+	if (!obj_path) {
+		pr_warning("WARNING: No enough memory, skip object dumping\n");
+		return;
+	}
+
+	p = strrchr(obj_path, '.');
+	if (!p || (strcmp(p, ".c") != 0)) {
+		pr_warning("WARNING: invalid llvm source path: '%s', skip object dumping\n",
+			   obj_path);
+		goto out;
+	}
+
+	/* p points at the ".c" suffix: turn it into ".o" in place. */
+	p[1] = 'o';
+	fp = fopen(obj_path, "wb");
+	if (!fp) {
+		pr_warning("WARNING: failed to open '%s': %s, skip object dumping\n",
+			   obj_path, strerror(errno));
+		goto out;
+	}
+
+	pr_info("LLVM: dumping %s\n", obj_path);
+	if (fwrite(obj_buf, size, 1, fp) != 1) {
+		/* Report before fclose() gets a chance to change errno. */
+		pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n",
+			   obj_path, strerror(errno));
+		fclose(fp);
+		goto out;
+	}
+
+	/* Buffered data may only reach the file in fclose(), so check it too. */
+	if (fclose(fp) != 0)
+		pr_warning("WARNING: failed to write to file '%s': %s, skip object dumping\n",
+			   obj_path, strerror(errno));
+out:
+	free(obj_path);
+}
+
 int llvm__compile_bpf(const char *path, void **p_obj_buf,
 		      size_t *p_obj_buf_sz)
 {
@ -411,6 +449,10 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf,

 	free(kbuild_dir);
 	free(kbuild_include_opts);
+
+	if (llvm_param.dump_obj)
+		dump_obj(path, obj_buf, obj_buf_sz);
+
 	if (!p_obj_buf)
 		free(obj_buf);
 	else
--- a/tools/perf/util/llvm-utils.h
+++ b/tools/perf/util/llvm-utils.h
@ -29,6 +29,11 @@ struct llvm_param {
 	 * compiling. Should not be used for dynamic compiling.
 	 */
 	const char *kbuild_opts;
+	/*
+	 * Default is false. If set to true, write compiling result
+	 * to object file.
+	 */
+	bool dump_obj;
 	/*
 	 * Default is false. If one of the above fields is set by user
 	 * explicitly then user_set_llvm is set to true. This is used
--- a/tools/perf/util/path.c
+++ b/tools/perf/util/path.c
@ -14,14 +14,8 @@

 static char bad_path[] = "/bad-path/";
 /*
- * Two hacks:
+ * One hack:
 */
-
-static const char *get_perf_dir(void)
-{
-	return ".";
-}
-
 static char *get_pathname(void)
 {
 	static char pathname_array[4][PATH_MAX];
@ -54,60 +48,3 @@ char *mkpath(const char *fmt, ...)
 		return bad_path;
 	return cleanup_path(pathname);
 }
-
-char *perf_path(const char *fmt, ...)
-{
-	const char *perf_dir = get_perf_dir();
-	char *pathname = get_pathname();
-	va_list args;
-	unsigned len;
-
-	len = strlen(perf_dir);
-	if (len > PATH_MAX-100)
-		return bad_path;
-	memcpy(pathname, perf_dir, len);
-	if (len && perf_dir[len-1] != '/')
-		pathname[len++] = '/';
-	va_start(args, fmt);
-	len += vsnprintf(pathname + len, PATH_MAX - len, fmt, args);
-	va_end(args);
-	if (len >= PATH_MAX)
-		return bad_path;
-	return cleanup_path(pathname);
-}
-
-/* strip arbitrary amount of directory separators at end of path */
-static inline int chomp_trailing_dir_sep(const char *path, int len)
-{
-	while (len && is_dir_sep(path[len - 1]))
-		len--;
-	return len;
-}
-
-/*
- * If path ends with suffix (complete path components), returns the
- * part before suffix (sans trailing directory separators).
- * Otherwise returns NULL.
- */
-char *strip_path_suffix(const char *path, const char *suffix)
-{
-	int path_len = strlen(path), suffix_len = strlen(suffix);
-
-	while (suffix_len) {
-		if (!path_len)
-			return NULL;
-
-		if (is_dir_sep(path[path_len - 1])) {
-			if (!is_dir_sep(suffix[suffix_len - 1]))
-				return NULL;
-			path_len = chomp_trailing_dir_sep(path, path_len);
-			suffix_len = chomp_trailing_dir_sep(suffix, suffix_len);
-		}
-		else if (path[--path_len] != suffix[--suffix_len])
-			return NULL;
-	}
-
-	if (path_len && !is_dir_sep(path[path_len - 1]))
-		return NULL;
-	return strndup(path, chomp_trailing_dir_sep(path, path_len));
-}