diff --git a/lib/Transforms/Scalar/SampleProfile.cpp b/lib/Transforms/Scalar/SampleProfile.cpp index e3d290194cf..47da32a5f28 100644 --- a/lib/Transforms/Scalar/SampleProfile.cpp +++ b/lib/Transforms/Scalar/SampleProfile.cpp @@ -45,6 +45,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" +#include "llvm/Support/LineIterator.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Regex.h" #include "llvm/Support/raw_ostream.h" @@ -115,6 +116,7 @@ protected: unsigned TotalSamples; /// \brief Total number of samples collected at the head of the function. + /// FIXME: Use head samples to estimate a cold/hot attribute for the function. unsigned TotalHeadSamples; /// \brief Line number for the function header. Used to compute relative @@ -204,6 +206,11 @@ public: return Profiles[F.getName()]; } + /// \brief Report a parse error message and stop compilation. + void reportParseError(int64_t LineNumber, Twine Msg) const { + report_fatal_error(Filename + ":" + Twine(LineNumber) + ": " + Msg + "\n"); + } + protected: /// \brief Map every function to its associated profile. /// @@ -221,63 +228,6 @@ protected: StringRef Filename; }; -/// \brief Loader class for text-based profiles. -/// -/// This class defines a simple interface to read text files containing -/// profiles. It keeps track of line number information and location of -/// the file pointer. Users of this class are responsible for actually -/// parsing the lines returned by the readLine function. -/// -/// TODO - This does not really belong here. It is a generic text file -/// reader. It should be moved to the Support library and made more general. 
-class ExternalProfileTextLoader { -public: - ExternalProfileTextLoader(StringRef F) : Filename(F) { - error_code EC; - EC = MemoryBuffer::getFile(Filename, Buffer); - if (EC) - report_fatal_error("Could not open profile file " + Filename + ": " + - EC.message()); - FP = Buffer->getBufferStart(); - Lineno = 0; - } - - /// \brief Read a line from the mapped file. - StringRef readLine() { - size_t Length = 0; - const char *start = FP; - while (FP != Buffer->getBufferEnd() && *FP != '\n') { - Length++; - FP++; - } - if (FP != Buffer->getBufferEnd()) - FP++; - Lineno++; - return StringRef(start, Length); - } - - /// \brief Return true, if we've reached EOF. - bool atEOF() const { return FP == Buffer->getBufferEnd(); } - - /// \brief Report a parse error message and stop compilation. - void reportParseError(Twine Msg) const { - report_fatal_error(Filename + ":" + Twine(Lineno) + ": " + Msg + "\n"); - } - -private: - /// \brief Memory buffer holding the text file. - OwningPtr Buffer; - - /// \brief Current position into the memory buffer. - const char *FP; - - /// \brief Current line number. - int64_t Lineno; - - /// \brief Path name where to the profile file. - StringRef Filename; -}; - /// \brief Sample profile pass. /// /// This pass reads profile data from the file specified by @@ -386,77 +336,118 @@ void SampleModuleProfile::dump() { /// \brief Load samples from a text file. /// -/// The file is divided in two segments: +/// The file contains a list of samples for every function executed at +/// runtime. Each function profile has the following format: /// -/// Symbol table (represented with the string "symbol table") -/// Number of symbols in the table -/// symbol 1 -/// symbol 2 +/// function1:total_samples:total_head_samples +/// offset1[.discriminator]: number_of_samples [fn1:num fn2:num ... ] +/// offset2[.discriminator]: number_of_samples [fn3:num fn4:num ... ] /// ... 
-/// symbol N -/// -/// Function body profiles -/// function1:total_samples:total_head_samples:number_of_locations -/// location_offset_1: number_of_samples -/// location_offset_2: number_of_samples -/// ... -/// location_offset_N: number_of_samples +/// offsetN[.discriminator]: number_of_samples [fn5:num fn6:num ... ] /// /// Function names must be mangled in order for the profile loader to -/// match them in the current translation unit. +/// match them in the current translation unit. The two numbers in the +/// function header specify how many total samples were accumulated in +/// the function (first number), and the total number of samples accumulated +/// at the prologue of the function (second number). This head sample +/// count provides an indicator of how frequently the function is invoked. +/// +/// Each sampled line may contain several items. Some are optional +/// (marked below): +/// +/// a- Source line offset. This number represents the line number +/// in the function where the sample was collected. The line number +/// is always relative to the line where the symbol of the function +/// is defined. So, if the function has its header at line 280, +/// the offset 13 is at line 293 in the file. +/// +/// b- [OPTIONAL] Discriminator. This is used if the sampled program +/// was compiled with DWARF discriminator support +/// (http://wiki.dwarfstd.org/index.php?title=Path_Discriminators) +/// This is currently only emitted by GCC and we just ignore it. +/// +/// FIXME: Handle discriminators, since they are needed to distinguish +/// multiple control flow paths within a single source LOC. +/// +/// c- Number of samples. This is the number of samples collected by +/// the profiler at this source location. +/// +/// d- [OPTIONAL] Potential call targets and samples. If present, this +/// line contains a call instruction. This models both direct and +/// indirect calls. Each called target is listed together with the +/// number of samples.
For example, +/// +/// 130: 7 foo:3 bar:2 baz:7 +/// +/// The above means that at relative line offset 130 there is a +/// call instruction that calls one of foo(), bar() and baz(). With +/// baz() being the relatively more frequent call target. +/// +/// FIXME: This is currently unhandled, but it has a lot of +/// potential for aiding the inliner. +/// /// /// Since this is a flat profile, a function that shows up more than /// once gets all its samples aggregated across all its instances. -/// TODO - flat profiles are too imprecise to provide good optimization -/// opportunities. Convert them to context-sensitive profile. +/// +/// FIXME: flat profiles are too imprecise to provide good optimization +/// opportunities. Convert them to context-sensitive profile. /// /// This textual representation is useful to generate unit tests and /// for debugging purposes, but it should not be used to generate /// profiles for large programs, as the representation is extremely /// inefficient. void SampleModuleProfile::loadText() { - ExternalProfileTextLoader Loader(Filename); - - // Read the symbol table. - StringRef Line = Loader.readLine(); - if (Line != "symbol table") - Loader.reportParseError("Expected 'symbol table', found " + Line); - int NumSymbols; - Line = Loader.readLine(); - if (Line.getAsInteger(10, NumSymbols)) - Loader.reportParseError("Expected a number, found " + Line); - for (int I = 0; I < NumSymbols; I++) - Profiles[Loader.readLine()] = SampleFunctionProfile(); + OwningPtr Buffer; + error_code EC = MemoryBuffer::getFile(Filename, Buffer); + if (EC) + report_fatal_error("Could not open file " + Filename + ": " + EC.message()); + line_iterator LineIt(*Buffer, '#'); // Read the profile of each function. Since each function may be // mentioned more than once, and we are collecting flat profiles, // accumulate samples as we parse them. 
- Regex HeadRE("^([^:]+):([0-9]+):([0-9]+):([0-9]+)$"); - Regex LineSample("^([0-9]+): ([0-9]+)$"); - while (!Loader.atEOF()) { - SmallVector Matches; - Line = Loader.readLine(); - if (!HeadRE.match(Line, &Matches)) - Loader.reportParseError("Expected 'mangled_name:NUM:NUM:NUM', found " + - Line); - assert(Matches.size() == 5); + Regex HeadRE("^([^:]+):([0-9]+):([0-9]+)$"); + Regex LineSample("^([0-9]+)(\\.[0-9]+)?: ([0-9]+)(.*)$"); + while (!LineIt.is_at_eof()) { + // Read the header of each function. The function header should + // have this format: + // + // function_name:total_samples:total_head_samples + // + // See above for an explanation of each field. + SmallVector Matches; + if (!HeadRE.match(*LineIt, &Matches)) + reportParseError(LineIt.line_number(), + "Expected 'mangled_name:NUM:NUM', found " + *LineIt); + assert(Matches.size() == 4); StringRef FName = Matches[1]; - unsigned NumSamples, NumHeadSamples, NumSampledLines; + unsigned NumSamples, NumHeadSamples; Matches[2].getAsInteger(10, NumSamples); Matches[3].getAsInteger(10, NumHeadSamples); - Matches[4].getAsInteger(10, NumSampledLines); + Profiles[FName] = SampleFunctionProfile(); SampleFunctionProfile &FProfile = Profiles[FName]; FProfile.addTotalSamples(NumSamples); FProfile.addHeadSamples(NumHeadSamples); - unsigned I; - for (I = 0; I < NumSampledLines && !Loader.atEOF(); I++) { - Line = Loader.readLine(); - if (!LineSample.match(Line, &Matches)) - Loader.reportParseError("Expected 'NUM: NUM', found " + Line); - assert(Matches.size() == 3); + ++LineIt; + + // Now read the body. The body of the function ends when we reach + // EOF or when we see the start of the next function. 
+ while (!LineIt.is_at_eof() && isdigit((*LineIt)[0])) { + if (!LineSample.match(*LineIt, &Matches)) + reportParseError( + LineIt.line_number(), + "Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found " + *LineIt); + assert(Matches.size() == 5); unsigned LineOffset, NumSamples; Matches[1].getAsInteger(10, LineOffset); - Matches[2].getAsInteger(10, NumSamples); + + // FIXME: Handle discriminator information (in Matches[2]). + + Matches[3].getAsInteger(10, NumSamples); + + // FIXME: Handle called targets (in Matches[4]). + // When dealing with instruction weights, we use the value // zero to indicate the absence of a sample. If we read an // actual zero from the profile file, return it as 1 to @@ -464,10 +455,8 @@ void SampleModuleProfile::loadText() { if (NumSamples == 0) NumSamples = 1; FProfile.addBodySamples(LineOffset, NumSamples); + ++LineIt; } - - if (I < NumSampledLines) - Loader.reportParseError("Unexpected end of file"); } } diff --git a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof index 90459e65d04..763956f0688 100644 --- a/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof +++ b/test/Transforms/SampleProfile/Inputs/bad_fn_header.prof @@ -1,6 +1,3 @@ -symbol table -1 -empty empty:100:BAD 0: 0 1: 100 diff --git a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof index 8c0d7630f91..038c45f77e3 100644 --- a/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof +++ b/test/Transforms/SampleProfile/Inputs/bad_sample_line.prof @@ -1,6 +1,3 @@ -symbol table -1 -empty -empty:100:0:1 +empty:100:0 0: 0 1: BAD diff --git a/test/Transforms/SampleProfile/Inputs/branch.prof b/test/Transforms/SampleProfile/Inputs/branch.prof index d19894d428c..cd1cb5b1f16 100644 --- a/test/Transforms/SampleProfile/Inputs/branch.prof +++ b/test/Transforms/SampleProfile/Inputs/branch.prof @@ -1,7 +1,4 @@ -symbol table -1 -main -main:15680:0:7 
+main:15680:0 0: 0 4: 0 7: 0 diff --git a/test/Transforms/SampleProfile/Inputs/calls.prof b/test/Transforms/SampleProfile/Inputs/calls.prof new file mode 100644 index 00000000000..251cb538eff --- /dev/null +++ b/test/Transforms/SampleProfile/Inputs/calls.prof @@ -0,0 +1,11 @@ +_Z3sumii:105580:5279 +0: 5279 +1: 5279 +2: 5279 +main:225715:0 +2: 5553 +3: 5391 +# This indicates that at line 3 of this function, the 'then' branch +# of the conditional is taken (discriminator '1'). However, we still +# do not handle this case, so we compute the wrong branch weights here. +3.1: 5752 _Z3sumii:5860 diff --git a/test/Transforms/SampleProfile/Inputs/missing_num_syms.prof b/test/Transforms/SampleProfile/Inputs/missing_num_syms.prof deleted file mode 100644 index 7cd053611c8..00000000000 --- a/test/Transforms/SampleProfile/Inputs/missing_num_syms.prof +++ /dev/null @@ -1,5 +0,0 @@ -symbol table -empty -empty:100:0:1 -0: 0 -1: 100 diff --git a/test/Transforms/SampleProfile/Inputs/missing_samples.prof b/test/Transforms/SampleProfile/Inputs/missing_samples.prof deleted file mode 100644 index edd36c2f52f..00000000000 --- a/test/Transforms/SampleProfile/Inputs/missing_samples.prof +++ /dev/null @@ -1,6 +0,0 @@ -symbol table -1 -empty -empty:100:0:10 -0: 0 -1: 100 diff --git a/test/Transforms/SampleProfile/Inputs/missing_symtab.prof b/test/Transforms/SampleProfile/Inputs/missing_symtab.prof deleted file mode 100644 index 2a826138b08..00000000000 --- a/test/Transforms/SampleProfile/Inputs/missing_symtab.prof +++ /dev/null @@ -1,5 +0,0 @@ -1 -empty -empty:100:0:1 -0: 0 -1: 100 diff --git a/test/Transforms/SampleProfile/Inputs/propagate.prof b/test/Transforms/SampleProfile/Inputs/propagate.prof index ea799e93eed..b28609be66c 100644 --- a/test/Transforms/SampleProfile/Inputs/propagate.prof +++ b/test/Transforms/SampleProfile/Inputs/propagate.prof @@ -1,7 +1,4 @@ -symbol table -1 -_Z3fooiil -_Z3fooiil:58139:0:16 +_Z3fooiil:58139:0 0: 0 1: 0 2: 0 diff --git 
a/test/Transforms/SampleProfile/Inputs/syntax.prof b/test/Transforms/SampleProfile/Inputs/syntax.prof index 9280751b2d7..f3738912a9d 100644 --- a/test/Transforms/SampleProfile/Inputs/syntax.prof +++ b/test/Transforms/SampleProfile/Inputs/syntax.prof @@ -1,6 +1,3 @@ -symbol table -1 -empty -empty:100:0:2 +empty:100:0 0: 0 1: 100 diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll new file mode 100644 index 00000000000..8f986e4e819 --- /dev/null +++ b/test/Transforms/SampleProfile/calls.ll @@ -0,0 +1,107 @@ +; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s + +; Original C++ test case +; +; #include +; +; int sum(int x, int y) { +; return x + y; +; } +; +; int main() { +; int s, i = 0; +; while (i++ < 20000 * 20000) +; if (i != 100) s = sum(i, s); else s = 30; +; printf("sum is %d\n", s); +; return 0; +; } + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +; Function Attrs: nounwind uwtable +define i32 @_Z3sumii(i32 %x, i32 %y) { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %0 = load i32* %x.addr, align 4, !dbg !11 + %1 = load i32* %y.addr, align 4, !dbg !11 + %add = add nsw i32 %0, %1, !dbg !11 + ret i32 %add, !dbg !11 +} + +; Function Attrs: uwtable +define i32 @main() { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !12 + br label %while.cond, !dbg !13 + +while.cond: ; preds = %if.end, %entry + %0 = load i32* %i, align 4, !dbg !13 + %inc = add nsw i32 %0, 1, !dbg !13 + store i32 %inc, i32* %i, align 4, !dbg !13 + %cmp = icmp slt i32 %0, 400000000, !dbg !13 + br i1 %cmp, label %while.body, label %while.end, !dbg !13 +; CHECK: edge while.cond -> while.body probability is 11143 / 16696 = 66.7405% +; CHECK: edge 
while.cond -> while.end probability is 5553 / 16696 = 33.2595% + +while.body: ; preds = %while.cond + %1 = load i32* %i, align 4, !dbg !14 + %cmp1 = icmp ne i32 %1, 100, !dbg !14 + br i1 %cmp1, label %if.then, label %if.else, !dbg !14 +; NOTE: These weights are wrong. We currently do not handle multiple +; control flow paths in the same line. The edge while.body -> if.then +; should be much heavier than the other one. Support for DWARF +; discriminators will fix this. +; CHECK: edge while.body -> if.then probability is 11143 / 22286 = 50% +; CHECK: edge while.body -> if.else probability is 11143 / 22286 = 50% + +if.then: ; preds = %while.body + %2 = load i32* %i, align 4, !dbg !14 + %3 = load i32* %s, align 4, !dbg !14 + %call = call i32 @_Z3sumii(i32 %2, i32 %3), !dbg !14 + store i32 %call, i32* %s, align 4, !dbg !14 + br label %if.end, !dbg !14 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !14 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !14 + +while.end: ; preds = %while.cond + %4 = load i32* %s, align 4, !dbg !16 + %call2 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @.str, i32 0, i32 0), i32 %4), !dbg !16 + ret i32 0, !dbg !17 +} + +declare i32 @printf(i8*, ...) 
+ +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!8, !9} +!llvm.ident = !{!10} + +!0 = metadata !{i32 786449, metadata !1, i32 4, metadata !"clang version 3.5 ", i1 false, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [./calls.cc] [DW_LANG_C_plus_plus] +!1 = metadata !{metadata !"calls.cc", metadata !"."} +!2 = metadata !{i32 0} +!3 = metadata !{metadata !4, metadata !7} +!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"sum", metadata !"sum", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 (i32, i32)* @_Z3sumii, null, null, metadata !2, i32 3} ; [ DW_TAG_subprogram ] [line 3] [def] [sum] +!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [./calls.cc] +!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ] +!7 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"main", metadata !"main", metadata !"", i32 7, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, i32 ()* @main, null, null, metadata !2, i32 7} ; [ DW_TAG_subprogram ] [line 7] [def] [main] +!8 = metadata !{i32 2, metadata !"Dwarf Version", i32 4} +!9 = metadata !{i32 1, metadata !"Debug Info Version", i32 1} +!10 = metadata !{metadata !"clang version 3.5 "} +!11 = metadata !{i32 4, i32 0, metadata !4, null} +!12 = metadata !{i32 8, i32 0, metadata !7, null} ; [ DW_TAG_imported_declaration ] +!13 = metadata !{i32 9, i32 0, metadata !7, null} +!14 = metadata !{i32 10, i32 0, metadata !15, null} +!15 = metadata !{i32 786443, metadata !1, metadata !7, i32 10, i32 0, i32 0} ; [ DW_TAG_lexical_block ] [./calls.cc] +!16 = metadata !{i32 11, i32 0, metadata !7, null} +!17 = metadata !{i32 12, i32 0, metadata !7, null} diff --git a/test/Transforms/SampleProfile/syntax.ll 
b/test/Transforms/SampleProfile/syntax.ll index 41a08bc2959..4fdfeae8496 100644 --- a/test/Transforms/SampleProfile/syntax.ll +++ b/test/Transforms/SampleProfile/syntax.ll @@ -1,10 +1,7 @@ ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s ; RUN: not opt < %s -sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s -; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/missing_symtab.prof 2>&1 | FileCheck -check-prefix=MISSING-SYMTAB %s -; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/missing_num_syms.prof 2>&1 | FileCheck -check-prefix=MISSING-NUM-SYMS %s ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s ; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s -; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/missing_samples.prof 2>&1 | FileCheck -check-prefix=MISSING-SAMPLES %s define void @empty() { entry: @@ -12,8 +9,5 @@ entry: } ; NO-DEBUG: LLVM ERROR: No debug information found in function empty ; MISSING-FILE: LLVM ERROR: Could not open file missing.prof: No such file or directory -; MISSING-SYMTAB: LLVM ERROR: {{.*}}missing_symtab.prof:1: Expected 'symbol table', found 1 -; MISSING-NUM-SYMS: LLVM ERROR: {{.*}}missing_num_syms.prof:2: Expected a number, found empty -; BAD-FN-HEADER: LLVM ERROR: {{.*}}bad_fn_header.prof:4: Expected 'mangled_name:NUM:NUM:NUM', found empty:100:BAD -; BAD-SAMPLE-LINE: LLVM ERROR: {{.*}}bad_sample_line.prof:6: Expected 'mangled_name:NUM:NUM:NUM', found 1: BAD -; MISSING-SAMPLES: LLVM ERROR: {{.*}}missing_samples.prof:6: Unexpected end of file +; BAD-FN-HEADER: LLVM ERROR: {{.*}}bad_fn_header.prof:1: Expected 'mangled_name:NUM:NUM', found empty:100:BAD +; BAD-SAMPLE-LINE: LLVM ERROR: 
{{.*}}bad_sample_line.prof:3: Expected 'NUM[.NUM]: NUM[ mangled_name:NUM]*', found 1: BAD